gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2021 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
121 /* Return true if REGION is a combined parallel+workshare region. */
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
126 return region->is_combined_parallel;
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
130 is the immediate dominator of WS_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
143 Is lowered into:
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
202 return true;
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
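/* A worked example of the adjustment above (illustrative numbers, not from
   the testsuite): with a simd schedule and omp_max_vf () returning 8, a
   chunk size of 5 becomes
     (5 + (8 - 1)) & -8 == 8
   i.e. the chunk is rounded up to a whole multiple of the vectorization
   factor so no chunk forces a partial vector iteration.  */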
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. WS_STMT is the workshare directive being
227 expanded. */
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
238 struct omp_for_data fd;
239 tree n1, n2;
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
245 if (gimple_omp_for_combined_into_p (for_stmt))
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
269 if (fd.chunk_size)
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
276 return ws_args;
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
290 gcc_unreachable ();
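/* As a rough illustration of what the above collects (variable names are
   made up): for

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++)
       ...

   WS_ARGS becomes approximately { (long) 0, (long) n, (long) 1, (long) 4 },
   i.e. the start, end, step and chunk size that expand_parallel_call later
   appends to whichever GOMP_parallel_loop_* entry point it selects.  */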
293 /* Discover whether REGION is a combined parallel+workshare region. */
295 static void
296 determine_parallel_type (struct omp_region *region)
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
319 /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
336 if (region->inner->type == GIMPLE_OMP_FOR)
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
376 /* Dump the parallel region tree rooted at REGION. */
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
387 if (region->cont)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
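/* For a parallel region containing a single worksharing loop the dump
   produced above looks roughly like this (block numbers are illustrative;
   the entry lines use gimple_code_name, so they come out in lower case):

     bb 2: gimple_omp_parallel
	 bb 3: gimple_omp_for
	 bb 5: GIMPLE_OMP_CONTINUE
	 bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN  */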
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
406 dump_omp_region (stderr, region, 0);
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
412 dump_omp_region (stderr, root_omp_region, 0);
415 /* Create a new parallel region starting at STMT inside region PARENT. */
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
421 struct omp_region *region = XCNEW (struct omp_region);
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
427 if (parent)
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
434 else
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
442 return region;
445 /* Release the memory associated with the region tree rooted at REGION. */
447 static void
448 free_omp_region_1 (struct omp_region *region)
450 struct omp_region *i, *n;
452 for (i = region->inner; i ; i = n)
454 n = i->next;
455 free_omp_region_1 (i);
458 free (region);
461 /* Release the memory for the entire omp region tree. */
463 void
464 omp_free_regions (void)
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
469 n = r->next;
470 free_omp_region_1 (r);
472 root_omp_region = NULL;
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
478 static gcond *
479 gimple_build_cond_empty (tree cond)
481 enum tree_code pred_code;
482 tree lhs, rhs;
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502 /* OMP expansion expands inner regions before outer ones, so if
503 we e.g. have an explicit task region nested in a parallel region, when
504 expanding the task region current_function_decl will be the original
505 source function, but we actually want to use as context the child
506 function of the parallel. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543 being expanded. BB is the block where to insert the code. WS_ARGS
544 will be set if this is a call to a combined parallel+workshare
545 construct; it contains the list of additional arguments needed by
546 the workshare construct. */
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
571 switch (region->inner->type)
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
632 else
633 clause_loc = gimple_location (entry_stmt);
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
646 cond = gimple_boolify (cond);
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
665 else
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
701 if (gimple_in_ssa_p (cfun))
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
708 val = tmp_join;
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
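/* To make the code above concrete (a sketch, not lifted from real dumps):
   for
     #pragma omp parallel if (n > 10) num_threads (4)
   VAL ends up holding roughly (n > 10 ? 4u : 1u), so the runtime is asked
   for a single thread when the IF clause is false and for the requested
   number of threads otherwise.  */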
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
736 if (rtmp)
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
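/* For a plain

     #pragma omp parallel
     ...

   the statement built above is roughly (a sketch; the child function name
   and data block are illustrative):

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);

   i.e. outlined child function, shared-data block, NUM_THREADS (zero means
   "let the runtime decide") and the flags word derived from proc_bind.  */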
747 /* Build the function call to GOMP_task to actually
748 generate the task operation. BB is the block where to insert the code. */
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
800 else
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
810 else
811 num_tasks = integer_zero_node;
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
822 else
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
830 tree flags = build_int_cst (unsigned_type_node, iflags);
832 tree cond = boolean_true_node;
833 if (ifc)
835 if (taskloop_p)
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
849 if (finalc)
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
868 gsi = gsi_last_nondebug_bb (bb);
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
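/* Roughly, for
     #pragma omp task untied
   the call built above looks like (a sketch; the size and alignment are
   illustrative):
     GOMP_task (bar._omp_fn.1, &.omp_data_o.2, 0B, 32, 8,
		1, GOMP_TASK_FLAG_UNTIED, 0B, 0, 0B);
   i.e. child function, data block, copy function (none here), argument
   block size and alignment, the IF condition (true), the flags word, and
   null depend, zero priority and null detach arguments.  */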
905 /* Build the function call to GOMP_taskwait_depend to actually
906 generate the taskwait operation. BB is the block where to insert the
907 code. */
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
917 depend = OMP_CLAUSE_DECL (depend);
919 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920 tree t
921 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922 1, depend);
924 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925 false, GSI_CONTINUE_LINKING);
928 /* Build the function call to GOMP_teams_reg to actually
929 generate the host teams operation. REGION is the teams region
930 being expanded. BB is the block where to insert the code. */
932 static void
933 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
935 tree clauses = gimple_omp_teams_clauses (entry_stmt);
936 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937 if (num_teams == NULL_TREE)
938 num_teams = build_int_cst (unsigned_type_node, 0);
939 else
941 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
942 num_teams = fold_convert (unsigned_type_node, num_teams);
944 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945 if (thread_limit == NULL_TREE)
946 thread_limit = build_int_cst (unsigned_type_node, 0);
947 else
949 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950 thread_limit = fold_convert (unsigned_type_node, thread_limit);
953 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955 if (t == NULL)
956 t1 = null_pointer_node;
957 else
958 t1 = build_fold_addr_expr (t);
959 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960 tree t2 = build_fold_addr_expr (child_fndecl);
962 vec<tree, va_gc> *args;
963 vec_alloc (args, 5);
964 args->quick_push (t2);
965 args->quick_push (t1);
966 args->quick_push (num_teams);
967 args->quick_push (thread_limit);
968 /* For future extensibility. */
969 args->quick_push (build_zero_cst (unsigned_type_node));
971 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973 args);
975 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976 false, GSI_CONTINUE_LINKING);
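/* The call built above for a host
     #pragma omp teams num_teams (8) thread_limit (16)
   is roughly (a sketch):
     GOMP_teams_reg (baz._omp_fn.2, &.omp_data_o.3, 8, 16, 0);
   where the trailing zero is the currently unused argument reserved for
   future extensibility.  */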
979 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
981 static tree
982 vec2chain (vec<tree, va_gc> *v)
984 tree chain = NULL_TREE, t;
985 unsigned ix;
987 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
989 DECL_CHAIN (t) = chain;
990 chain = t;
993 return chain;
996 /* Remove barriers in REGION->EXIT's block. Note that this is only
997 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
998 is an implicit barrier, any barrier that a workshare inside the
999 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region can
1000 now be removed. */
1002 static void
1003 remove_exit_barrier (struct omp_region *region)
1005 gimple_stmt_iterator gsi;
1006 basic_block exit_bb;
1007 edge_iterator ei;
1008 edge e;
1009 gimple *stmt;
1010 int any_addressable_vars = -1;
1012 exit_bb = region->exit;
1014 /* If the parallel region doesn't return, we don't have REGION->EXIT
1015 block at all. */
1016 if (! exit_bb)
1017 return;
1019 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1020 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1021 statements that can appear in between are extremely limited -- no
1022 memory operations at all. Here, we allow nothing at all, so the
1023 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1024 gsi = gsi_last_nondebug_bb (exit_bb);
1025 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026 gsi_prev_nondebug (&gsi);
1027 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028 return;
1030 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1032 gsi = gsi_last_nondebug_bb (e->src);
1033 if (gsi_end_p (gsi))
1034 continue;
1035 stmt = gsi_stmt (gsi);
1036 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037 && !gimple_omp_return_nowait_p (stmt))
1039 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040 in many cases. If there could be tasks queued, the barrier
1041 might be needed to let the tasks run before some local
1042 variable of the parallel that the task uses as shared
1043 runs out of scope. The task can be spawned either
1044 from within current function (this would be easy to check)
1045 or from some function it calls and gets passed an address
1046 of such a variable. */
1047 if (any_addressable_vars < 0)
1049 gomp_parallel *parallel_stmt
1050 = as_a <gomp_parallel *> (last_stmt (region->entry));
1051 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052 tree local_decls, block, decl;
1053 unsigned ix;
1055 any_addressable_vars = 0;
1056 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057 if (TREE_ADDRESSABLE (decl))
1059 any_addressable_vars = 1;
1060 break;
1062 for (block = gimple_block (stmt);
1063 !any_addressable_vars
1064 && block
1065 && TREE_CODE (block) == BLOCK;
1066 block = BLOCK_SUPERCONTEXT (block))
1068 for (local_decls = BLOCK_VARS (block);
1069 local_decls;
1070 local_decls = DECL_CHAIN (local_decls))
1071 if (TREE_ADDRESSABLE (local_decls))
1073 any_addressable_vars = 1;
1074 break;
1076 if (block == gimple_block (parallel_stmt))
1077 break;
1080 if (!any_addressable_vars)
1081 gimple_omp_return_set_nowait (stmt);
1086 static void
1087 remove_exit_barriers (struct omp_region *region)
1089 if (region->type == GIMPLE_OMP_PARALLEL)
1090 remove_exit_barrier (region);
1092 if (region->inner)
1094 region = region->inner;
1095 remove_exit_barriers (region);
1096 while (region->next)
1098 region = region->next;
1099 remove_exit_barriers (region);
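/* A sketch of the situation handled above:

     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...
     }

   The worksharing loop normally ends with a barrier, but the end of the
   parallel region is itself an implicit barrier, so the loop's
   GIMPLE_OMP_RETURN can be marked nowait, unless addressable locals might
   still be referenced by queued tasks as described above.  */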
1104 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105 calls. These can't be declared as const functions, but
1106 within one parallel body they are constant, so they can be
1107 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108 which are declared const. Similarly for task body, except
1109 that in untied task omp_get_thread_num () can change at any task
1110 scheduling point. */
1112 static void
1113 optimize_omp_library_calls (gimple *entry_stmt)
1115 basic_block bb;
1116 gimple_stmt_iterator gsi;
1117 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123 OMP_CLAUSE_UNTIED) != NULL);
1125 FOR_EACH_BB_FN (bb, cfun)
1126 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1128 gimple *call = gsi_stmt (gsi);
1129 tree decl;
1131 if (is_gimple_call (call)
1132 && (decl = gimple_call_fndecl (call))
1133 && DECL_EXTERNAL (decl)
1134 && TREE_PUBLIC (decl)
1135 && DECL_INITIAL (decl) == NULL)
1137 tree built_in;
1139 if (DECL_NAME (decl) == thr_num_id)
1141 /* In #pragma omp task untied omp_get_thread_num () can change
1142 during the execution of the task region. */
1143 if (untied_task)
1144 continue;
1145 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1147 else if (DECL_NAME (decl) == num_thr_id)
1148 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149 else
1150 continue;
1152 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153 || gimple_call_num_args (call) != 0)
1154 continue;
1156 if (flag_exceptions && !TREE_NOTHROW (decl))
1157 continue;
1159 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161 TREE_TYPE (TREE_TYPE (built_in))))
1162 continue;
1164 gimple_call_set_fndecl (call, built_in);
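/* A minimal sketch of the effect of the above: inside one parallel body

     int a = omp_get_thread_num ();
     int b = omp_get_thread_num ();

   both calls are redirected to __builtin_omp_get_thread_num (), which is
   declared const, so later passes may CSE them into a single call.  */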
1169 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1170 regimplified. */
1172 static tree
1173 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1175 tree t = *tp;
1177 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1178 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179 return t;
1181 if (TREE_CODE (t) == ADDR_EXPR)
1182 recompute_tree_invariant_for_addr_expr (t);
1184 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185 return NULL_TREE;
1188 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1190 static void
1191 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192 bool after)
1194 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196 !after, after ? GSI_CONTINUE_LINKING
1197 : GSI_SAME_STMT);
1198 gimple *stmt = gimple_build_assign (to, from);
1199 if (after)
1200 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201 else
1202 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gimple_regimplify_operands (stmt, &gsi);
1211 /* Expand the OpenMP parallel or task directive starting at REGION. */
1213 static void
1214 expand_omp_taskreg (struct omp_region *region)
1216 basic_block entry_bb, exit_bb, new_bb;
1217 struct function *child_cfun;
1218 tree child_fn, block, t;
1219 gimple_stmt_iterator gsi;
1220 gimple *entry_stmt, *stmt;
1221 edge e;
1222 vec<tree, va_gc> *ws_args;
1224 entry_stmt = last_stmt (region->entry);
1225 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1226 && gimple_omp_task_taskwait_p (entry_stmt))
1228 new_bb = region->entry;
1229 gsi = gsi_last_nondebug_bb (region->entry);
1230 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1231 gsi_remove (&gsi, true);
1232 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1233 return;
1236 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1237 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1239 entry_bb = region->entry;
1240 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1241 exit_bb = region->cont;
1242 else
1243 exit_bb = region->exit;
1245 if (is_combined_parallel (region))
1246 ws_args = region->ws_args;
1247 else
1248 ws_args = NULL;
1250 if (child_cfun->cfg)
1252 /* Due to inlining, it may happen that we have already outlined
1253 the region, in which case all we need to do is make the
1254 sub-graph unreachable and emit the parallel call. */
1255 edge entry_succ_e, exit_succ_e;
1257 entry_succ_e = single_succ_edge (entry_bb);
1259 gsi = gsi_last_nondebug_bb (entry_bb);
1260 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1261 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1262 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1263 gsi_remove (&gsi, true);
1265 new_bb = entry_bb;
1266 if (exit_bb)
1268 exit_succ_e = single_succ_edge (exit_bb);
1269 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1271 remove_edge_and_dominated_blocks (entry_succ_e);
1273 else
1275 unsigned srcidx, dstidx, num;
1277 /* If the parallel region needs data sent from the parent
1278 function, then the very first statement (except possible
1279 tree profile counter updates) of the parallel body
1280 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1281 &.OMP_DATA_O is passed as an argument to the child function,
1282 we need to replace it with the argument as seen by the child
1283 function.
1285 In most cases, this will end up being the identity assignment
1286 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1287 a function call that has been inlined, the original PARM_DECL
1288 .OMP_DATA_I may have been converted into a different local
1289 variable. In which case, we need to keep the assignment. */
1290 if (gimple_omp_taskreg_data_arg (entry_stmt))
1292 basic_block entry_succ_bb
1293 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1294 : FALLTHRU_EDGE (entry_bb)->dest;
1295 tree arg;
1296 gimple *parcopy_stmt = NULL;
1298 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1300 gimple *stmt;
1302 gcc_assert (!gsi_end_p (gsi));
1303 stmt = gsi_stmt (gsi);
1304 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1305 continue;
1307 if (gimple_num_ops (stmt) == 2)
1309 tree arg = gimple_assign_rhs1 (stmt);
1311 /* We ignore the subcode because we're
1312 effectively doing a STRIP_NOPS. */
1314 if (TREE_CODE (arg) == ADDR_EXPR
1315 && (TREE_OPERAND (arg, 0)
1316 == gimple_omp_taskreg_data_arg (entry_stmt)))
1318 parcopy_stmt = stmt;
1319 break;
1324 gcc_assert (parcopy_stmt != NULL);
1325 arg = DECL_ARGUMENTS (child_fn);
1327 if (!gimple_in_ssa_p (cfun))
1329 if (gimple_assign_lhs (parcopy_stmt) == arg)
1330 gsi_remove (&gsi, true);
1331 else
1333 /* ?? Is setting the subcode really necessary ?? */
1334 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1335 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1338 else
1340 tree lhs = gimple_assign_lhs (parcopy_stmt);
1341 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1342 /* We'd like to set the rhs to the default def in the child_fn,
1343 but it's too early to create ssa names in the child_fn.
1344 Instead, we set the rhs to the parm. In
1345 move_sese_region_to_fn, we introduce a default def for the
1346 parm, map the parm to its default def, and once we encounter
1347 this stmt, replace the parm with the default def. */
1348 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1349 update_stmt (parcopy_stmt);
1353 /* Declare local variables needed in CHILD_CFUN. */
1354 block = DECL_INITIAL (child_fn);
1355 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1356 /* The gimplifier could record temporaries in parallel/task block
1357 rather than in containing function's local_decls chain,
1358 which would mean cgraph missed finalizing them. Do it now. */
1359 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1360 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1361 varpool_node::finalize_decl (t);
1362 DECL_SAVED_TREE (child_fn) = NULL;
1363 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1364 gimple_set_body (child_fn, NULL);
1365 TREE_USED (block) = 1;
1367 /* Reset DECL_CONTEXT on function arguments. */
1368 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1369 DECL_CONTEXT (t) = child_fn;
1371 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1372 so that it can be moved to the child function. */
1373 gsi = gsi_last_nondebug_bb (entry_bb);
1374 stmt = gsi_stmt (gsi);
1375 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1376 || gimple_code (stmt) == GIMPLE_OMP_TASK
1377 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1378 e = split_block (entry_bb, stmt);
1379 gsi_remove (&gsi, true);
1380 entry_bb = e->dest;
1381 edge e2 = NULL;
1382 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1383 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1384 else
1386 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1387 gcc_assert (e2->dest == region->exit);
1388 remove_edge (BRANCH_EDGE (entry_bb));
1389 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1390 gsi = gsi_last_nondebug_bb (region->exit);
1391 gcc_assert (!gsi_end_p (gsi)
1392 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1393 gsi_remove (&gsi, true);
1396 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1397 if (exit_bb)
1399 gsi = gsi_last_nondebug_bb (exit_bb);
1400 gcc_assert (!gsi_end_p (gsi)
1401 && (gimple_code (gsi_stmt (gsi))
1402 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1403 stmt = gimple_build_return (NULL);
1404 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1405 gsi_remove (&gsi, true);
1408 /* Move the parallel region into CHILD_CFUN. */
1410 if (gimple_in_ssa_p (cfun))
1412 init_tree_ssa (child_cfun);
1413 init_ssa_operands (child_cfun);
1414 child_cfun->gimple_df->in_ssa_p = true;
1415 block = NULL_TREE;
1417 else
1418 block = gimple_block (entry_stmt);
1420 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1421 if (exit_bb)
1422 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1423 if (e2)
1425 basic_block dest_bb = e2->dest;
1426 if (!exit_bb)
1427 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1428 remove_edge (e2);
1429 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1431 /* When the OMP expansion process cannot guarantee an up-to-date
1432 loop tree, arrange for the child function to fix up loops. */
1433 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1434 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1436 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1437 num = vec_safe_length (child_cfun->local_decls);
1438 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1440 t = (*child_cfun->local_decls)[srcidx];
1441 if (DECL_CONTEXT (t) == cfun->decl)
1442 continue;
1443 if (srcidx != dstidx)
1444 (*child_cfun->local_decls)[dstidx] = t;
1445 dstidx++;
1447 if (dstidx != num)
1448 vec_safe_truncate (child_cfun->local_decls, dstidx);
1450 /* Inform the callgraph about the new function. */
1451 child_cfun->curr_properties = cfun->curr_properties;
1452 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1453 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1454 cgraph_node *node = cgraph_node::get_create (child_fn);
1455 node->parallelized_function = 1;
1456 cgraph_node::add_new_function (child_fn, true);
1458 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1459 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1461 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1462 fixed in a following pass. */
1463 push_cfun (child_cfun);
1464 if (need_asm)
1465 assign_assembler_name_if_needed (child_fn);
1467 if (optimize)
1468 optimize_omp_library_calls (entry_stmt);
1469 update_max_bb_count ();
1470 cgraph_edge::rebuild_edges ();
1472 /* Some EH regions might become dead, see PR34608. If
1473 pass_cleanup_cfg isn't the first pass to happen with the
1474 new child, these dead EH edges might cause problems.
1475 Clean them up now. */
1476 if (flag_exceptions)
1478 basic_block bb;
1479 bool changed = false;
1481 FOR_EACH_BB_FN (bb, cfun)
1482 changed |= gimple_purge_dead_eh_edges (bb);
1483 if (changed)
1484 cleanup_tree_cfg ();
1486 if (gimple_in_ssa_p (cfun))
1487 update_ssa (TODO_update_ssa);
1488 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1489 verify_loop_structure ();
1490 pop_cfun ();
1492 if (dump_file && !gimple_in_ssa_p (cfun))
1494 omp_any_child_fn_dumped = true;
1495 dump_function_header (dump_file, child_fn, dump_flags);
1496 dump_function_to_file (child_fn, dump_file, dump_flags);
1500 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1502 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1503 expand_parallel_call (region, new_bb,
1504 as_a <gomp_parallel *> (entry_stmt), ws_args);
1505 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1506 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1507 else
1508 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1509 if (gimple_in_ssa_p (cfun))
1510 update_ssa (TODO_update_ssa_only_virtuals);
1513 /* Information about members of an OpenACC collapsed loop nest. */
1515 struct oacc_collapse
1517 tree base; /* Base value. */
1518 tree iters; /* Number of steps. */
1519 tree step; /* Step size. */
1520 tree tile; /* Tile increment (if tiled). */
1521 tree outer; /* Tile iterator var. */
1524 /* Helper for expand_oacc_for. Determine collapsed loop information.
1525 Fill in COUNTS array. Emit any initialization code before GSI.
1526 Return the calculated outer loop bound of BOUND_TYPE. */
1528 static tree
1529 expand_oacc_collapse_init (const struct omp_for_data *fd,
1530 gimple_stmt_iterator *gsi,
1531 oacc_collapse *counts, tree diff_type,
1532 tree bound_type, location_t loc)
1534 tree tiling = fd->tiling;
1535 tree total = build_int_cst (bound_type, 1);
1536 int ix;
1538 gcc_assert (integer_onep (fd->loop.step));
1539 gcc_assert (integer_zerop (fd->loop.n1));
1541 /* When tiling, the first operand of the tile clause applies to the
1542 innermost loop, and we work outwards from there. Seems
1543 backwards, but whatever. */
1544 for (ix = fd->collapse; ix--;)
1546 const omp_for_data_loop *loop = &fd->loops[ix];
1548 tree iter_type = TREE_TYPE (loop->v);
1549 tree plus_type = iter_type;
1551 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1553 if (POINTER_TYPE_P (iter_type))
1554 plus_type = sizetype;
1556 if (tiling)
1558 tree num = build_int_cst (integer_type_node, fd->collapse);
1559 tree loop_no = build_int_cst (integer_type_node, ix);
1560 tree tile = TREE_VALUE (tiling);
1561 gcall *call
1562 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1563 /* gwv-outer=*/integer_zero_node,
1564 /* gwv-inner=*/integer_zero_node);
1566 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1567 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1568 gimple_call_set_lhs (call, counts[ix].tile);
1569 gimple_set_location (call, loc);
1570 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1572 tiling = TREE_CHAIN (tiling);
1574 else
1576 counts[ix].tile = NULL;
1577 counts[ix].outer = loop->v;
1580 tree b = loop->n1;
1581 tree e = loop->n2;
1582 tree s = loop->step;
1583 bool up = loop->cond_code == LT_EXPR;
1584 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1585 bool negating;
1586 tree expr;
1588 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1589 true, GSI_SAME_STMT);
1590 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1591 true, GSI_SAME_STMT);
1593 /* Convert the step, avoiding possible unsigned->signed overflow. */
1594 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1595 if (negating)
1596 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1597 s = fold_convert (diff_type, s);
1598 if (negating)
1599 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1600 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1601 true, GSI_SAME_STMT);
1603 /* Determine the range, avoiding possible unsigned->signed overflow. */
1604 negating = !up && TYPE_UNSIGNED (iter_type);
1605 expr = fold_build2 (MINUS_EXPR, plus_type,
1606 fold_convert (plus_type, negating ? b : e),
1607 fold_convert (plus_type, negating ? e : b));
1608 expr = fold_convert (diff_type, expr);
1609 if (negating)
1610 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1611 tree range = force_gimple_operand_gsi
1612 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1614 /* Determine number of iterations. */
1615 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1616 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1617 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1619 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1620 true, GSI_SAME_STMT);
1622 counts[ix].base = b;
1623 counts[ix].iters = iters;
1624 counts[ix].step = s;
1626 total = fold_build2 (MULT_EXPR, bound_type, total,
1627 fold_convert (bound_type, iters));
1630 return total;
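/* A worked example of the iteration count computed above (numbers are
   illustrative): for a member loop "for (i = 0; i < 10; i += 3)" we get
   range = 10, dir = +1, s = 3 and
     iters = (range - dir + s) / s = (10 - 1 + 3) / 3 = 4
   matching the four values 0, 3, 6 and 9 taken by the iterator.  */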
1633 /* Emit initializers for collapsed loop members. INNER is true if
1634 this is for the element loop of a TILE. IVAR is the outer
1635 loop iteration variable, from which collapsed loop iteration values
1636 are calculated. COUNTS array has been initialized by
1637 expand_oacc_collapse_init. */
1639 static void
1640 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1641 gimple_stmt_iterator *gsi,
1642 const oacc_collapse *counts, tree ivar,
1643 tree diff_type)
1645 tree ivar_type = TREE_TYPE (ivar);
1647 /* The most rapidly changing iteration variable is the innermost
1648 one. */
1649 for (int ix = fd->collapse; ix--;)
1651 const omp_for_data_loop *loop = &fd->loops[ix];
1652 const oacc_collapse *collapse = &counts[ix];
1653 tree v = inner ? loop->v : collapse->outer;
1654 tree iter_type = TREE_TYPE (v);
1655 tree plus_type = iter_type;
1656 enum tree_code plus_code = PLUS_EXPR;
1657 tree expr;
1659 if (POINTER_TYPE_P (iter_type))
1661 plus_code = POINTER_PLUS_EXPR;
1662 plus_type = sizetype;
1665 expr = ivar;
1666 if (ix)
1668 tree mod = fold_convert (ivar_type, collapse->iters);
1669 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1670 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1671 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1672 true, GSI_SAME_STMT);
1675 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1676 fold_convert (diff_type, collapse->step));
1677 expr = fold_build2 (plus_code, iter_type,
1678 inner ? collapse->outer : collapse->base,
1679 fold_convert (plus_type, expr));
1680 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1681 true, GSI_SAME_STMT);
1682 gassign *ass = gimple_build_assign (v, expr);
1683 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
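/* A rough sketch of the decomposition performed above: with collapse (2)
   and an inner loop of 4 iterations, the linear iteration number IVAR is
   split as

     inner = base_j + (ivar % 4) * step_j;
     outer = base_i + (ivar / 4) * step_i;

   so the innermost iteration variable varies fastest, as noted above.  */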
1687 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1688 of the combined collapse > 1 loop constructs, generate code like:
1689 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1690 if (cond3 is <)
1691 adj = STEP3 - 1;
1692 else
1693 adj = STEP3 + 1;
1694 count3 = (adj + N32 - N31) / STEP3;
1695 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1696 if (cond2 is <)
1697 adj = STEP2 - 1;
1698 else
1699 adj = STEP2 + 1;
1700 count2 = (adj + N22 - N21) / STEP2;
1701 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1702 if (cond1 is <)
1703 adj = STEP1 - 1;
1704 else
1705 adj = STEP1 + 1;
1706 count1 = (adj + N12 - N11) / STEP1;
1707 count = count1 * count2 * count3;
1708 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1709 count = 0;
1710 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1711 of the combined loop constructs, just initialize COUNTS array
1712 from the _looptemp_ clauses. For loop nests with non-rectangular
1713 loops, do this only for the rectangular loops. Then pick
1714 the loops which reference outer vars in their bound expressions
1715 and the loops which they refer to and for this sub-nest compute
1716 number of iterations. For triangular loops use Faulhaber's formula,
1717 otherwise as a fallback, compute by iterating the loops.
1718 If e.g. the sub-nest is
1719 for (I = N11; I COND1 N12; I += STEP1)
1720 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1721 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1723 COUNT = 0;
1724 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1725 for (tmpj = M21 * tmpi + N21;
1726 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1728 int tmpk1 = M31 * tmpj + N31;
1729 int tmpk2 = M32 * tmpj + N32;
1730 if (tmpk1 COND3 tmpk2)
1732 if (COND3 is <)
1733 adj = STEP3 - 1;
1734 else
1735 adj = STEP3 + 1;
1736 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1739 and finally multiply the counts of the rectangular loops not
1740 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1741 store number of iterations of the loops from fd->first_nonrect
1742 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1743 by the counts of rectangular loops not referenced in any non-rectangular
1744 loops sandwiched in between those. */
1746 /* NOTE: It *could* be better to moosh all of the BBs together,
1747 creating one larger BB with all the computation and the unexpected
1748 jump at the end. I.e.
1750 bool zero3, zero2, zero1, zero;
1752 zero3 = N32 c3 N31;
1753 count3 = (N32 - N31) /[cl] STEP3;
1754 zero2 = N22 c2 N21;
1755 count2 = (N22 - N21) /[cl] STEP2;
1756 zero1 = N12 c1 N11;
1757 count1 = (N12 - N11) /[cl] STEP1;
1758 zero = zero3 || zero2 || zero1;
1759 count = count1 * count2 * count3;
1760 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1762 After all, we expect the zero=false, and thus we expect to have to
1763 evaluate all of the comparison expressions, so short-circuiting
1764 oughtn't be a win. Since the condition isn't protecting a
1765 denominator, we're not concerned about divide-by-zero, so we can
1766 fully evaluate count even if a numerator turned out to be wrong.
1768 It seems like putting this all together would create much better
1769 scheduling opportunities, and less pressure on the chip's branch
1770 predictor. */
1772 static void
1773 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1774 basic_block &entry_bb, tree *counts,
1775 basic_block &zero_iter1_bb, int &first_zero_iter1,
1776 basic_block &zero_iter2_bb, int &first_zero_iter2,
1777 basic_block &l2_dom_bb)
1779 tree t, type = TREE_TYPE (fd->loop.v);
1780 edge e, ne;
1781 int i;
1783 /* Collapsed loops need work for expansion into SSA form. */
1784 gcc_assert (!gimple_in_ssa_p (cfun));
1786 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1787 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1789 gcc_assert (fd->ordered == 0);
1790 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1791 isn't supposed to be handled, as the inner loop doesn't
1792 use it. */
1793 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1794 OMP_CLAUSE__LOOPTEMP_);
1795 gcc_assert (innerc);
1796 for (i = 0; i < fd->collapse; i++)
1798 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1799 OMP_CLAUSE__LOOPTEMP_);
1800 gcc_assert (innerc);
1801 if (i)
1802 counts[i] = OMP_CLAUSE_DECL (innerc);
1803 else
1804 counts[0] = NULL_TREE;
1806 if (fd->non_rect
1807 && fd->last_nonrect == fd->first_nonrect + 1
1808 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1810 tree c[4];
1811 for (i = 0; i < 4; i++)
1813 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1814 OMP_CLAUSE__LOOPTEMP_);
1815 gcc_assert (innerc);
1816 c[i] = OMP_CLAUSE_DECL (innerc);
1818 counts[0] = c[0];
1819 fd->first_inner_iterations = c[1];
1820 fd->factor = c[2];
1821 fd->adjn1 = c[3];
1823 return;
1826 for (i = fd->collapse; i < fd->ordered; i++)
1828 tree itype = TREE_TYPE (fd->loops[i].v);
1829 counts[i] = NULL_TREE;
1830 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1831 fold_convert (itype, fd->loops[i].n1),
1832 fold_convert (itype, fd->loops[i].n2));
1833 if (t && integer_zerop (t))
1835 for (i = fd->collapse; i < fd->ordered; i++)
1836 counts[i] = build_int_cst (type, 0);
1837 break;
1840 bool rect_count_seen = false;
1841 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1843 tree itype = TREE_TYPE (fd->loops[i].v);
1845 if (i >= fd->collapse && counts[i])
1846 continue;
1847 if (fd->non_rect)
1849 /* Skip loops that use outer iterators in their expressions
1850 during this phase. */
1851 if (fd->loops[i].m1 || fd->loops[i].m2)
1853 counts[i] = build_zero_cst (type);
1854 continue;
1857 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1858 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1859 fold_convert (itype, fd->loops[i].n1),
1860 fold_convert (itype, fd->loops[i].n2)))
1861 == NULL_TREE || !integer_onep (t)))
1863 gcond *cond_stmt;
1864 tree n1, n2;
1865 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1866 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1867 true, GSI_SAME_STMT);
1868 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1869 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1870 true, GSI_SAME_STMT);
1871 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1872 NULL_TREE, NULL_TREE);
1873 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1874 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1875 expand_omp_regimplify_p, NULL, NULL)
1876 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1877 expand_omp_regimplify_p, NULL, NULL))
1879 *gsi = gsi_for_stmt (cond_stmt);
1880 gimple_regimplify_operands (cond_stmt, gsi);
1882 e = split_block (entry_bb, cond_stmt);
1883 basic_block &zero_iter_bb
1884 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1885 int &first_zero_iter
1886 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1887 if (zero_iter_bb == NULL)
1889 gassign *assign_stmt;
1890 first_zero_iter = i;
1891 zero_iter_bb = create_empty_bb (entry_bb);
1892 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1893 *gsi = gsi_after_labels (zero_iter_bb);
1894 if (i < fd->collapse)
1895 assign_stmt = gimple_build_assign (fd->loop.n2,
1896 build_zero_cst (type));
1897 else
1899 counts[i] = create_tmp_reg (type, ".count");
1900 assign_stmt
1901 = gimple_build_assign (counts[i], build_zero_cst (type));
1903 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1904 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1905 entry_bb);
1907 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1908 ne->probability = profile_probability::very_unlikely ();
1909 e->flags = EDGE_TRUE_VALUE;
1910 e->probability = ne->probability.invert ();
1911 if (l2_dom_bb == NULL)
1912 l2_dom_bb = entry_bb;
1913 entry_bb = e->dest;
1914 *gsi = gsi_last_nondebug_bb (entry_bb);
1917 if (POINTER_TYPE_P (itype))
1918 itype = signed_type_for (itype);
1919 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1920 ? -1 : 1));
1921 t = fold_build2 (PLUS_EXPR, itype,
1922 fold_convert (itype, fd->loops[i].step), t);
1923 t = fold_build2 (PLUS_EXPR, itype, t,
1924 fold_convert (itype, fd->loops[i].n2));
1925 t = fold_build2 (MINUS_EXPR, itype, t,
1926 fold_convert (itype, fd->loops[i].n1));
1927 /* ?? We could probably use CEIL_DIV_EXPR instead of
1928 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1929 generate the same code in the end because generically we
1930 don't know that the values involved must be negative for
1931 GT?? */
1932 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1933 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1934 fold_build1 (NEGATE_EXPR, itype, t),
1935 fold_build1 (NEGATE_EXPR, itype,
1936 fold_convert (itype,
1937 fd->loops[i].step)));
1938 else
1939 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1940 fold_convert (itype, fd->loops[i].step));
1941 t = fold_convert (type, t);
1942 if (TREE_CODE (t) == INTEGER_CST)
1943 counts[i] = t;
1944 else
1946 if (i < fd->collapse || i != first_zero_iter2)
1947 counts[i] = create_tmp_reg (type, ".count");
1948 expand_omp_build_assign (gsi, counts[i], t);
1950 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1952 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1953 continue;
1954 if (!rect_count_seen)
1956 t = counts[i];
1957 rect_count_seen = true;
1959 else
1960 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1961 expand_omp_build_assign (gsi, fd->loop.n2, t);
1964 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1966 gcc_assert (fd->last_nonrect != -1);
1968 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1969 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1970 build_zero_cst (type));
1971 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1972 if (fd->loops[i].m1
1973 || fd->loops[i].m2
1974 || fd->loops[i].non_rect_referenced)
1975 break;
1976 if (i == fd->last_nonrect
1977 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1978 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1980 int o = fd->first_nonrect;
1981 tree itype = TREE_TYPE (fd->loops[o].v);
1982 tree n1o = create_tmp_reg (itype, ".n1o");
1983 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1984 expand_omp_build_assign (gsi, n1o, t);
1985 tree n2o = create_tmp_reg (itype, ".n2o");
1986 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1987 expand_omp_build_assign (gsi, n2o, t);
1988 if (fd->loops[i].m1 && fd->loops[i].m2)
1989 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1990 unshare_expr (fd->loops[i].m1));
1991 else if (fd->loops[i].m1)
1992 t = fold_unary (NEGATE_EXPR, itype,
1993 unshare_expr (fd->loops[i].m1));
1994 else
1995 t = unshare_expr (fd->loops[i].m2);
1996 tree m2minusm1
1997 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1998 true, GSI_SAME_STMT);
2000 gimple_stmt_iterator gsi2 = *gsi;
2001 gsi_prev (&gsi2);
2002 e = split_block (entry_bb, gsi_stmt (gsi2));
2003 e = split_block (e->dest, (gimple *) NULL);
2004 basic_block bb1 = e->src;
2005 entry_bb = e->dest;
2006 *gsi = gsi_after_labels (entry_bb);
2008 gsi2 = gsi_after_labels (bb1);
2009 tree ostep = fold_convert (itype, fd->loops[o].step);
2010 t = build_int_cst (itype, (fd->loops[o].cond_code
2011 == LT_EXPR ? -1 : 1));
2012 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2013 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2014 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2015 if (TYPE_UNSIGNED (itype)
2016 && fd->loops[o].cond_code == GT_EXPR)
2017 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2018 fold_build1 (NEGATE_EXPR, itype, t),
2019 fold_build1 (NEGATE_EXPR, itype, ostep));
2020 else
2021 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2022 tree outer_niters
2023 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2024 true, GSI_SAME_STMT);
2025 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2026 build_one_cst (itype));
2027 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2028 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2029 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2030 true, GSI_SAME_STMT);
2031 tree n1, n2, n1e, n2e;
2032 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2033 if (fd->loops[i].m1)
2035 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2036 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2037 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2039 else
2040 n1 = t;
2041 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2042 true, GSI_SAME_STMT);
2043 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2044 if (fd->loops[i].m2)
2046 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2047 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2048 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2050 else
2051 n2 = t;
2052 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2053 true, GSI_SAME_STMT);
2054 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2055 if (fd->loops[i].m1)
2057 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2058 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2059 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2061 else
2062 n1e = t;
2063 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2064 true, GSI_SAME_STMT);
2065 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2066 if (fd->loops[i].m2)
2068 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2069 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2070 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2072 else
2073 n2e = t;
2074 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2075 true, GSI_SAME_STMT);
2076 gcond *cond_stmt
2077 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2078 NULL_TREE, NULL_TREE);
2079 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2080 e = split_block (bb1, cond_stmt);
2081 e->flags = EDGE_TRUE_VALUE;
2082 e->probability = profile_probability::likely ().guessed ();
2083 basic_block bb2 = e->dest;
2084 gsi2 = gsi_after_labels (bb2);
2086 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2087 NULL_TREE, NULL_TREE);
2088 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2089 e = split_block (bb2, cond_stmt);
2090 e->flags = EDGE_TRUE_VALUE;
2091 e->probability = profile_probability::likely ().guessed ();
2092 gsi2 = gsi_after_labels (e->dest);
2094 tree step = fold_convert (itype, fd->loops[i].step);
2095 t = build_int_cst (itype, (fd->loops[i].cond_code
2096 == LT_EXPR ? -1 : 1));
2097 t = fold_build2 (PLUS_EXPR, itype, step, t);
2098 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2099 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2100 if (TYPE_UNSIGNED (itype)
2101 && fd->loops[i].cond_code == GT_EXPR)
2102 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2103 fold_build1 (NEGATE_EXPR, itype, t),
2104 fold_build1 (NEGATE_EXPR, itype, step));
2105 else
2106 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2107 tree first_inner_iterations
2108 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2109 true, GSI_SAME_STMT);
2110 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2111 if (TYPE_UNSIGNED (itype)
2112 && fd->loops[i].cond_code == GT_EXPR)
2113 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2114 fold_build1 (NEGATE_EXPR, itype, t),
2115 fold_build1 (NEGATE_EXPR, itype, step));
2116 else
2117 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2118 tree factor
2119 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2120 true, GSI_SAME_STMT);
2121 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2122 build_one_cst (itype));
2123 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2124 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2125 t = fold_build2 (MULT_EXPR, itype, factor, t);
2126 t = fold_build2 (PLUS_EXPR, itype,
2127 fold_build2 (MULT_EXPR, itype, outer_niters,
2128 first_inner_iterations), t);
2129 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2130 fold_convert (type, t));
2132 basic_block bb3 = create_empty_bb (bb1);
2133 add_bb_to_loop (bb3, bb1->loop_father);
2135 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2136 e->probability = profile_probability::unlikely ().guessed ();
2138 gsi2 = gsi_after_labels (bb3);
2139 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2140 NULL_TREE, NULL_TREE);
2141 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2142 e = split_block (bb3, cond_stmt);
2143 e->flags = EDGE_TRUE_VALUE;
2144 e->probability = profile_probability::likely ().guessed ();
2145 basic_block bb4 = e->dest;
2147 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2148 ne->probability = e->probability.invert ();
2150 basic_block bb5 = create_empty_bb (bb2);
2151 add_bb_to_loop (bb5, bb2->loop_father);
2153 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2154 ne->probability = profile_probability::unlikely ().guessed ();
2156 for (int j = 0; j < 2; j++)
2158 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2159 t = fold_build2 (MINUS_EXPR, itype,
2160 unshare_expr (fd->loops[i].n1),
2161 unshare_expr (fd->loops[i].n2));
2162 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2163 tree tem
2164 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2165 true, GSI_SAME_STMT);
2166 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2167 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2168 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2169 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2170 true, GSI_SAME_STMT);
2171 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2172 if (fd->loops[i].m1)
2174 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2175 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2176 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2178 else
2179 n1 = t;
2180 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2181 true, GSI_SAME_STMT);
2182 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2183 if (fd->loops[i].m2)
2185 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2186 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2187 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2189 else
2190 n2 = t;
2191 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2192 true, GSI_SAME_STMT);
2193 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2195 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2196 NULL_TREE, NULL_TREE);
2197 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2198 e = split_block (gsi_bb (gsi2), cond_stmt);
2199 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2200 e->probability = profile_probability::unlikely ().guessed ();
2201 ne = make_edge (e->src, bb1,
2202 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2203 ne->probability = e->probability.invert ();
2204 gsi2 = gsi_after_labels (e->dest);
2206 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2207 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2209 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2212 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2213 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2214 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2216 if (fd->first_nonrect + 1 == fd->last_nonrect)
2218 fd->first_inner_iterations = first_inner_iterations;
2219 fd->factor = factor;
2220 fd->adjn1 = n1o;
2223 else
2225 /* Fallback implementation. Evaluate the loops with m1/m2
2226 non-NULL as well as their outer loops at runtime using temporaries
2227 instead of the original iteration variables, and in the
2228 body just bump the counter. */
2229 gimple_stmt_iterator gsi2 = *gsi;
2230 gsi_prev (&gsi2);
2231 e = split_block (entry_bb, gsi_stmt (gsi2));
2232 e = split_block (e->dest, (gimple *) NULL);
2233 basic_block cur_bb = e->src;
2234 basic_block next_bb = e->dest;
2235 entry_bb = e->dest;
2236 *gsi = gsi_after_labels (entry_bb);
2238 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2239 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2241 for (i = 0; i <= fd->last_nonrect; i++)
2243 if (fd->loops[i].m1 == NULL_TREE
2244 && fd->loops[i].m2 == NULL_TREE
2245 && !fd->loops[i].non_rect_referenced)
2246 continue;
2248 tree itype = TREE_TYPE (fd->loops[i].v);
2250 gsi2 = gsi_after_labels (cur_bb);
2251 tree n1, n2;
2252 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2253 if (fd->loops[i].m1)
2255 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2256 n1 = fold_build2 (MULT_EXPR, itype,
2257 vs[i - fd->loops[i].outer], n1);
2258 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2260 else
2261 n1 = t;
2262 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2263 true, GSI_SAME_STMT);
2264 if (i < fd->last_nonrect)
2266 vs[i] = create_tmp_reg (itype, ".it");
2267 expand_omp_build_assign (&gsi2, vs[i], n1);
2269 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2270 if (fd->loops[i].m2)
2272 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2273 n2 = fold_build2 (MULT_EXPR, itype,
2274 vs[i - fd->loops[i].outer], n2);
2275 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2277 else
2278 n2 = t;
2279 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2280 true, GSI_SAME_STMT);
2281 if (i == fd->last_nonrect)
2283 gcond *cond_stmt
2284 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2285 NULL_TREE, NULL_TREE);
2286 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2287 e = split_block (cur_bb, cond_stmt);
2288 e->flags = EDGE_TRUE_VALUE;
2289 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2290 e->probability = profile_probability::likely ().guessed ();
2291 ne->probability = e->probability.invert ();
2292 gsi2 = gsi_after_labels (e->dest);
2294 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2295 ? -1 : 1));
2296 t = fold_build2 (PLUS_EXPR, itype,
2297 fold_convert (itype, fd->loops[i].step), t);
2298 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2299 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2300 tree step = fold_convert (itype, fd->loops[i].step);
2301 if (TYPE_UNSIGNED (itype)
2302 && fd->loops[i].cond_code == GT_EXPR)
2303 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2304 fold_build1 (NEGATE_EXPR, itype, t),
2305 fold_build1 (NEGATE_EXPR, itype, step));
2306 else
2307 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2308 t = fold_convert (type, t);
2309 t = fold_build2 (PLUS_EXPR, type,
2310 counts[fd->last_nonrect], t);
2311 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2312 true, GSI_SAME_STMT);
2313 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2314 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2315 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2316 break;
2318 e = split_block (cur_bb, last_stmt (cur_bb));
2320 basic_block new_cur_bb = create_empty_bb (cur_bb);
2321 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2323 gsi2 = gsi_after_labels (e->dest);
2324 tree step = fold_convert (itype,
2325 unshare_expr (fd->loops[i].step));
2326 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2327 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2328 true, GSI_SAME_STMT);
2329 expand_omp_build_assign (&gsi2, vs[i], t);
2331 ne = split_block (e->dest, last_stmt (e->dest));
2332 gsi2 = gsi_after_labels (ne->dest);
2334 gcond *cond_stmt
2335 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2336 NULL_TREE, NULL_TREE);
2337 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2338 edge e3, e4;
2339 if (next_bb == entry_bb)
2341 e3 = find_edge (ne->dest, next_bb);
2342 e3->flags = EDGE_FALSE_VALUE;
2344 else
2345 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2346 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2347 e4->probability = profile_probability::likely ().guessed ();
2348 e3->probability = e4->probability.invert ();
2349 basic_block esrc = e->src;
2350 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2351 cur_bb = new_cur_bb;
2352 basic_block latch_bb = next_bb;
2353 next_bb = e->dest;
2354 remove_edge (e);
2355 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2356 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2357 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2360 t = NULL_TREE;
2361 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2362 if (!fd->loops[i].non_rect_referenced
2363 && fd->loops[i].m1 == NULL_TREE
2364 && fd->loops[i].m2 == NULL_TREE)
2366 if (t == NULL_TREE)
2367 t = counts[i];
2368 else
2369 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2371 if (t)
2373 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2374 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2376 if (!rect_count_seen)
2377 t = counts[fd->last_nonrect];
2378 else
2379 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2380 counts[fd->last_nonrect]);
2381 expand_omp_build_assign (gsi, fd->loop.n2, t);
2383 else if (fd->non_rect)
2385 tree t = fd->loop.n2;
2386 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2387 int non_rect_referenced = 0, non_rect = 0;
2388 for (i = 0; i < fd->collapse; i++)
2390 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2391 && !integer_zerop (counts[i]))
2392 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2393 if (fd->loops[i].non_rect_referenced)
2394 non_rect_referenced++;
2395 if (fd->loops[i].m1 || fd->loops[i].m2)
2396 non_rect++;
2398 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2399 counts[fd->last_nonrect] = t;
2403 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2404 T = V;
2405 V3 = N31 + (T % count3) * STEP3;
2406 T = T / count3;
2407 V2 = N21 + (T % count2) * STEP2;
2408 T = T / count2;
2409 V1 = N11 + T * STEP1;
2410 if this loop doesn't have an inner loop construct combined with it.
2411 If it does have an inner loop construct combined with it and the
2412 iteration count isn't known constant, store values from counts array
2413 into its _looptemp_ temporaries instead.
2414 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2415 inclusive), use the count of all those loops together, and either
2416 find quadratic etc. equation roots, or as a fallback, do:
2417 COUNT = 0;
2418 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2419 for (tmpj = M21 * tmpi + N21;
2420 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2422 int tmpk1 = M31 * tmpj + N31;
2423 int tmpk2 = M32 * tmpj + N32;
2424 if (tmpk1 COND3 tmpk2)
2426 if (COND3 is <)
2427 adj = STEP3 - 1;
2428 else
2429 adj = STEP3 + 1;
2430 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2431 if (COUNT + temp > T)
2433 V1 = tmpi;
2434 V2 = tmpj;
2435 V3 = tmpk1 + (T - COUNT) * STEP3;
2436 goto done;
2438 else
2439 COUNT += temp;
2442 done:;
2443 but for optional innermost or outermost rectangular loops that aren't
2444 referenced by other loop expressions, keep doing the division/modulo. */
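/* Worked example of the rectangular decomposition above (illustrative
   numbers only): with count3 = 4, count2 = 5, all N?1 equal to 0 and all
   STEPs equal to 1, the logical iteration number V = 17 decomposes as
     V3 = 17 % 4 = 1;  T = 17 / 4 = 4;
     V2 = 4 % 5 = 4;   T = 4 / 5 = 0;
     V1 = 0;
   and indeed 0 * (5 * 4) + 4 * 4 + 1 == 17.  */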
2446 static void
2447 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2448 tree *counts, tree *nonrect_bounds,
2449 gimple *inner_stmt, tree startvar)
2451 int i;
2452 if (gimple_omp_for_combined_p (fd->for_stmt))
2454 /* If fd->loop.n2 is constant, then no propagation of the counts
2455 is needed; they are constant. */
2456 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2457 return;
2459 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2460 ? gimple_omp_taskreg_clauses (inner_stmt)
2461 : gimple_omp_for_clauses (inner_stmt);
2462 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
2463 isn't supposed to be handled, as the inner loop doesn't
2464 use it. */
2465 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2466 gcc_assert (innerc);
2467 int count = 0;
2468 if (fd->non_rect
2469 && fd->last_nonrect == fd->first_nonrect + 1
2470 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2471 count = 4;
2472 for (i = 0; i < fd->collapse + count; i++)
2474 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2475 OMP_CLAUSE__LOOPTEMP_);
2476 gcc_assert (innerc);
2477 if (i)
2479 tree tem = OMP_CLAUSE_DECL (innerc);
2480 tree t;
2481 if (i < fd->collapse)
2482 t = counts[i];
2483 else
2484 switch (i - fd->collapse)
2486 case 0: t = counts[0]; break;
2487 case 1: t = fd->first_inner_iterations; break;
2488 case 2: t = fd->factor; break;
2489 case 3: t = fd->adjn1; break;
2490 default: gcc_unreachable ();
2492 t = fold_convert (TREE_TYPE (tem), t);
2493 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2494 false, GSI_CONTINUE_LINKING);
2495 gassign *stmt = gimple_build_assign (tem, t);
2496 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2499 return;
2502 tree type = TREE_TYPE (fd->loop.v);
2503 tree tem = create_tmp_reg (type, ".tem");
2504 gassign *stmt = gimple_build_assign (tem, startvar);
2505 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2507 for (i = fd->collapse - 1; i >= 0; i--)
2509 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2510 itype = vtype;
2511 if (POINTER_TYPE_P (vtype))
2512 itype = signed_type_for (vtype);
2513 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2514 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2515 else
2516 t = tem;
2517 if (i == fd->last_nonrect)
2519 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2520 false, GSI_CONTINUE_LINKING);
2521 tree stopval = t;
2522 tree idx = create_tmp_reg (type, ".count");
2523 expand_omp_build_assign (gsi, idx,
2524 build_zero_cst (type), true);
2525 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2526 if (fd->first_nonrect + 1 == fd->last_nonrect
2527 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2528 || fd->first_inner_iterations)
2529 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2530 != CODE_FOR_nothing)
2531 && !integer_zerop (fd->loop.n2))
2533 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2534 tree itype = TREE_TYPE (fd->loops[i].v);
2535 tree first_inner_iterations = fd->first_inner_iterations;
2536 tree factor = fd->factor;
2537 gcond *cond_stmt
2538 = gimple_build_cond (NE_EXPR, factor,
2539 build_zero_cst (TREE_TYPE (factor)),
2540 NULL_TREE, NULL_TREE);
2541 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2542 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2543 basic_block bb0 = e->src;
2544 e->flags = EDGE_TRUE_VALUE;
2545 e->probability = profile_probability::likely ();
2546 bb_triang_dom = bb0;
2547 *gsi = gsi_after_labels (e->dest);
2548 tree slltype = long_long_integer_type_node;
2549 tree ulltype = long_long_unsigned_type_node;
2550 tree stopvalull = fold_convert (ulltype, stopval);
2551 stopvalull
2552 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2553 false, GSI_CONTINUE_LINKING);
2554 first_inner_iterations
2555 = fold_convert (slltype, first_inner_iterations);
2556 first_inner_iterations
2557 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2558 NULL_TREE, false,
2559 GSI_CONTINUE_LINKING);
2560 factor = fold_convert (slltype, factor);
2561 factor
2562 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2563 false, GSI_CONTINUE_LINKING);
2564 tree first_inner_iterationsd
2565 = fold_build1 (FLOAT_EXPR, double_type_node,
2566 first_inner_iterations);
2567 first_inner_iterationsd
2568 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2569 NULL_TREE, false,
2570 GSI_CONTINUE_LINKING);
2571 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2572 factor);
2573 factord = force_gimple_operand_gsi (gsi, factord, true,
2574 NULL_TREE, false,
2575 GSI_CONTINUE_LINKING);
2576 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2577 stopvalull);
2578 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2579 NULL_TREE, false,
2580 GSI_CONTINUE_LINKING);
2581 /* Temporarily disable flag_rounding_math; the values are
2582 decimal numbers divided by 2, and worst-case imprecision
2583 due to too-large values ought to be caught later by the
2584 checks for the fallback. */
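/* A sketch of the math being inverted here (informal): with
   F = factord and FI = first_inner_iterationsd, the first C outer
   iterations execute C * FI + F * C * (C - 1) / 2 logical iterations.
   Solving that quadratic for C at STOPVAL gives
     C = (sqrt ((FI - F / 2) * (FI - F / 2) + 2 * F * STOPVAL)
	  - (FI - F / 2)) / F
   which is what the floating point code below evaluates (t3 holds
   FI - F / 2).  */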
2585 int save_flag_rounding_math = flag_rounding_math;
2586 flag_rounding_math = 0;
2587 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2588 build_real (double_type_node, dconst2));
2589 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2590 first_inner_iterationsd, t);
2591 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2592 GSI_CONTINUE_LINKING);
2593 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2594 build_real (double_type_node, dconst2));
2595 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2596 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2597 fold_build2 (MULT_EXPR, double_type_node,
2598 t3, t3));
2599 flag_rounding_math = save_flag_rounding_math;
2600 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2601 GSI_CONTINUE_LINKING);
2602 if (flag_exceptions
2603 && cfun->can_throw_non_call_exceptions
2604 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2606 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2607 build_zero_cst (double_type_node));
2608 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2609 false, GSI_CONTINUE_LINKING);
2610 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2611 boolean_false_node,
2612 NULL_TREE, NULL_TREE);
2614 else
2615 cond_stmt
2616 = gimple_build_cond (LT_EXPR, t,
2617 build_zero_cst (double_type_node),
2618 NULL_TREE, NULL_TREE);
2619 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2620 e = split_block (gsi_bb (*gsi), cond_stmt);
2621 basic_block bb1 = e->src;
2622 e->flags = EDGE_FALSE_VALUE;
2623 e->probability = profile_probability::very_likely ();
2624 *gsi = gsi_after_labels (e->dest);
2625 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2626 tree sqrtr = create_tmp_var (double_type_node);
2627 gimple_call_set_lhs (call, sqrtr);
2628 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2629 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2630 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2631 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2632 tree c = create_tmp_var (ulltype);
2633 tree d = create_tmp_var (ulltype);
2634 expand_omp_build_assign (gsi, c, t, true);
2635 t = fold_build2 (MINUS_EXPR, ulltype, c,
2636 build_one_cst (ulltype));
2637 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2638 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2639 t = fold_build2 (MULT_EXPR, ulltype,
2640 fold_convert (ulltype, fd->factor), t);
2641 tree t2
2642 = fold_build2 (MULT_EXPR, ulltype, c,
2643 fold_convert (ulltype,
2644 fd->first_inner_iterations));
2645 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2646 expand_omp_build_assign (gsi, d, t, true);
2647 t = fold_build2 (MULT_EXPR, ulltype,
2648 fold_convert (ulltype, fd->factor), c);
2649 t = fold_build2 (PLUS_EXPR, ulltype,
2650 t, fold_convert (ulltype,
2651 fd->first_inner_iterations));
2652 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2653 GSI_CONTINUE_LINKING);
2654 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2655 NULL_TREE, NULL_TREE);
2656 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2657 e = split_block (gsi_bb (*gsi), cond_stmt);
2658 basic_block bb2 = e->src;
2659 e->flags = EDGE_TRUE_VALUE;
2660 e->probability = profile_probability::very_likely ();
2661 *gsi = gsi_after_labels (e->dest);
2662 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2663 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2664 GSI_CONTINUE_LINKING);
2665 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2666 NULL_TREE, NULL_TREE);
2667 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2668 e = split_block (gsi_bb (*gsi), cond_stmt);
2669 basic_block bb3 = e->src;
2670 e->flags = EDGE_FALSE_VALUE;
2671 e->probability = profile_probability::very_likely ();
2672 *gsi = gsi_after_labels (e->dest);
2673 t = fold_convert (itype, c);
2674 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2675 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2676 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2677 GSI_CONTINUE_LINKING);
2678 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2679 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2680 t2 = fold_convert (itype, t2);
2681 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2682 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2683 if (fd->loops[i].m1)
2685 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2686 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2688 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2689 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2690 bb_triang = e->src;
2691 *gsi = gsi_after_labels (e->dest);
2692 remove_edge (e);
2693 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2694 e->probability = profile_probability::very_unlikely ();
2695 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2696 e->probability = profile_probability::very_unlikely ();
2697 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2698 e->probability = profile_probability::very_unlikely ();
2700 basic_block bb4 = create_empty_bb (bb0);
2701 add_bb_to_loop (bb4, bb0->loop_father);
2702 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2703 e->probability = profile_probability::unlikely ();
2704 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2705 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2706 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2707 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2708 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2709 counts[i], counts[i - 1]);
2710 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2711 GSI_CONTINUE_LINKING);
2712 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2713 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2714 t = fold_convert (itype, t);
2715 t2 = fold_convert (itype, t2);
2716 t = fold_build2 (MULT_EXPR, itype, t,
2717 fold_convert (itype, fd->loops[i].step));
2718 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2719 t2 = fold_build2 (MULT_EXPR, itype, t2,
2720 fold_convert (itype, fd->loops[i - 1].step));
2721 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2722 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2723 false, GSI_CONTINUE_LINKING);
2724 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2725 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2726 if (fd->loops[i].m1)
2728 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2729 fd->loops[i - 1].v);
2730 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2732 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2733 false, GSI_CONTINUE_LINKING);
2734 stmt = gimple_build_assign (fd->loops[i].v, t);
2735 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2737 /* Fallback implementation. Evaluate the loops between
2738 fd->first_nonrect and fd->last_nonrect (inclusive) at
2739 runtime using temporaries instead of the original iteration
2740 variables; in the body just bump the counter and compare
2741 with the desired value. */
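/* A sketch of the generated search (informal, inferred from the code
   below): re-run the non-rectangular sub-nest with temporaries in vs[],
   compute at the innermost level how many iterations that level
   contributes, and keep bumping idx while the running total stays at or
   below stopval; the iterator values for the logical iteration stopval
   are then recovered from vs[] and stopval - idx afterwards.  */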
2742 gimple_stmt_iterator gsi2 = *gsi;
2743 basic_block entry_bb = gsi_bb (gsi2);
2744 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2745 e = split_block (e->dest, (gimple *) NULL);
2746 basic_block dom_bb = NULL;
2747 basic_block cur_bb = e->src;
2748 basic_block next_bb = e->dest;
2749 entry_bb = e->dest;
2750 *gsi = gsi_after_labels (entry_bb);
2752 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2753 tree n1 = NULL_TREE, n2 = NULL_TREE;
2754 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2756 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2758 tree itype = TREE_TYPE (fd->loops[j].v);
2759 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2760 && fd->loops[j].m2 == NULL_TREE
2761 && !fd->loops[j].non_rect_referenced);
2762 gsi2 = gsi_after_labels (cur_bb);
2763 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2764 if (fd->loops[j].m1)
2766 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2767 n1 = fold_build2 (MULT_EXPR, itype,
2768 vs[j - fd->loops[j].outer], n1);
2769 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2771 else if (rect_p)
2772 n1 = build_zero_cst (type);
2773 else
2774 n1 = t;
2775 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2776 true, GSI_SAME_STMT);
2777 if (j < fd->last_nonrect)
2779 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2780 expand_omp_build_assign (&gsi2, vs[j], n1);
2782 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2783 if (fd->loops[j].m2)
2785 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2786 n2 = fold_build2 (MULT_EXPR, itype,
2787 vs[j - fd->loops[j].outer], n2);
2788 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2790 else if (rect_p)
2791 n2 = counts[j];
2792 else
2793 n2 = t;
2794 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2795 true, GSI_SAME_STMT);
2796 if (j == fd->last_nonrect)
2798 gcond *cond_stmt
2799 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2800 NULL_TREE, NULL_TREE);
2801 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2802 e = split_block (cur_bb, cond_stmt);
2803 e->flags = EDGE_TRUE_VALUE;
2804 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2805 e->probability = profile_probability::likely ().guessed ();
2806 ne->probability = e->probability.invert ();
2807 gsi2 = gsi_after_labels (e->dest);
2809 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2810 ? -1 : 1));
2811 t = fold_build2 (PLUS_EXPR, itype,
2812 fold_convert (itype, fd->loops[j].step), t);
2813 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2814 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2815 tree step = fold_convert (itype, fd->loops[j].step);
2816 if (TYPE_UNSIGNED (itype)
2817 && fd->loops[j].cond_code == GT_EXPR)
2818 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2819 fold_build1 (NEGATE_EXPR, itype, t),
2820 fold_build1 (NEGATE_EXPR, itype, step));
2821 else
2822 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2823 t = fold_convert (type, t);
2824 t = fold_build2 (PLUS_EXPR, type, idx, t);
2825 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2826 true, GSI_SAME_STMT);
2827 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2828 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2829 cond_stmt
2830 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2831 NULL_TREE);
2832 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2833 e = split_block (gsi_bb (gsi2), cond_stmt);
2834 e->flags = EDGE_TRUE_VALUE;
2835 e->probability = profile_probability::likely ().guessed ();
2836 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2837 ne->probability = e->probability.invert ();
2838 gsi2 = gsi_after_labels (e->dest);
2839 expand_omp_build_assign (&gsi2, idx, t);
2840 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2841 break;
2843 e = split_block (cur_bb, last_stmt (cur_bb));
2845 basic_block new_cur_bb = create_empty_bb (cur_bb);
2846 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2848 gsi2 = gsi_after_labels (e->dest);
2849 if (rect_p)
2850 t = fold_build2 (PLUS_EXPR, type, vs[j],
2851 build_one_cst (type));
2852 else
2854 tree step
2855 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2856 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2858 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2859 true, GSI_SAME_STMT);
2860 expand_omp_build_assign (&gsi2, vs[j], t);
2862 edge ne = split_block (e->dest, last_stmt (e->dest));
2863 gsi2 = gsi_after_labels (ne->dest);
2865 gcond *cond_stmt;
2866 if (next_bb == entry_bb)
2867 /* No need to actually check the outermost condition. */
2868 cond_stmt
2869 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2870 boolean_true_node,
2871 NULL_TREE, NULL_TREE);
2872 else
2873 cond_stmt
2874 = gimple_build_cond (rect_p ? LT_EXPR
2875 : fd->loops[j].cond_code,
2876 vs[j], n2, NULL_TREE, NULL_TREE);
2877 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2878 edge e3, e4;
2879 if (next_bb == entry_bb)
2881 e3 = find_edge (ne->dest, next_bb);
2882 e3->flags = EDGE_FALSE_VALUE;
2883 dom_bb = ne->dest;
2885 else
2886 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2887 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2888 e4->probability = profile_probability::likely ().guessed ();
2889 e3->probability = e4->probability.invert ();
2890 basic_block esrc = e->src;
2891 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2892 cur_bb = new_cur_bb;
2893 basic_block latch_bb = next_bb;
2894 next_bb = e->dest;
2895 remove_edge (e);
2896 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2897 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2898 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2900 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2902 tree itype = TREE_TYPE (fd->loops[j].v);
2903 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2904 && fd->loops[j].m2 == NULL_TREE
2905 && !fd->loops[j].non_rect_referenced);
2906 if (j == fd->last_nonrect)
2908 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2909 t = fold_convert (itype, t);
2910 tree t2
2911 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2912 t = fold_build2 (MULT_EXPR, itype, t, t2);
2913 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2915 else if (rect_p)
2917 t = fold_convert (itype, vs[j]);
2918 t = fold_build2 (MULT_EXPR, itype, t,
2919 fold_convert (itype, fd->loops[j].step));
2920 if (POINTER_TYPE_P (vtype))
2921 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2922 else
2923 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2925 else
2926 t = vs[j];
2927 t = force_gimple_operand_gsi (gsi, t, false,
2928 NULL_TREE, true,
2929 GSI_SAME_STMT);
2930 stmt = gimple_build_assign (fd->loops[j].v, t);
2931 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2933 if (gsi_end_p (*gsi))
2934 *gsi = gsi_last_bb (gsi_bb (*gsi));
2935 else
2936 gsi_prev (gsi);
2937 if (bb_triang)
2939 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2940 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2941 *gsi = gsi_after_labels (e->dest);
2942 if (!gsi_end_p (*gsi))
2943 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2944 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2947 else
2949 t = fold_convert (itype, t);
2950 t = fold_build2 (MULT_EXPR, itype, t,
2951 fold_convert (itype, fd->loops[i].step));
2952 if (POINTER_TYPE_P (vtype))
2953 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2954 else
2955 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2956 t = force_gimple_operand_gsi (gsi, t,
2957 DECL_P (fd->loops[i].v)
2958 && TREE_ADDRESSABLE (fd->loops[i].v),
2959 NULL_TREE, false,
2960 GSI_CONTINUE_LINKING);
2961 stmt = gimple_build_assign (fd->loops[i].v, t);
2962 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2964 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2966 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2967 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2968 false, GSI_CONTINUE_LINKING);
2969 stmt = gimple_build_assign (tem, t);
2970 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2972 if (i == fd->last_nonrect)
2973 i = fd->first_nonrect;
2975 if (fd->non_rect)
2976 for (i = 0; i <= fd->last_nonrect; i++)
2977 if (fd->loops[i].m2)
2979 tree itype = TREE_TYPE (fd->loops[i].v);
2981 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2982 t = fold_build2 (MULT_EXPR, itype,
2983 fd->loops[i - fd->loops[i].outer].v, t);
2984 t = fold_build2 (PLUS_EXPR, itype, t,
2985 fold_convert (itype,
2986 unshare_expr (fd->loops[i].n2)));
2987 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2988 t = force_gimple_operand_gsi (gsi, t, false,
2989 NULL_TREE, false,
2990 GSI_CONTINUE_LINKING);
2991 stmt = gimple_build_assign (nonrect_bounds[i], t);
2992 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2996 /* Helper function for expand_omp_for_*. Generate code like:
2997 L10:
2998 V3 += STEP3;
2999 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3000 L11:
3001 V3 = N31;
3002 V2 += STEP2;
3003 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3004 L12:
3005 V2 = N21;
3006 V1 += STEP1;
3007 goto BODY_BB;
3008 For non-rectangular loops, use temporaries stored in nonrect_bounds
3009 for the upper bounds if M?2 multiplier is present. Given e.g.
3010 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3011 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3012 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3013 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3015 L10:
3016 V4 += STEP4;
3017 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3018 L11:
3019 V4 = N41 + M41 * V2; // This can be left out if the loop
3020 // refers to the immediate parent loop
3021 V3 += STEP3;
3022 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3023 L12:
3024 V3 = N31;
3025 V2 += STEP2;
3026 if (V2 cond2 N22) goto L120; else goto L13;
3027 L120:
3028 V4 = N41 + M41 * V2;
3029 NONRECT_BOUND4 = N42 + M42 * V2;
3030 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3031 L13:
3032 V2 = N21;
3033 V1 += STEP1;
3034 goto L120; */
3036 static basic_block
3037 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3038 basic_block cont_bb, basic_block body_bb)
3040 basic_block last_bb, bb, collapse_bb = NULL;
3041 int i;
3042 gimple_stmt_iterator gsi;
3043 edge e;
3044 tree t;
3045 gimple *stmt;
3047 last_bb = cont_bb;
3048 for (i = fd->collapse - 1; i >= 0; i--)
3050 tree vtype = TREE_TYPE (fd->loops[i].v);
3052 bb = create_empty_bb (last_bb);
3053 add_bb_to_loop (bb, last_bb->loop_father);
3054 gsi = gsi_start_bb (bb);
3056 if (i < fd->collapse - 1)
3058 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3059 e->probability
3060 = profile_probability::guessed_always ().apply_scale (1, 8);
3062 struct omp_for_data_loop *l = &fd->loops[i + 1];
3063 if (l->m1 == NULL_TREE || l->outer != 1)
3065 t = l->n1;
3066 if (l->m1)
3068 tree t2
3069 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3070 fd->loops[i + 1 - l->outer].v, l->m1);
3071 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3073 t = force_gimple_operand_gsi (&gsi, t,
3074 DECL_P (l->v)
3075 && TREE_ADDRESSABLE (l->v),
3076 NULL_TREE, false,
3077 GSI_CONTINUE_LINKING);
3078 stmt = gimple_build_assign (l->v, t);
3079 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3082 else
3083 collapse_bb = bb;
3085 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3087 if (POINTER_TYPE_P (vtype))
3088 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3089 else
3090 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3091 t = force_gimple_operand_gsi (&gsi, t,
3092 DECL_P (fd->loops[i].v)
3093 && TREE_ADDRESSABLE (fd->loops[i].v),
3094 NULL_TREE, false, GSI_CONTINUE_LINKING);
3095 stmt = gimple_build_assign (fd->loops[i].v, t);
3096 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3098 if (fd->loops[i].non_rect_referenced)
3100 basic_block update_bb = NULL, prev_bb = NULL;
3101 for (int j = i + 1; j <= fd->last_nonrect; j++)
3102 if (j - fd->loops[j].outer == i)
3104 tree n1, n2;
3105 struct omp_for_data_loop *l = &fd->loops[j];
3106 basic_block this_bb = create_empty_bb (last_bb);
3107 add_bb_to_loop (this_bb, last_bb->loop_father);
3108 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3109 if (prev_bb)
3111 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3112 e->probability
3113 = profile_probability::guessed_always ().apply_scale (7, 8);
3115 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3117 if (l->m1)
3119 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3120 fd->loops[i].v);
3121 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3122 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3123 false,
3124 GSI_CONTINUE_LINKING);
3125 stmt = gimple_build_assign (l->v, n1);
3126 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3127 n1 = l->v;
3129 else
3130 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3131 NULL_TREE, false,
3132 GSI_CONTINUE_LINKING);
3133 if (l->m2)
3135 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3136 fd->loops[i].v);
3137 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3138 t, unshare_expr (l->n2));
3139 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3140 false,
3141 GSI_CONTINUE_LINKING);
3142 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3143 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3144 n2 = nonrect_bounds[j];
3146 else
3147 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3148 true, NULL_TREE, false,
3149 GSI_CONTINUE_LINKING);
3150 gcond *cond_stmt
3151 = gimple_build_cond (l->cond_code, n1, n2,
3152 NULL_TREE, NULL_TREE);
3153 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3154 if (update_bb == NULL)
3155 update_bb = this_bb;
3156 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3157 e->probability
3158 = profile_probability::guessed_always ().apply_scale (1, 8);
3159 if (prev_bb == NULL)
3160 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3161 prev_bb = this_bb;
3163 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3164 e->probability
3165 = profile_probability::guessed_always ().apply_scale (7, 8);
3166 body_bb = update_bb;
3169 if (i > 0)
3171 if (fd->loops[i].m2)
3172 t = nonrect_bounds[i];
3173 else
3174 t = unshare_expr (fd->loops[i].n2);
3175 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3176 false, GSI_CONTINUE_LINKING);
3177 tree v = fd->loops[i].v;
3178 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3179 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3180 false, GSI_CONTINUE_LINKING);
3181 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3182 stmt = gimple_build_cond_empty (t);
3183 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3184 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3185 expand_omp_regimplify_p, NULL, NULL)
3186 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3187 expand_omp_regimplify_p, NULL, NULL))
3188 gimple_regimplify_operands (stmt, &gsi);
3189 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3190 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3192 else
3193 make_edge (bb, body_bb, EDGE_FALLTHRU);
3194 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3195 last_bb = bb;
3198 return collapse_bb;
3201 /* Expand #pragma omp ordered depend(source). */
3203 static void
3204 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3205 tree *counts, location_t loc)
3207 enum built_in_function source_ix
3208 = fd->iter_type == long_integer_type_node
3209 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3210 gimple *g
3211 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3212 build_fold_addr_expr (counts[fd->ordered]));
3213 gimple_set_location (g, loc);
3214 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3217 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
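/* For illustration (an assumed example, not taken from a testcase): for a
   doacross nest such as
     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   ...
	 }
   this builds a guard checking that the (i - 1, j) iteration actually
   exists in the iteration space and, when it does, a call to the
   BUILT_IN_GOMP_DOACROSS_WAIT (or _ULL_WAIT) builtin with the folded
   per-loop iteration numbers, as constructed below.  */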
3219 static void
3220 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3221 tree *counts, tree c, location_t loc)
3223 auto_vec<tree, 10> args;
3224 enum built_in_function sink_ix
3225 = fd->iter_type == long_integer_type_node
3226 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3227 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3228 int i;
3229 gimple_stmt_iterator gsi2 = *gsi;
3230 bool warned_step = false;
3232 for (i = 0; i < fd->ordered; i++)
3234 tree step = NULL_TREE;
3235 off = TREE_PURPOSE (deps);
3236 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3238 step = TREE_OPERAND (off, 1);
3239 off = TREE_OPERAND (off, 0);
3241 if (!integer_zerop (off))
3243 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3244 || fd->loops[i].cond_code == GT_EXPR);
3245 bool forward = fd->loops[i].cond_code == LT_EXPR;
3246 if (step)
3248 /* Non-simple Fortran DO loops. If step is variable,
3249 we don't know even the direction at compile time, so
3250 we can't warn. */
3251 if (TREE_CODE (step) != INTEGER_CST)
3252 break;
3253 forward = tree_int_cst_sgn (step) != -1;
3255 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3256 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3257 "waiting for lexically later iteration");
3258 break;
3260 deps = TREE_CHAIN (deps);
3262 /* If all offsets corresponding to the collapsed loops are zero,
3263 this depend clause can be ignored. FIXME: but there is still a
3264 flush needed. We need to emit one __sync_synchronize () for it
3265 though (perhaps conditionally)? Solve this together with the
3266 conservative dependence folding optimization.
3267 if (i >= fd->collapse)
3268 return; */
3270 deps = OMP_CLAUSE_DECL (c);
3271 gsi_prev (&gsi2);
3272 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3273 edge e2 = split_block_after_labels (e1->dest);
3275 gsi2 = gsi_after_labels (e1->dest);
3276 *gsi = gsi_last_bb (e1->src);
3277 for (i = 0; i < fd->ordered; i++)
3279 tree itype = TREE_TYPE (fd->loops[i].v);
3280 tree step = NULL_TREE;
3281 tree orig_off = NULL_TREE;
3282 if (POINTER_TYPE_P (itype))
3283 itype = sizetype;
3284 if (i)
3285 deps = TREE_CHAIN (deps);
3286 off = TREE_PURPOSE (deps);
3287 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3289 step = TREE_OPERAND (off, 1);
3290 off = TREE_OPERAND (off, 0);
3291 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3292 && integer_onep (fd->loops[i].step)
3293 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3295 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3296 if (step)
3298 off = fold_convert_loc (loc, itype, off);
3299 orig_off = off;
3300 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3303 if (integer_zerop (off))
3304 t = boolean_true_node;
3305 else
3307 tree a;
3308 tree co = fold_convert_loc (loc, itype, off);
3309 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3311 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3312 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3313 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3314 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3315 co);
3317 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3318 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3319 fd->loops[i].v, co);
3320 else
3321 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3322 fd->loops[i].v, co);
3323 if (step)
3325 tree t1, t2;
3326 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3327 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3328 fd->loops[i].n1);
3329 else
3330 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3331 fd->loops[i].n2);
3332 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3333 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3334 fd->loops[i].n2);
3335 else
3336 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3337 fd->loops[i].n1);
3338 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3339 step, build_int_cst (TREE_TYPE (step), 0));
3340 if (TREE_CODE (step) != INTEGER_CST)
3342 t1 = unshare_expr (t1);
3343 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3344 false, GSI_CONTINUE_LINKING);
3345 t2 = unshare_expr (t2);
3346 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3347 false, GSI_CONTINUE_LINKING);
3349 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3350 t, t2, t1);
3352 else if (fd->loops[i].cond_code == LT_EXPR)
3354 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3355 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3356 fd->loops[i].n1);
3357 else
3358 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3359 fd->loops[i].n2);
3361 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3362 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3363 fd->loops[i].n2);
3364 else
3365 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3366 fd->loops[i].n1);
3368 if (cond)
3369 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3370 else
3371 cond = t;
3373 off = fold_convert_loc (loc, itype, off);
3375 if (step
3376 || (fd->loops[i].cond_code == LT_EXPR
3377 ? !integer_onep (fd->loops[i].step)
3378 : !integer_minus_onep (fd->loops[i].step)))
3380 if (step == NULL_TREE
3381 && TYPE_UNSIGNED (itype)
3382 && fd->loops[i].cond_code == GT_EXPR)
3383 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3384 fold_build1_loc (loc, NEGATE_EXPR, itype,
3385 s));
3386 else
3387 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3388 orig_off ? orig_off : off, s);
3389 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3390 build_int_cst (itype, 0));
3391 if (integer_zerop (t) && !warned_step)
3393 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3394 "refers to iteration never in the iteration "
3395 "space");
3396 warned_step = true;
3398 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3399 cond, t);
3402 if (i <= fd->collapse - 1 && fd->collapse > 1)
3403 t = fd->loop.v;
3404 else if (counts[i])
3405 t = counts[i];
3406 else
3408 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3409 fd->loops[i].v, fd->loops[i].n1);
3410 t = fold_convert_loc (loc, fd->iter_type, t);
3412 if (step)
3413 /* We have divided off by step already earlier. */;
3414 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3415 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3416 fold_build1_loc (loc, NEGATE_EXPR, itype,
3417 s));
3418 else
3419 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3420 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3421 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3422 off = fold_convert_loc (loc, fd->iter_type, off);
3423 if (i <= fd->collapse - 1 && fd->collapse > 1)
3425 if (i)
3426 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3427 off);
3428 if (i < fd->collapse - 1)
3430 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3431 counts[i]);
3432 continue;
3435 off = unshare_expr (off);
3436 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3437 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3438 true, GSI_SAME_STMT);
3439 args.safe_push (t);
3441 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3442 gimple_set_location (g, loc);
3443 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3445 cond = unshare_expr (cond);
3446 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3447 GSI_CONTINUE_LINKING);
3448 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3449 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3450 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3451 e1->probability = e3->probability.invert ();
3452 e1->flags = EDGE_TRUE_VALUE;
3453 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3455 *gsi = gsi_after_labels (e2->dest);
3458 /* Expand all #pragma omp ordered depend(source) and
3459 #pragma omp ordered depend(sink:...) constructs in the current
3460 #pragma omp for ordered(n) region. */
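/* As an illustrative sketch (user-level code, not taken from this file),
   a doacross loop such as

     #pragma omp for ordered(1)
     for (int i = 1; i < n; i++)
       {
	 #pragma omp ordered depend(sink: i - 1)
	 a[i] += a[i - 1];
	 #pragma omp ordered depend(source)
       }

   is what this routine processes: roughly, each depend(sink: ...) clause
   becomes a GOMP_doacross_wait (or _ull_wait) call guarded by the bound and
   step checks built above, and each depend(source) becomes a
   GOMP_doacross_post call on the current iteration counters.  */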
3462 static void
3463 expand_omp_ordered_source_sink (struct omp_region *region,
3464 struct omp_for_data *fd, tree *counts,
3465 basic_block cont_bb)
3467 struct omp_region *inner;
3468 int i;
3469 for (i = fd->collapse - 1; i < fd->ordered; i++)
3470 if (i == fd->collapse - 1 && fd->collapse > 1)
3471 counts[i] = NULL_TREE;
3472 else if (i >= fd->collapse && !cont_bb)
3473 counts[i] = build_zero_cst (fd->iter_type);
3474 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3475 && integer_onep (fd->loops[i].step))
3476 counts[i] = NULL_TREE;
3477 else
3478 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3479 tree atype
3480 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3481 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3482 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3484 for (inner = region->inner; inner; inner = inner->next)
3485 if (inner->type == GIMPLE_OMP_ORDERED)
3487 gomp_ordered *ord_stmt = inner->ord_stmt;
3488 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3489 location_t loc = gimple_location (ord_stmt);
3490 tree c;
3491 for (c = gimple_omp_ordered_clauses (ord_stmt);
3492 c; c = OMP_CLAUSE_CHAIN (c))
3493 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3494 break;
3495 if (c)
3496 expand_omp_ordered_source (&gsi, fd, counts, loc);
3497 for (c = gimple_omp_ordered_clauses (ord_stmt);
3498 c; c = OMP_CLAUSE_CHAIN (c))
3499 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3500 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3501 gsi_remove (&gsi, true);
3505 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3506 collapsed. */
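/* For instance (an illustrative sketch, not generated verbatim), with

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 body (i, j);

   and no collapse clause, only the i loop is workshared; this routine
   rebuilds the j loop explicitly around BODY_BB and keeps the running
   iteration counts in COUNTS so that the depend(source)/depend(sink)
   expansion above can post and wait on them.  */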
3508 static basic_block
3509 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3510 basic_block cont_bb, basic_block body_bb,
3511 bool ordered_lastprivate)
3513 if (fd->ordered == fd->collapse)
3514 return cont_bb;
3516 if (!cont_bb)
3518 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3519 for (int i = fd->collapse; i < fd->ordered; i++)
3521 tree type = TREE_TYPE (fd->loops[i].v);
3522 tree n1 = fold_convert (type, fd->loops[i].n1);
3523 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3524 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3525 size_int (i - fd->collapse + 1),
3526 NULL_TREE, NULL_TREE);
3527 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3529 return NULL;
3532 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3534 tree t, type = TREE_TYPE (fd->loops[i].v);
3535 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3536 expand_omp_build_assign (&gsi, fd->loops[i].v,
3537 fold_convert (type, fd->loops[i].n1));
3538 if (counts[i])
3539 expand_omp_build_assign (&gsi, counts[i],
3540 build_zero_cst (fd->iter_type));
3541 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3542 size_int (i - fd->collapse + 1),
3543 NULL_TREE, NULL_TREE);
3544 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3545 if (!gsi_end_p (gsi))
3546 gsi_prev (&gsi);
3547 else
3548 gsi = gsi_last_bb (body_bb);
3549 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3550 basic_block new_body = e1->dest;
3551 if (body_bb == cont_bb)
3552 cont_bb = new_body;
3553 edge e2 = NULL;
3554 basic_block new_header;
3555 if (EDGE_COUNT (cont_bb->preds) > 0)
3557 gsi = gsi_last_bb (cont_bb);
3558 if (POINTER_TYPE_P (type))
3559 t = fold_build_pointer_plus (fd->loops[i].v,
3560 fold_convert (sizetype,
3561 fd->loops[i].step));
3562 else
3563 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3564 fold_convert (type, fd->loops[i].step));
3565 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3566 if (counts[i])
3568 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3569 build_int_cst (fd->iter_type, 1));
3570 expand_omp_build_assign (&gsi, counts[i], t);
3571 t = counts[i];
3573 else
3575 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3576 fd->loops[i].v, fd->loops[i].n1);
3577 t = fold_convert (fd->iter_type, t);
3578 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3579 true, GSI_SAME_STMT);
3581 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3582 size_int (i - fd->collapse + 1),
3583 NULL_TREE, NULL_TREE);
3584 expand_omp_build_assign (&gsi, aref, t);
3585 gsi_prev (&gsi);
3586 e2 = split_block (cont_bb, gsi_stmt (gsi));
3587 new_header = e2->dest;
3589 else
3590 new_header = cont_bb;
3591 gsi = gsi_after_labels (new_header);
3592 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3593 true, GSI_SAME_STMT);
3594 tree n2
3595 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3596 true, NULL_TREE, true, GSI_SAME_STMT);
3597 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3598 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3599 edge e3 = split_block (new_header, gsi_stmt (gsi));
3600 cont_bb = e3->dest;
3601 remove_edge (e1);
3602 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3603 e3->flags = EDGE_FALSE_VALUE;
3604 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3605 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3606 e1->probability = e3->probability.invert ();
3608 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3609 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3611 if (e2)
3613 class loop *loop = alloc_loop ();
3614 loop->header = new_header;
3615 loop->latch = e2->src;
3616 add_loop (loop, body_bb->loop_father);
3620 /* If there are any lastprivate clauses and it is possible some loops
3621 might have zero iterations, ensure all the decls are initialized,
3622 otherwise we could crash evaluating C++ class iterators with lastprivate
3623 clauses. */
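/* Hypothetical example: with ordered(2), no collapse, and a lastprivate
   C++ iterator as the inner loop variable, a run where that loop's range
   turns out to be empty would otherwise evaluate an unconstructed iterator
   during the lastprivate copy-out; assigning N1 below gives the variable a
   defined value first.  */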
3624 bool need_inits = false;
3625 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3626 if (need_inits)
3628 tree type = TREE_TYPE (fd->loops[i].v);
3629 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3630 expand_omp_build_assign (&gsi, fd->loops[i].v,
3631 fold_convert (type, fd->loops[i].n1));
3633 else
3635 tree type = TREE_TYPE (fd->loops[i].v);
3636 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3637 boolean_type_node,
3638 fold_convert (type, fd->loops[i].n1),
3639 fold_convert (type, fd->loops[i].n2));
3640 if (!integer_onep (this_cond))
3641 need_inits = true;
3644 return cont_bb;
3647 /* A subroutine of expand_omp_for. Generate code for a parallel
3648 loop with any schedule. Given parameters:
3650 for (V = N1; V cond N2; V += STEP) BODY;
3652 where COND is "<" or ">", we generate pseudocode
3654 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3655 if (more) goto L0; else goto L3;
3657 V = istart0;
3658 iend = iend0;
3660 BODY;
3661 V += STEP;
3662 if (V cond iend) goto L1; else goto L2;
3664 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3667 If this is a combined omp parallel loop, instead of the call to
3668 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3669 If this is gimple_omp_for_combined_p loop, then instead of assigning
3670 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3671 inner GIMPLE_OMP_FOR and V += STEP; and
3672 if (V cond iend) goto L1; else goto L2; are removed.
3674 For collapsed loops, given parameters:
3675 collapse(3)
3676 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3677 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3678 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3679 BODY;
3681 we generate pseudocode
3683 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3684 if (cond3 is <)
3685 adj = STEP3 - 1;
3686 else
3687 adj = STEP3 + 1;
3688 count3 = (adj + N32 - N31) / STEP3;
3689 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3690 if (cond2 is <)
3691 adj = STEP2 - 1;
3692 else
3693 adj = STEP2 + 1;
3694 count2 = (adj + N22 - N21) / STEP2;
3695 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3696 if (cond1 is <)
3697 adj = STEP1 - 1;
3698 else
3699 adj = STEP1 + 1;
3700 count1 = (adj + N12 - N11) / STEP1;
3701 count = count1 * count2 * count3;
3702 goto Z1;
3704 count = 0;
3706 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3707 if (more) goto L0; else goto L3;
3709 V = istart0;
3710 T = V;
3711 V3 = N31 + (T % count3) * STEP3;
3712 T = T / count3;
3713 V2 = N21 + (T % count2) * STEP2;
3714 T = T / count2;
3715 V1 = N11 + T * STEP1;
3716 iend = iend0;
3718 BODY;
3719 V += 1;
3720 if (V < iend) goto L10; else goto L2;
3721 L10:
3722 V3 += STEP3;
3723 if (V3 cond3 N32) goto L1; else goto L11;
3724 L11:
3725 V3 = N31;
3726 V2 += STEP2;
3727 if (V2 cond2 N22) goto L1; else goto L12;
3728 L12:
3729 V2 = N21;
3730 V1 += STEP1;
3731 goto L1;
3733 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
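/* Concretely (an illustrative sketch; the actual libgomp entry points are
   whatever START_FN and NEXT_FN the caller selected for the schedule), a
   non-collapsed loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   follows the first pattern above with
     more = GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0);
   before the branch to L0 and GOMP_loop_dynamic_next (&istart0, &iend0)
   at L2, each thread repeatedly executing the [istart0, iend0) chunks it
   is handed until the next call returns false.  */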
3738 static void
3739 expand_omp_for_generic (struct omp_region *region,
3740 struct omp_for_data *fd,
3741 enum built_in_function start_fn,
3742 enum built_in_function next_fn,
3743 tree sched_arg,
3744 gimple *inner_stmt)
3746 tree type, istart0, iend0, iend;
3747 tree t, vmain, vback, bias = NULL_TREE;
3748 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3749 basic_block l2_bb = NULL, l3_bb = NULL;
3750 gimple_stmt_iterator gsi;
3751 gassign *assign_stmt;
3752 bool in_combined_parallel = is_combined_parallel (region);
3753 bool broken_loop = region->cont == NULL;
3754 edge e, ne;
3755 tree *counts = NULL;
3756 int i;
3757 bool ordered_lastprivate = false;
3759 gcc_assert (!broken_loop || !in_combined_parallel);
3760 gcc_assert (fd->iter_type == long_integer_type_node
3761 || !in_combined_parallel);
3763 entry_bb = region->entry;
3764 cont_bb = region->cont;
3765 collapse_bb = NULL;
3766 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3767 gcc_assert (broken_loop
3768 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3769 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3770 l1_bb = single_succ (l0_bb);
3771 if (!broken_loop)
3773 l2_bb = create_empty_bb (cont_bb);
3774 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3775 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3776 == l1_bb));
3777 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3779 else
3780 l2_bb = NULL;
3781 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3782 exit_bb = region->exit;
3784 gsi = gsi_last_nondebug_bb (entry_bb);
3786 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3787 if (fd->ordered
3788 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3789 OMP_CLAUSE_LASTPRIVATE))
3790 ordered_lastprivate = true;

3791 tree reductions = NULL_TREE;
3792 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3793 tree memv = NULL_TREE;
3794 if (fd->lastprivate_conditional)
3796 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3797 OMP_CLAUSE__CONDTEMP_);
3798 if (fd->have_pointer_condtemp)
3799 condtemp = OMP_CLAUSE_DECL (c);
3800 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3801 cond_var = OMP_CLAUSE_DECL (c);
3803 if (sched_arg)
3805 if (fd->have_reductemp)
3807 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3808 OMP_CLAUSE__REDUCTEMP_);
3809 reductions = OMP_CLAUSE_DECL (c);
3810 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3811 gimple *g = SSA_NAME_DEF_STMT (reductions);
3812 reductions = gimple_assign_rhs1 (g);
3813 OMP_CLAUSE_DECL (c) = reductions;
3814 entry_bb = gimple_bb (g);
3815 edge e = split_block (entry_bb, g);
3816 if (region->entry == entry_bb)
3817 region->entry = e->dest;
3818 gsi = gsi_last_bb (entry_bb);
3820 else
3821 reductions = null_pointer_node;
3822 if (fd->have_pointer_condtemp)
3824 tree type = TREE_TYPE (condtemp);
3825 memv = create_tmp_var (type);
3826 TREE_ADDRESSABLE (memv) = 1;
3827 unsigned HOST_WIDE_INT sz
3828 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3829 sz *= fd->lastprivate_conditional;
3830 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3831 false);
3832 mem = build_fold_addr_expr (memv);
3834 else
3835 mem = null_pointer_node;
3837 if (fd->collapse > 1 || fd->ordered)
3839 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3840 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3842 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3843 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3844 zero_iter1_bb, first_zero_iter1,
3845 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3847 if (zero_iter1_bb)
3849 /* Some counts[i] vars might be uninitialized if
3850 some loop has zero iterations. But the body shouldn't
3851 be executed in that case, so just avoid uninit warnings. */
3852 for (i = first_zero_iter1;
3853 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3854 if (SSA_VAR_P (counts[i]))
3855 suppress_warning (counts[i], OPT_Wuninitialized);
3856 gsi_prev (&gsi);
3857 e = split_block (entry_bb, gsi_stmt (gsi));
3858 entry_bb = e->dest;
3859 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3860 gsi = gsi_last_nondebug_bb (entry_bb);
3861 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3862 get_immediate_dominator (CDI_DOMINATORS,
3863 zero_iter1_bb));
3865 if (zero_iter2_bb)
3867 /* Some counts[i] vars might be uninitialized if
3868 some loop has zero iterations. But the body shouldn't
3869 be executed in that case, so just avoid uninit warnings. */
3870 for (i = first_zero_iter2; i < fd->ordered; i++)
3871 if (SSA_VAR_P (counts[i]))
3872 suppress_warning (counts[i], OPT_Wuninitialized);
3873 if (zero_iter1_bb)
3874 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3875 else
3877 gsi_prev (&gsi);
3878 e = split_block (entry_bb, gsi_stmt (gsi));
3879 entry_bb = e->dest;
3880 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3881 gsi = gsi_last_nondebug_bb (entry_bb);
3882 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3883 get_immediate_dominator
3884 (CDI_DOMINATORS, zero_iter2_bb));
3887 if (fd->collapse == 1)
3889 counts[0] = fd->loop.n2;
3890 fd->loop = fd->loops[0];
3894 type = TREE_TYPE (fd->loop.v);
3895 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3896 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3897 TREE_ADDRESSABLE (istart0) = 1;
3898 TREE_ADDRESSABLE (iend0) = 1;
3900 /* See if we need to bias by LLONG_MIN. */
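/* Illustration: a signed iterator whose range may cross zero (or is not
   known at compile time) cannot simply be reinterpreted as unsigned for the
   GOMP_loop_ull_* entry points, so TYPE_MIN_VALUE of its type is added as a
   bias here, which keeps the ordering monotonic in the unsigned domain; the
   same bias is subtracted again when istart0/iend0 are converted back to
   the iterator type below.  */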
3901 if (fd->iter_type == long_long_unsigned_type_node
3902 && TREE_CODE (type) == INTEGER_TYPE
3903 && !TYPE_UNSIGNED (type)
3904 && fd->ordered == 0)
3906 tree n1, n2;
3908 if (fd->loop.cond_code == LT_EXPR)
3910 n1 = fd->loop.n1;
3911 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3913 else
3915 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3916 n2 = fd->loop.n1;
3918 if (TREE_CODE (n1) != INTEGER_CST
3919 || TREE_CODE (n2) != INTEGER_CST
3920 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3921 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3924 gimple_stmt_iterator gsif = gsi;
3925 gsi_prev (&gsif);
3927 tree arr = NULL_TREE;
3928 if (in_combined_parallel)
3930 gcc_assert (fd->ordered == 0);
3931 /* In a combined parallel loop, emit a call to
3932 GOMP_loop_foo_next. */
3933 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3934 build_fold_addr_expr (istart0),
3935 build_fold_addr_expr (iend0));
3937 else
3939 tree t0, t1, t2, t3, t4;
3940 /* If this is not a combined parallel loop, emit a call to
3941 GOMP_loop_foo_start in ENTRY_BB. */
3942 t4 = build_fold_addr_expr (iend0);
3943 t3 = build_fold_addr_expr (istart0);
3944 if (fd->ordered)
3946 t0 = build_int_cst (unsigned_type_node,
3947 fd->ordered - fd->collapse + 1);
3948 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3949 fd->ordered
3950 - fd->collapse + 1),
3951 ".omp_counts");
3952 DECL_NAMELESS (arr) = 1;
3953 TREE_ADDRESSABLE (arr) = 1;
3954 TREE_STATIC (arr) = 1;
3955 vec<constructor_elt, va_gc> *v;
3956 vec_alloc (v, fd->ordered - fd->collapse + 1);
3957 int idx;
3959 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3961 tree c;
3962 if (idx == 0 && fd->collapse > 1)
3963 c = fd->loop.n2;
3964 else
3965 c = counts[idx + fd->collapse - 1];
3966 tree purpose = size_int (idx);
3967 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3968 if (TREE_CODE (c) != INTEGER_CST)
3969 TREE_STATIC (arr) = 0;
3972 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3973 if (!TREE_STATIC (arr))
3974 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3975 void_type_node, arr),
3976 true, NULL_TREE, true, GSI_SAME_STMT);
3977 t1 = build_fold_addr_expr (arr);
3978 t2 = NULL_TREE;
3980 else
3982 t2 = fold_convert (fd->iter_type, fd->loop.step);
3983 t1 = fd->loop.n2;
3984 t0 = fd->loop.n1;
3985 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3987 tree innerc
3988 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3989 OMP_CLAUSE__LOOPTEMP_);
3990 gcc_assert (innerc);
3991 t0 = OMP_CLAUSE_DECL (innerc);
3992 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3993 OMP_CLAUSE__LOOPTEMP_);
3994 gcc_assert (innerc);
3995 t1 = OMP_CLAUSE_DECL (innerc);
3997 if (POINTER_TYPE_P (TREE_TYPE (t0))
3998 && TYPE_PRECISION (TREE_TYPE (t0))
3999 != TYPE_PRECISION (fd->iter_type))
4001 /* Avoid casting pointers to integer of a different size. */
4002 tree itype = signed_type_for (type);
4003 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4004 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4006 else
4008 t1 = fold_convert (fd->iter_type, t1);
4009 t0 = fold_convert (fd->iter_type, t0);
4011 if (bias)
4013 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4014 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4017 if (fd->iter_type == long_integer_type_node || fd->ordered)
4019 if (fd->chunk_size)
4021 t = fold_convert (fd->iter_type, fd->chunk_size);
4022 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4023 if (sched_arg)
4025 if (fd->ordered)
4026 t = build_call_expr (builtin_decl_explicit (start_fn),
4027 8, t0, t1, sched_arg, t, t3, t4,
4028 reductions, mem);
4029 else
4030 t = build_call_expr (builtin_decl_explicit (start_fn),
4031 9, t0, t1, t2, sched_arg, t, t3, t4,
4032 reductions, mem);
4034 else if (fd->ordered)
4035 t = build_call_expr (builtin_decl_explicit (start_fn),
4036 5, t0, t1, t, t3, t4);
4037 else
4038 t = build_call_expr (builtin_decl_explicit (start_fn),
4039 6, t0, t1, t2, t, t3, t4);
4041 else if (fd->ordered)
4042 t = build_call_expr (builtin_decl_explicit (start_fn),
4043 4, t0, t1, t3, t4);
4044 else
4045 t = build_call_expr (builtin_decl_explicit (start_fn),
4046 5, t0, t1, t2, t3, t4);
4048 else
4050 tree t5;
4051 tree c_bool_type;
4052 tree bfn_decl;
4054 /* The GOMP_loop_ull_*start functions have additional boolean
4055 argument, true for < loops and false for > loops.
4056 In Fortran, the C bool type can be different from
4057 boolean_type_node. */
4058 bfn_decl = builtin_decl_explicit (start_fn);
4059 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4060 t5 = build_int_cst (c_bool_type,
4061 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4062 if (fd->chunk_size)
4064 tree bfn_decl = builtin_decl_explicit (start_fn);
4065 t = fold_convert (fd->iter_type, fd->chunk_size);
4066 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4067 if (sched_arg)
4068 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4069 t, t3, t4, reductions, mem);
4070 else
4071 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4073 else
4074 t = build_call_expr (builtin_decl_explicit (start_fn),
4075 6, t5, t0, t1, t2, t3, t4);
4078 if (TREE_TYPE (t) != boolean_type_node)
4079 t = fold_build2 (NE_EXPR, boolean_type_node,
4080 t, build_int_cst (TREE_TYPE (t), 0));
4081 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4082 true, GSI_SAME_STMT);
4083 if (arr && !TREE_STATIC (arr))
4085 tree clobber = build_clobber (TREE_TYPE (arr));
4086 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4087 GSI_SAME_STMT);
4089 if (fd->have_pointer_condtemp)
4090 expand_omp_build_assign (&gsi, condtemp, memv, false);
4091 if (fd->have_reductemp)
4093 gimple *g = gsi_stmt (gsi);
4094 gsi_remove (&gsi, true);
4095 release_ssa_name (gimple_assign_lhs (g));
4097 entry_bb = region->entry;
4098 gsi = gsi_last_nondebug_bb (entry_bb);
4100 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4102 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4104 /* Remove the GIMPLE_OMP_FOR statement. */
4105 gsi_remove (&gsi, true);
4107 if (gsi_end_p (gsif))
4108 gsif = gsi_after_labels (gsi_bb (gsif));
4109 gsi_next (&gsif);
4111 /* Iteration setup for sequential loop goes in L0_BB. */
4112 tree startvar = fd->loop.v;
4113 tree endvar = NULL_TREE;
4115 if (gimple_omp_for_combined_p (fd->for_stmt))
4117 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4118 && gimple_omp_for_kind (inner_stmt)
4119 == GF_OMP_FOR_KIND_SIMD);
4120 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4121 OMP_CLAUSE__LOOPTEMP_);
4122 gcc_assert (innerc);
4123 startvar = OMP_CLAUSE_DECL (innerc);
4124 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4125 OMP_CLAUSE__LOOPTEMP_);
4126 gcc_assert (innerc);
4127 endvar = OMP_CLAUSE_DECL (innerc);
4130 gsi = gsi_start_bb (l0_bb);
4131 t = istart0;
4132 if (fd->ordered && fd->collapse == 1)
4133 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4134 fold_convert (fd->iter_type, fd->loop.step));
4135 else if (bias)
4136 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4137 if (fd->ordered && fd->collapse == 1)
4139 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4140 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4141 fd->loop.n1, fold_convert (sizetype, t));
4142 else
4144 t = fold_convert (TREE_TYPE (startvar), t);
4145 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4146 fd->loop.n1, t);
4149 else
4151 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4152 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4153 t = fold_convert (TREE_TYPE (startvar), t);
4155 t = force_gimple_operand_gsi (&gsi, t,
4156 DECL_P (startvar)
4157 && TREE_ADDRESSABLE (startvar),
4158 NULL_TREE, false, GSI_CONTINUE_LINKING);
4159 assign_stmt = gimple_build_assign (startvar, t);
4160 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4161 if (cond_var)
4163 tree itype = TREE_TYPE (cond_var);
4164 /* For lastprivate(conditional:) itervar, we need some iteration
4165 counter that starts at unsigned non-zero and increases.
4166 Prefer as few IVs as possible, so if we can use startvar
4167 itself, use that, or startvar + constant (those would be
4168 incremented with step), and as a last resort use istart0 + 1,
4169 incremented by 1 each iteration. */
4170 if ((fd->ordered && fd->collapse == 1)
4171 || bias
4172 || POINTER_TYPE_P (type)
4173 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4174 || fd->loop.cond_code != LT_EXPR)
4175 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4176 build_int_cst (itype, 1));
4177 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4178 t = fold_convert (itype, t);
4179 else
4181 tree c = fold_convert (itype, fd->loop.n1);
4182 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4183 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4185 t = force_gimple_operand_gsi (&gsi, t, false,
4186 NULL_TREE, false, GSI_CONTINUE_LINKING);
4187 assign_stmt = gimple_build_assign (cond_var, t);
4188 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4191 t = iend0;
4192 if (fd->ordered && fd->collapse == 1)
4193 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4194 fold_convert (fd->iter_type, fd->loop.step));
4195 else if (bias)
4196 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4197 if (fd->ordered && fd->collapse == 1)
4199 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4200 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4201 fd->loop.n1, fold_convert (sizetype, t));
4202 else
4204 t = fold_convert (TREE_TYPE (startvar), t);
4205 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4206 fd->loop.n1, t);
4209 else
4211 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4212 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4213 t = fold_convert (TREE_TYPE (startvar), t);
4215 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4216 false, GSI_CONTINUE_LINKING);
4217 if (endvar)
4219 assign_stmt = gimple_build_assign (endvar, iend);
4220 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4221 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4222 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4223 else
4224 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4225 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4227 /* Handle linear clause adjustments. */
4228 tree itercnt = NULL_TREE;
4229 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4230 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4231 c; c = OMP_CLAUSE_CHAIN (c))
4232 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4233 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4235 tree d = OMP_CLAUSE_DECL (c);
4236 tree t = d, a, dest;
4237 if (omp_privatize_by_reference (t))
4238 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4239 tree type = TREE_TYPE (t);
4240 if (POINTER_TYPE_P (type))
4241 type = sizetype;
4242 dest = unshare_expr (t);
4243 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4244 expand_omp_build_assign (&gsif, v, t);
4245 if (itercnt == NULL_TREE)
4247 itercnt = startvar;
4248 tree n1 = fd->loop.n1;
4249 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4251 itercnt
4252 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4253 itercnt);
4254 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4256 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4257 itercnt, n1);
4258 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4259 itercnt, fd->loop.step);
4260 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4261 NULL_TREE, false,
4262 GSI_CONTINUE_LINKING);
4264 a = fold_build2 (MULT_EXPR, type,
4265 fold_convert (type, itercnt),
4266 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4267 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4268 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4269 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4270 false, GSI_CONTINUE_LINKING);
4271 expand_omp_build_assign (&gsi, dest, t, true);
4273 if (fd->collapse > 1)
4274 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4276 if (fd->ordered)
4278 /* Until now, counts array contained number of iterations or
4279 variable containing it for ith loop. From now on, we need
4280 those counts only for collapsed loops, and only for the 2nd
4281 till the last collapsed one. Move those one element earlier,
4282 we'll use counts[fd->collapse - 1] for the first source/sink
4283 iteration counter and so on and counts[fd->ordered]
4284 as the array holding the current counter values for
4285 depend(source). */
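/* Illustration (hypothetical shape): for collapse(2) ordered(4), after the
   memmove below counts[0] holds the iteration count of the second collapsed
   loop, counts[1..3] serve as the three doacross iteration counters (one for
   the whole collapsed nest plus one per non-collapsed ordered loop), and
   counts[4] is the small array whose address is handed to the
   depend(source) post.  */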
4286 if (fd->collapse > 1)
4287 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4288 if (broken_loop)
4290 int i;
4291 for (i = fd->collapse; i < fd->ordered; i++)
4293 tree type = TREE_TYPE (fd->loops[i].v);
4294 tree this_cond
4295 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4296 fold_convert (type, fd->loops[i].n1),
4297 fold_convert (type, fd->loops[i].n2));
4298 if (!integer_onep (this_cond))
4299 break;
4301 if (i < fd->ordered)
4303 cont_bb
4304 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4305 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4306 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4307 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4308 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4309 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4310 make_edge (cont_bb, l1_bb, 0);
4311 l2_bb = create_empty_bb (cont_bb);
4312 broken_loop = false;
4315 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4316 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4317 ordered_lastprivate);
4318 if (counts[fd->collapse - 1])
4320 gcc_assert (fd->collapse == 1);
4321 gsi = gsi_last_bb (l0_bb);
4322 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4323 istart0, true);
4324 if (cont_bb)
4326 gsi = gsi_last_bb (cont_bb);
4327 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4328 counts[fd->collapse - 1],
4329 build_int_cst (fd->iter_type, 1));
4330 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4331 tree aref = build4 (ARRAY_REF, fd->iter_type,
4332 counts[fd->ordered], size_zero_node,
4333 NULL_TREE, NULL_TREE);
4334 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4336 t = counts[fd->collapse - 1];
4338 else if (fd->collapse > 1)
4339 t = fd->loop.v;
4340 else
4342 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4343 fd->loops[0].v, fd->loops[0].n1);
4344 t = fold_convert (fd->iter_type, t);
4346 gsi = gsi_last_bb (l0_bb);
4347 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4348 size_zero_node, NULL_TREE, NULL_TREE);
4349 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4350 false, GSI_CONTINUE_LINKING);
4351 expand_omp_build_assign (&gsi, aref, t, true);
4354 if (!broken_loop)
4356 /* Code to control the increment and predicate for the sequential
4357 loop goes in the CONT_BB. */
4358 gsi = gsi_last_nondebug_bb (cont_bb);
4359 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4360 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4361 vmain = gimple_omp_continue_control_use (cont_stmt);
4362 vback = gimple_omp_continue_control_def (cont_stmt);
4364 if (cond_var)
4366 tree itype = TREE_TYPE (cond_var);
4367 tree t2;
4368 if ((fd->ordered && fd->collapse == 1)
4369 || bias
4370 || POINTER_TYPE_P (type)
4371 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4372 || fd->loop.cond_code != LT_EXPR)
4373 t2 = build_int_cst (itype, 1);
4374 else
4375 t2 = fold_convert (itype, fd->loop.step);
4376 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4377 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4378 NULL_TREE, true, GSI_SAME_STMT);
4379 assign_stmt = gimple_build_assign (cond_var, t2);
4380 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4383 if (!gimple_omp_for_combined_p (fd->for_stmt))
4385 if (POINTER_TYPE_P (type))
4386 t = fold_build_pointer_plus (vmain, fd->loop.step);
4387 else
4388 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4389 t = force_gimple_operand_gsi (&gsi, t,
4390 DECL_P (vback)
4391 && TREE_ADDRESSABLE (vback),
4392 NULL_TREE, true, GSI_SAME_STMT);
4393 assign_stmt = gimple_build_assign (vback, t);
4394 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4396 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4398 tree tem;
4399 if (fd->collapse > 1)
4400 tem = fd->loop.v;
4401 else
4403 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4404 fd->loops[0].v, fd->loops[0].n1);
4405 tem = fold_convert (fd->iter_type, tem);
4407 tree aref = build4 (ARRAY_REF, fd->iter_type,
4408 counts[fd->ordered], size_zero_node,
4409 NULL_TREE, NULL_TREE);
4410 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4411 true, GSI_SAME_STMT);
4412 expand_omp_build_assign (&gsi, aref, tem);
4415 t = build2 (fd->loop.cond_code, boolean_type_node,
4416 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4417 iend);
4418 gcond *cond_stmt = gimple_build_cond_empty (t);
4419 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4422 /* Remove GIMPLE_OMP_CONTINUE. */
4423 gsi_remove (&gsi, true);
4425 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4426 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4428 /* Emit code to get the next parallel iteration in L2_BB. */
4429 gsi = gsi_start_bb (l2_bb);
4431 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4432 build_fold_addr_expr (istart0),
4433 build_fold_addr_expr (iend0));
4434 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4435 false, GSI_CONTINUE_LINKING);
4436 if (TREE_TYPE (t) != boolean_type_node)
4437 t = fold_build2 (NE_EXPR, boolean_type_node,
4438 t, build_int_cst (TREE_TYPE (t), 0));
4439 gcond *cond_stmt = gimple_build_cond_empty (t);
4440 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4443 /* Add the loop cleanup function. */
4444 gsi = gsi_last_nondebug_bb (exit_bb);
4445 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4446 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4447 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4448 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4449 else
4450 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4451 gcall *call_stmt = gimple_build_call (t, 0);
4452 if (fd->ordered)
4454 tree arr = counts[fd->ordered];
4455 tree clobber = build_clobber (TREE_TYPE (arr));
4456 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4457 GSI_SAME_STMT);
4459 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4461 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4462 if (fd->have_reductemp)
4464 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4465 gimple_call_lhs (call_stmt));
4466 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4469 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4470 gsi_remove (&gsi, true);
4472 /* Connect the new blocks. */
4473 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4474 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4476 if (!broken_loop)
4478 gimple_seq phis;
4480 e = find_edge (cont_bb, l3_bb);
4481 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4483 phis = phi_nodes (l3_bb);
4484 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4486 gimple *phi = gsi_stmt (gsi);
4487 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4488 PHI_ARG_DEF_FROM_EDGE (phi, e));
4490 remove_edge (e);
4492 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4493 e = find_edge (cont_bb, l1_bb);
4494 if (e == NULL)
4496 e = BRANCH_EDGE (cont_bb);
4497 gcc_assert (single_succ (e->dest) == l1_bb);
4499 if (gimple_omp_for_combined_p (fd->for_stmt))
4501 remove_edge (e);
4502 e = NULL;
4504 else if (fd->collapse > 1)
4506 remove_edge (e);
4507 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4509 else
4510 e->flags = EDGE_TRUE_VALUE;
4511 if (e)
4513 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4514 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4516 else
4518 e = find_edge (cont_bb, l2_bb);
4519 e->flags = EDGE_FALLTHRU;
4521 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4523 if (gimple_in_ssa_p (cfun))
4525 /* Add phis to the outer loop that connect to the phis in the inner,
4526 original loop, and move the loop entry value of the inner phi to
4527 the loop entry value of the outer phi. */
4528 gphi_iterator psi;
4529 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4531 location_t locus;
4532 gphi *nphi;
4533 gphi *exit_phi = psi.phi ();
4535 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4536 continue;
4538 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4539 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4541 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4542 edge latch_to_l1 = find_edge (latch, l1_bb);
4543 gphi *inner_phi
4544 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4546 tree t = gimple_phi_result (exit_phi);
4547 tree new_res = copy_ssa_name (t, NULL);
4548 nphi = create_phi_node (new_res, l0_bb);
4550 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4551 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4552 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4553 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4554 add_phi_arg (nphi, t, entry_to_l0, locus);
4556 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4557 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4559 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4563 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4564 recompute_dominator (CDI_DOMINATORS, l2_bb));
4565 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4566 recompute_dominator (CDI_DOMINATORS, l3_bb));
4567 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4568 recompute_dominator (CDI_DOMINATORS, l0_bb));
4569 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4570 recompute_dominator (CDI_DOMINATORS, l1_bb));
4572 /* We enter expand_omp_for_generic with a loop. This original loop may
4573 have its own loop struct, or it may be part of an outer loop struct
4574 (which may be the fake loop). */
4575 class loop *outer_loop = entry_bb->loop_father;
4576 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4578 add_bb_to_loop (l2_bb, outer_loop);
4580 /* We've added a new loop around the original loop. Allocate the
4581 corresponding loop struct. */
4582 class loop *new_loop = alloc_loop ();
4583 new_loop->header = l0_bb;
4584 new_loop->latch = l2_bb;
4585 add_loop (new_loop, outer_loop);
4587 /* Allocate a loop structure for the original loop unless we already
4588 had one. */
4589 if (!orig_loop_has_loop_struct
4590 && !gimple_omp_for_combined_p (fd->for_stmt))
4592 class loop *orig_loop = alloc_loop ();
4593 orig_loop->header = l1_bb;
4594 /* The loop may have multiple latches. */
4595 add_loop (orig_loop, new_loop);
4600 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4601 compute the needed allocation size. If !ALLOC, this handles the team
4602 allocations; if ALLOC, the thread allocation. SZ is the initial needed size for
4603 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4604 CNT number of elements of each array, for !ALLOC this is
4605 omp_get_num_threads (), for ALLOC number of iterations handled by the
4606 current thread. If PTR is non-NULL, it is the start of the allocation
4607 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4608 clauses pointers to the corresponding arrays. */
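/* Typical usage sketch (two-phase, argument values are illustrative): a
   first call such as

     size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, alloc_align,
					nthreads, NULL, false);

   only sums up the bytes needed for the matching _scantemp_ arrays; the
   caller then obtains a block of that size and calls the function again
   with PTR pointing at it, which carves the block up, stores each
   sub-array pointer into the corresponding _scantemp_ clause decl and
   returns the pointer advanced past the last array.  */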
4610 static tree
4611 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4612 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4613 gimple_stmt_iterator *gsi, bool alloc)
4615 tree eltsz = NULL_TREE;
4616 unsigned HOST_WIDE_INT preval = 0;
4617 if (ptr && sz)
4618 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4619 ptr, size_int (sz));
4620 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4621 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4622 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4623 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4625 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4626 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4627 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4629 unsigned HOST_WIDE_INT szl
4630 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4631 szl = least_bit_hwi (szl);
4632 if (szl)
4633 al = MIN (al, szl);
4635 if (ptr == NULL_TREE)
4637 if (eltsz == NULL_TREE)
4638 eltsz = TYPE_SIZE_UNIT (pointee_type);
4639 else
4640 eltsz = size_binop (PLUS_EXPR, eltsz,
4641 TYPE_SIZE_UNIT (pointee_type));
4643 if (preval == 0 && al <= alloc_align)
4645 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4646 sz += diff;
4647 if (diff && ptr)
4648 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4649 ptr, size_int (diff));
4651 else if (al > preval)
4653 if (ptr)
4655 ptr = fold_convert (pointer_sized_int_node, ptr);
4656 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4657 build_int_cst (pointer_sized_int_node,
4658 al - 1));
4659 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4660 build_int_cst (pointer_sized_int_node,
4661 -(HOST_WIDE_INT) al));
4662 ptr = fold_convert (ptr_type_node, ptr);
4664 else
4665 sz += al - 1;
4667 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4668 preval = al;
4669 else
4670 preval = 1;
4671 if (ptr)
4673 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4674 ptr = OMP_CLAUSE_DECL (c);
4675 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4676 size_binop (MULT_EXPR, cnt,
4677 TYPE_SIZE_UNIT (pointee_type)));
4681 if (ptr == NULL_TREE)
4683 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4684 if (sz)
4685 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4686 return eltsz;
4688 else
4689 return ptr;
4692 /* Return the last _looptemp_ clause if one has been created for
4693 lastprivate on distribute parallel for{, simd} or taskloop.
4694 FD is the loop data and INNERC should be the second _looptemp_
4695 clause (the one holding the end of the range).
4696 This is followed by collapse - 1 _looptemp_ clauses for the
4697 counts[1] and up, and for triangular loops followed by 4
4698 further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4699 one factor and one adjn1). After this there is optionally one
4700 _looptemp_ clause that this function returns. */
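/* Layout illustration (rectangular collapse(3) case): the _looptemp_ chain is

     start, end (== INNERC), counts[1], counts[2], [lastprivate temp]

   so the loop below steps fd->collapse - 1 clauses past INNERC (plus four
   more in the triangular case described above) and returns the clause that
   follows, or NULL_TREE if there is none.  */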
4702 static tree
4703 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4705 gcc_assert (innerc);
4706 int count = fd->collapse - 1;
4707 if (fd->non_rect
4708 && fd->last_nonrect == fd->first_nonrect + 1
4709 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4710 count += 4;
4711 for (int i = 0; i < count; i++)
4713 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4714 OMP_CLAUSE__LOOPTEMP_);
4715 gcc_assert (innerc);
4717 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4718 OMP_CLAUSE__LOOPTEMP_);
4721 /* A subroutine of expand_omp_for. Generate code for a parallel
4722 loop with static schedule and no specified chunk size. Given
4723 parameters:
4725 for (V = N1; V cond N2; V += STEP) BODY;
4727 where COND is "<" or ">", we generate pseudocode
4729 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4730 if (cond is <)
4731 adj = STEP - 1;
4732 else
4733 adj = STEP + 1;
4734 if ((__typeof (V)) -1 > 0 && cond is >)
4735 n = -(adj + N2 - N1) / -STEP;
4736 else
4737 n = (adj + N2 - N1) / STEP;
4738 q = n / nthreads;
4739 tt = n % nthreads;
4740 if (threadid < tt) goto L3; else goto L4;
4742 tt = 0;
4743 q = q + 1;
4745 s0 = q * threadid + tt;
4746 e0 = s0 + q;
4747 V = s0 * STEP + N1;
4748 if (s0 >= e0) goto L2; else goto L0;
4750 e = e0 * STEP + N1;
4752 BODY;
4753 V += STEP;
4754 if (V cond e) goto L1;
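/* Worked example (illustrative numbers): n = 10 iterations over nthreads = 4
   gives q = 2 and tt = 2, so threads 0 and 1 (threadid < tt) bump q to 3 and
   take the slices [0,3) and [3,6), while threads 2 and 3 keep q = 2 and take
   [6,8) and [8,10); every thread derives its own [s0, e0) range from the
   same formula, so no further runtime scheduling calls are needed.  */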
4758 static void
4759 expand_omp_for_static_nochunk (struct omp_region *region,
4760 struct omp_for_data *fd,
4761 gimple *inner_stmt)
4763 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4764 tree type, itype, vmain, vback;
4765 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4766 basic_block body_bb, cont_bb, collapse_bb = NULL;
4767 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4768 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4769 gimple_stmt_iterator gsi, gsip;
4770 edge ep;
4771 bool broken_loop = region->cont == NULL;
4772 tree *counts = NULL;
4773 tree n1, n2, step;
4774 tree reductions = NULL_TREE;
4775 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4777 itype = type = TREE_TYPE (fd->loop.v);
4778 if (POINTER_TYPE_P (type))
4779 itype = signed_type_for (type);
4781 entry_bb = region->entry;
4782 cont_bb = region->cont;
4783 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4784 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4785 gcc_assert (broken_loop
4786 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4787 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4788 body_bb = single_succ (seq_start_bb);
4789 if (!broken_loop)
4791 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4792 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4793 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4795 exit_bb = region->exit;
4797 /* Iteration space partitioning goes in ENTRY_BB. */
4798 gsi = gsi_last_nondebug_bb (entry_bb);
4799 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4800 gsip = gsi;
4801 gsi_prev (&gsip);
4803 if (fd->collapse > 1)
4805 int first_zero_iter = -1, dummy = -1;
4806 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4808 counts = XALLOCAVEC (tree, fd->collapse);
4809 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4810 fin_bb, first_zero_iter,
4811 dummy_bb, dummy, l2_dom_bb);
4812 t = NULL_TREE;
4814 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4815 t = integer_one_node;
4816 else
4817 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4818 fold_convert (type, fd->loop.n1),
4819 fold_convert (type, fd->loop.n2));
4820 if (fd->collapse == 1
4821 && TYPE_UNSIGNED (type)
4822 && (t == NULL_TREE || !integer_onep (t)))
4824 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4825 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4826 true, GSI_SAME_STMT);
4827 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4828 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4829 true, GSI_SAME_STMT);
4830 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4831 NULL_TREE, NULL_TREE);
4832 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4833 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4834 expand_omp_regimplify_p, NULL, NULL)
4835 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4836 expand_omp_regimplify_p, NULL, NULL))
4838 gsi = gsi_for_stmt (cond_stmt);
4839 gimple_regimplify_operands (cond_stmt, &gsi);
4841 ep = split_block (entry_bb, cond_stmt);
4842 ep->flags = EDGE_TRUE_VALUE;
4843 entry_bb = ep->dest;
4844 ep->probability = profile_probability::very_likely ();
4845 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4846 ep->probability = profile_probability::very_unlikely ();
4847 if (gimple_in_ssa_p (cfun))
4849 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4850 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4851 !gsi_end_p (gpi); gsi_next (&gpi))
4853 gphi *phi = gpi.phi ();
4854 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4855 ep, UNKNOWN_LOCATION);
4858 gsi = gsi_last_bb (entry_bb);
4861 if (fd->lastprivate_conditional)
4863 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4864 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4865 if (fd->have_pointer_condtemp)
4866 condtemp = OMP_CLAUSE_DECL (c);
4867 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4868 cond_var = OMP_CLAUSE_DECL (c);
4870 if (fd->have_reductemp
4871 /* For scan, we don't want to reinitialize condtemp before the
4872 second loop. */
4873 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4874 || fd->have_nonctrl_scantemp)
4876 tree t1 = build_int_cst (long_integer_type_node, 0);
4877 tree t2 = build_int_cst (long_integer_type_node, 1);
4878 tree t3 = build_int_cstu (long_integer_type_node,
4879 (HOST_WIDE_INT_1U << 31) + 1);
4880 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4881 gimple_stmt_iterator gsi2 = gsi_none ();
4882 gimple *g = NULL;
4883 tree mem = null_pointer_node, memv = NULL_TREE;
4884 unsigned HOST_WIDE_INT condtemp_sz = 0;
4885 unsigned HOST_WIDE_INT alloc_align = 0;
4886 if (fd->have_reductemp)
4888 gcc_assert (!fd->have_nonctrl_scantemp);
4889 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4890 reductions = OMP_CLAUSE_DECL (c);
4891 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4892 g = SSA_NAME_DEF_STMT (reductions);
4893 reductions = gimple_assign_rhs1 (g);
4894 OMP_CLAUSE_DECL (c) = reductions;
4895 gsi2 = gsi_for_stmt (g);
4897 else
4899 if (gsi_end_p (gsip))
4900 gsi2 = gsi_after_labels (region->entry);
4901 else
4902 gsi2 = gsip;
4903 reductions = null_pointer_node;
4905 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4907 tree type;
4908 if (fd->have_pointer_condtemp)
4909 type = TREE_TYPE (condtemp);
4910 else
4911 type = ptr_type_node;
4912 memv = create_tmp_var (type);
4913 TREE_ADDRESSABLE (memv) = 1;
4914 unsigned HOST_WIDE_INT sz = 0;
4915 tree size = NULL_TREE;
4916 if (fd->have_pointer_condtemp)
4918 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4919 sz *= fd->lastprivate_conditional;
4920 condtemp_sz = sz;
4922 if (fd->have_nonctrl_scantemp)
4924 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4925 gimple *g = gimple_build_call (nthreads, 0);
4926 nthreads = create_tmp_var (integer_type_node);
4927 gimple_call_set_lhs (g, nthreads);
4928 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4929 nthreads = fold_convert (sizetype, nthreads);
4930 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4931 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4932 alloc_align, nthreads, NULL,
4933 false);
4934 size = fold_convert (type, size);
4936 else
4937 size = build_int_cst (type, sz);
4938 expand_omp_build_assign (&gsi2, memv, size, false);
4939 mem = build_fold_addr_expr (memv);
4941 tree t
4942 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4943 9, t1, t2, t2, t3, t1, null_pointer_node,
4944 null_pointer_node, reductions, mem);
4945 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4946 true, GSI_SAME_STMT);
4947 if (fd->have_pointer_condtemp)
4948 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4949 if (fd->have_nonctrl_scantemp)
4951 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4952 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4953 alloc_align, nthreads, &gsi2, false);
4955 if (fd->have_reductemp)
4957 gsi_remove (&gsi2, true);
4958 release_ssa_name (gimple_assign_lhs (g));
4961 switch (gimple_omp_for_kind (fd->for_stmt))
4963 case GF_OMP_FOR_KIND_FOR:
4964 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4965 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4966 break;
4967 case GF_OMP_FOR_KIND_DISTRIBUTE:
4968 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4969 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4970 break;
4971 default:
4972 gcc_unreachable ();
4974 nthreads = build_call_expr (nthreads, 0);
4975 nthreads = fold_convert (itype, nthreads);
4976 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4977 true, GSI_SAME_STMT);
4978 threadid = build_call_expr (threadid, 0);
4979 threadid = fold_convert (itype, threadid);
4980 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4981 true, GSI_SAME_STMT);
4983 n1 = fd->loop.n1;
4984 n2 = fd->loop.n2;
4985 step = fd->loop.step;
4986 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4988 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4989 OMP_CLAUSE__LOOPTEMP_);
4990 gcc_assert (innerc);
4991 n1 = OMP_CLAUSE_DECL (innerc);
4992 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4993 OMP_CLAUSE__LOOPTEMP_);
4994 gcc_assert (innerc);
4995 n2 = OMP_CLAUSE_DECL (innerc);
4997 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4998 true, NULL_TREE, true, GSI_SAME_STMT);
4999 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5000 true, NULL_TREE, true, GSI_SAME_STMT);
5001 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5002 true, NULL_TREE, true, GSI_SAME_STMT);
5004 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5005 t = fold_build2 (PLUS_EXPR, itype, step, t);
5006 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5007 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5008 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5009 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5010 fold_build1 (NEGATE_EXPR, itype, t),
5011 fold_build1 (NEGATE_EXPR, itype, step));
5012 else
5013 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5014 t = fold_convert (itype, t);
5015 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5017 q = create_tmp_reg (itype, "q");
5018 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5019 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5020 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5022 tt = create_tmp_reg (itype, "tt");
5023 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5024 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5025 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5027 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5028 gcond *cond_stmt = gimple_build_cond_empty (t);
5029 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5031 second_bb = split_block (entry_bb, cond_stmt)->dest;
5032 gsi = gsi_last_nondebug_bb (second_bb);
5033 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5035 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5036 GSI_SAME_STMT);
5037 gassign *assign_stmt
5038 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5039 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5041 third_bb = split_block (second_bb, assign_stmt)->dest;
5042 gsi = gsi_last_nondebug_bb (third_bb);
5043 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5045 if (fd->have_nonctrl_scantemp)
5047 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5048 tree controlp = NULL_TREE, controlb = NULL_TREE;
5049 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5050 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5051 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5053 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5054 controlb = OMP_CLAUSE_DECL (c);
5055 else
5056 controlp = OMP_CLAUSE_DECL (c);
5057 if (controlb && controlp)
5058 break;
5060 gcc_assert (controlp && controlb);
5061 tree cnt = create_tmp_var (sizetype);
5062 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5063 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5064 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5065 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5066 alloc_align, cnt, NULL, true);
5067 tree size = create_tmp_var (sizetype);
5068 expand_omp_build_assign (&gsi, size, sz, false);
5069 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5070 size, size_int (16384));
5071 expand_omp_build_assign (&gsi, controlb, cmp);
5072 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5073 NULL_TREE, NULL_TREE);
5074 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5075 fourth_bb = split_block (third_bb, g)->dest;
5076 gsi = gsi_last_nondebug_bb (fourth_bb);
5077 /* FIXME: Once we have allocators, this should use allocator. */
5078 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5079 gimple_call_set_lhs (g, controlp);
5080 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5081 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5082 &gsi, true);
5083 gsi_prev (&gsi);
5084 g = gsi_stmt (gsi);
5085 fifth_bb = split_block (fourth_bb, g)->dest;
5086 gsi = gsi_last_nondebug_bb (fifth_bb);
5088 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5089 gimple_call_set_lhs (g, controlp);
5090 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5091 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5092 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5093 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5094 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5096 tree tmp = create_tmp_var (sizetype);
5097 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5098 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5099 TYPE_SIZE_UNIT (pointee_type));
5100 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5101 g = gimple_build_call (alloca_decl, 2, tmp,
5102 size_int (TYPE_ALIGN (pointee_type)));
5103 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5104 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5107 sixth_bb = split_block (fifth_bb, g)->dest;
5108 gsi = gsi_last_nondebug_bb (sixth_bb);
5111 t = build2 (MULT_EXPR, itype, q, threadid);
5112 t = build2 (PLUS_EXPR, itype, t, tt);
5113 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5115 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5116 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5118 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5119 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5121 /* Remove the GIMPLE_OMP_FOR statement. */
5122 gsi_remove (&gsi, true);
5124 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5125 gsi = gsi_start_bb (seq_start_bb);
5127 tree startvar = fd->loop.v;
5128 tree endvar = NULL_TREE;
5130 if (gimple_omp_for_combined_p (fd->for_stmt))
5132 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5133 ? gimple_omp_parallel_clauses (inner_stmt)
5134 : gimple_omp_for_clauses (inner_stmt);
5135 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5136 gcc_assert (innerc);
5137 startvar = OMP_CLAUSE_DECL (innerc);
5138 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5139 OMP_CLAUSE__LOOPTEMP_);
5140 gcc_assert (innerc);
5141 endvar = OMP_CLAUSE_DECL (innerc);
5142 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5143 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5145 innerc = find_lastprivate_looptemp (fd, innerc);
5146 if (innerc)
5148 /* If needed (distribute parallel for with lastprivate),
5149 propagate down the total number of iterations. */
5150 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5151 fd->loop.n2);
5152 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5153 GSI_CONTINUE_LINKING);
5154 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5155 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5159 t = fold_convert (itype, s0);
5160 t = fold_build2 (MULT_EXPR, itype, t, step);
5161 if (POINTER_TYPE_P (type))
5163 t = fold_build_pointer_plus (n1, t);
5164 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5165 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5166 t = fold_convert (signed_type_for (type), t);
5168 else
5169 t = fold_build2 (PLUS_EXPR, type, t, n1);
5170 t = fold_convert (TREE_TYPE (startvar), t);
5171 t = force_gimple_operand_gsi (&gsi, t,
5172 DECL_P (startvar)
5173 && TREE_ADDRESSABLE (startvar),
5174 NULL_TREE, false, GSI_CONTINUE_LINKING);
5175 assign_stmt = gimple_build_assign (startvar, t);
5176 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5177 if (cond_var)
5179 tree itype = TREE_TYPE (cond_var);
5180 /* For lastprivate(conditional:) itervar, we need some iteration
5181 counter that starts at unsigned non-zero and increases.
5182 Prefer as few IVs as possible, so if we can use startvar
5183 itself, use that, or startvar + constant (those would be
5184 incremented with step), and as a last resort use s0 + 1,
5185 incremented by 1. */
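/* Concretely: a positive constant N1 with a "<" loop lets startvar itself
   serve as the counter; a non-positive constant N1 uses startvar + (1 - N1);
   pointers, a non-constant N1 or a non-"<" condition fall back to s0 + 1.  */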
5186 if (POINTER_TYPE_P (type)
5187 || TREE_CODE (n1) != INTEGER_CST
5188 || fd->loop.cond_code != LT_EXPR)
5189 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5190 build_int_cst (itype, 1));
5191 else if (tree_int_cst_sgn (n1) == 1)
5192 t = fold_convert (itype, t);
5193 else
5195 tree c = fold_convert (itype, n1);
5196 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5197 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5199 t = force_gimple_operand_gsi (&gsi, t, false,
5200 NULL_TREE, false, GSI_CONTINUE_LINKING);
5201 assign_stmt = gimple_build_assign (cond_var, t);
5202 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5205 t = fold_convert (itype, e0);
5206 t = fold_build2 (MULT_EXPR, itype, t, step);
5207 if (POINTER_TYPE_P (type))
5209 t = fold_build_pointer_plus (n1, t);
5210 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5211 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5212 t = fold_convert (signed_type_for (type), t);
5214 else
5215 t = fold_build2 (PLUS_EXPR, type, t, n1);
5216 t = fold_convert (TREE_TYPE (startvar), t);
5217 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5218 false, GSI_CONTINUE_LINKING);
5219 if (endvar)
5221 assign_stmt = gimple_build_assign (endvar, e);
5222 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5223 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5224 assign_stmt = gimple_build_assign (fd->loop.v, e);
5225 else
5226 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5227 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5229 /* Handle linear clause adjustments. */
5230 tree itercnt = NULL_TREE;
5231 tree *nonrect_bounds = NULL;
5232 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5233 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5234 c; c = OMP_CLAUSE_CHAIN (c))
5235 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5236 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5238 tree d = OMP_CLAUSE_DECL (c);
5239 tree t = d, a, dest;
5240 if (omp_privatize_by_reference (t))
5241 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5242 if (itercnt == NULL_TREE)
5244 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5246 itercnt = fold_build2 (MINUS_EXPR, itype,
5247 fold_convert (itype, n1),
5248 fold_convert (itype, fd->loop.n1));
5249 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5250 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5251 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5252 NULL_TREE, false,
5253 GSI_CONTINUE_LINKING);
5255 else
5256 itercnt = s0;
5258 tree type = TREE_TYPE (t);
5259 if (POINTER_TYPE_P (type))
5260 type = sizetype;
5261 a = fold_build2 (MULT_EXPR, type,
5262 fold_convert (type, itercnt),
5263 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5264 dest = unshare_expr (t);
5265 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5266 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5267 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5268 false, GSI_CONTINUE_LINKING);
5269 expand_omp_build_assign (&gsi, dest, t, true);
5271 if (fd->collapse > 1)
5273 if (fd->non_rect)
5275 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5276 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5278 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5279 startvar);
5282 if (!broken_loop)
5284 /* The code controlling the sequential loop replaces the
5285 GIMPLE_OMP_CONTINUE. */
5286 gsi = gsi_last_nondebug_bb (cont_bb);
5287 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5288 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5289 vmain = gimple_omp_continue_control_use (cont_stmt);
5290 vback = gimple_omp_continue_control_def (cont_stmt);
5292 if (cond_var)
5294 tree itype = TREE_TYPE (cond_var);
5295 tree t2;
5296 if (POINTER_TYPE_P (type)
5297 || TREE_CODE (n1) != INTEGER_CST
5298 || fd->loop.cond_code != LT_EXPR)
5299 t2 = build_int_cst (itype, 1);
5300 else
5301 t2 = fold_convert (itype, step);
5302 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5303 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5304 NULL_TREE, true, GSI_SAME_STMT);
5305 assign_stmt = gimple_build_assign (cond_var, t2);
5306 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5309 if (!gimple_omp_for_combined_p (fd->for_stmt))
5311 if (POINTER_TYPE_P (type))
5312 t = fold_build_pointer_plus (vmain, step);
5313 else
5314 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5315 t = force_gimple_operand_gsi (&gsi, t,
5316 DECL_P (vback)
5317 && TREE_ADDRESSABLE (vback),
5318 NULL_TREE, true, GSI_SAME_STMT);
5319 assign_stmt = gimple_build_assign (vback, t);
5320 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5322 t = build2 (fd->loop.cond_code, boolean_type_node,
5323 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5324 ? t : vback, e);
5325 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5328 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5329 gsi_remove (&gsi, true);
5331 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5332 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5333 cont_bb, body_bb);
5336 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5337 gsi = gsi_last_nondebug_bb (exit_bb);
5338 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5340 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5341 if (fd->have_reductemp
5342 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5343 && !fd->have_nonctrl_scantemp))
5345 tree fn;
5346 if (t)
5347 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5348 else
5349 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5350 gcall *g = gimple_build_call (fn, 0);
5351 if (t)
5353 gimple_call_set_lhs (g, t);
5354 if (fd->have_reductemp)
5355 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5356 NOP_EXPR, t),
5357 GSI_SAME_STMT);
5359 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5361 else
5362 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5364 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5365 && !fd->have_nonctrl_scantemp)
5367 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5368 gcall *g = gimple_build_call (fn, 0);
5369 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5371 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5373 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5374 tree controlp = NULL_TREE, controlb = NULL_TREE;
5375 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5376 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5377 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5379 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5380 controlb = OMP_CLAUSE_DECL (c);
5381 else
5382 controlp = OMP_CLAUSE_DECL (c);
5383 if (controlb && controlp)
5384 break;
5386 gcc_assert (controlp && controlb);
5387 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5388 NULL_TREE, NULL_TREE);
5389 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5390 exit1_bb = split_block (exit_bb, g)->dest;
5391 gsi = gsi_after_labels (exit1_bb);
5392 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5393 controlp);
5394 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5395 exit2_bb = split_block (exit1_bb, g)->dest;
5396 gsi = gsi_after_labels (exit2_bb);
5397 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5398 controlp);
5399 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5400 exit3_bb = split_block (exit2_bb, g)->dest;
5401 gsi = gsi_after_labels (exit3_bb);
5403 gsi_remove (&gsi, true);
5405 /* Connect all the blocks. */
5406 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5407 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5408 ep = find_edge (entry_bb, second_bb);
5409 ep->flags = EDGE_TRUE_VALUE;
5410 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5411 if (fourth_bb)
5413 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5414 ep->probability
5415 = profile_probability::guessed_always ().apply_scale (1, 2);
5416 ep = find_edge (third_bb, fourth_bb);
5417 ep->flags = EDGE_TRUE_VALUE;
5418 ep->probability
5419 = profile_probability::guessed_always ().apply_scale (1, 2);
5420 ep = find_edge (fourth_bb, fifth_bb);
5421 redirect_edge_and_branch (ep, sixth_bb);
5423 else
5424 sixth_bb = third_bb;
5425 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5426 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5427 if (exit1_bb)
5429 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5430 ep->probability
5431 = profile_probability::guessed_always ().apply_scale (1, 2);
5432 ep = find_edge (exit_bb, exit1_bb);
5433 ep->flags = EDGE_TRUE_VALUE;
5434 ep->probability
5435 = profile_probability::guessed_always ().apply_scale (1, 2);
5436 ep = find_edge (exit1_bb, exit2_bb);
5437 redirect_edge_and_branch (ep, exit3_bb);
5440 if (!broken_loop)
5442 ep = find_edge (cont_bb, body_bb);
5443 if (ep == NULL)
5445 ep = BRANCH_EDGE (cont_bb);
5446 gcc_assert (single_succ (ep->dest) == body_bb);
5448 if (gimple_omp_for_combined_p (fd->for_stmt))
5450 remove_edge (ep);
5451 ep = NULL;
5453 else if (fd->collapse > 1)
5455 remove_edge (ep);
5456 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5458 else
5459 ep->flags = EDGE_TRUE_VALUE;
5460 find_edge (cont_bb, fin_bb)->flags
5461 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5464 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5465 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5466 if (fourth_bb)
5468 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5469 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5471 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5473 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5474 recompute_dominator (CDI_DOMINATORS, body_bb));
5475 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5476 recompute_dominator (CDI_DOMINATORS, fin_bb));
5477 if (exit1_bb)
5479 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5480 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5483 class loop *loop = body_bb->loop_father;
5484 if (loop != entry_bb->loop_father)
5486 gcc_assert (broken_loop || loop->header == body_bb);
5487 gcc_assert (broken_loop
5488 || loop->latch == region->cont
5489 || single_pred (loop->latch) == region->cont);
5490 return;
5493 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5495 loop = alloc_loop ();
5496 loop->header = body_bb;
5497 if (collapse_bb == NULL)
5498 loop->latch = cont_bb;
5499 add_loop (loop, body_bb->loop_father);
5503 /* Return phi in E->DEST with ARG on edge E. */
5505 static gphi *
5506 find_phi_with_arg_on_edge (tree arg, edge e)
5508 basic_block bb = e->dest;
5510 for (gphi_iterator gpi = gsi_start_phis (bb);
5511 !gsi_end_p (gpi);
5512 gsi_next (&gpi))
5514 gphi *phi = gpi.phi ();
5515 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5516 return phi;
5519 return NULL;
5522 /* A subroutine of expand_omp_for. Generate code for a parallel
5523 loop with static schedule and a specified chunk size. Given
5524 parameters:
5526 for (V = N1; V cond N2; V += STEP) BODY;
5528 where COND is "<" or ">", we generate pseudocode
5530 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5531 if (cond is <)
5532 adj = STEP - 1;
5533 else
5534 adj = STEP + 1;
5535 if ((__typeof (V)) -1 > 0 && cond is >)
5536 n = -(adj + N2 - N1) / -STEP;
5537 else
5538 n = (adj + N2 - N1) / STEP;
5539 trip = 0;
5540 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5541 here so that V is defined
5542 if the loop is not entered
5543 L0:
5544 s0 = (trip * nthreads + threadid) * CHUNK;
5545 e0 = min (s0 + CHUNK, n);
5546 if (s0 < n) goto L1; else goto L4;
5547 L1:
5548 V = s0 * STEP + N1;
5549 e = e0 * STEP + N1;
5550 L2:
5551 BODY;
5552 V += STEP;
5553 if (V cond e) goto L2; else goto L3;
5554 L3:
5555 trip += 1;
5556 goto L0;
5557 L4:
5558 */
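/* A purely illustrative example (not taken from the sources or testsuite):
   with nthreads = 4 and CHUNK = 2, a loop such as

	#pragma omp for schedule (static, 2)
	for (i = 0; i < 10; i++)
	  body (i);

   gives thread 0 iterations {0,1} on trip 0 and {8,9} on trip 1, thread 1
   gets {2,3}, thread 2 {4,5} and thread 3 {6,7}; any trip for which
   s0 >= n branches straight to L4.  */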
5560 static void
5561 expand_omp_for_static_chunk (struct omp_region *region,
5562 struct omp_for_data *fd, gimple *inner_stmt)
5564 tree n, s0, e0, e, t;
5565 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5566 tree type, itype, vmain, vback, vextra;
5567 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5568 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5569 gimple_stmt_iterator gsi, gsip;
5570 edge se;
5571 bool broken_loop = region->cont == NULL;
5572 tree *counts = NULL;
5573 tree n1, n2, step;
5574 tree reductions = NULL_TREE;
5575 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5577 itype = type = TREE_TYPE (fd->loop.v);
5578 if (POINTER_TYPE_P (type))
5579 itype = signed_type_for (type);
5581 entry_bb = region->entry;
5582 se = split_block (entry_bb, last_stmt (entry_bb));
5583 entry_bb = se->src;
5584 iter_part_bb = se->dest;
5585 cont_bb = region->cont;
5586 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5587 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5588 gcc_assert (broken_loop
5589 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5590 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5591 body_bb = single_succ (seq_start_bb);
5592 if (!broken_loop)
5594 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5595 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5596 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5597 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5599 exit_bb = region->exit;
5601 /* Trip and adjustment setup goes in ENTRY_BB. */
5602 gsi = gsi_last_nondebug_bb (entry_bb);
5603 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5604 gsip = gsi;
5605 gsi_prev (&gsip);
5607 if (fd->collapse > 1)
5609 int first_zero_iter = -1, dummy = -1;
5610 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5612 counts = XALLOCAVEC (tree, fd->collapse);
5613 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5614 fin_bb, first_zero_iter,
5615 dummy_bb, dummy, l2_dom_bb);
5616 t = NULL_TREE;
5618 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5619 t = integer_one_node;
5620 else
5621 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5622 fold_convert (type, fd->loop.n1),
5623 fold_convert (type, fd->loop.n2));
5624 if (fd->collapse == 1
5625 && TYPE_UNSIGNED (type)
5626 && (t == NULL_TREE || !integer_onep (t)))
5628 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5629 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5630 true, GSI_SAME_STMT);
5631 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5632 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5633 true, GSI_SAME_STMT);
5634 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5635 NULL_TREE, NULL_TREE);
5636 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5637 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5638 expand_omp_regimplify_p, NULL, NULL)
5639 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5640 expand_omp_regimplify_p, NULL, NULL))
5642 gsi = gsi_for_stmt (cond_stmt);
5643 gimple_regimplify_operands (cond_stmt, &gsi);
5645 se = split_block (entry_bb, cond_stmt);
5646 se->flags = EDGE_TRUE_VALUE;
5647 entry_bb = se->dest;
5648 se->probability = profile_probability::very_likely ();
5649 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5650 se->probability = profile_probability::very_unlikely ();
5651 if (gimple_in_ssa_p (cfun))
5653 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5654 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5655 !gsi_end_p (gpi); gsi_next (&gpi))
5657 gphi *phi = gpi.phi ();
5658 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5659 se, UNKNOWN_LOCATION);
5662 gsi = gsi_last_bb (entry_bb);
5665 if (fd->lastprivate_conditional)
5667 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5668 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5669 if (fd->have_pointer_condtemp)
5670 condtemp = OMP_CLAUSE_DECL (c);
5671 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5672 cond_var = OMP_CLAUSE_DECL (c);
5674 if (fd->have_reductemp || fd->have_pointer_condtemp)
5676 tree t1 = build_int_cst (long_integer_type_node, 0);
5677 tree t2 = build_int_cst (long_integer_type_node, 1);
5678 tree t3 = build_int_cstu (long_integer_type_node,
5679 (HOST_WIDE_INT_1U << 31) + 1);
5680 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5681 gimple_stmt_iterator gsi2 = gsi_none ();
5682 gimple *g = NULL;
5683 tree mem = null_pointer_node, memv = NULL_TREE;
5684 if (fd->have_reductemp)
5686 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5687 reductions = OMP_CLAUSE_DECL (c);
5688 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5689 g = SSA_NAME_DEF_STMT (reductions);
5690 reductions = gimple_assign_rhs1 (g);
5691 OMP_CLAUSE_DECL (c) = reductions;
5692 gsi2 = gsi_for_stmt (g);
5694 else
5696 if (gsi_end_p (gsip))
5697 gsi2 = gsi_after_labels (region->entry);
5698 else
5699 gsi2 = gsip;
5700 reductions = null_pointer_node;
5702 if (fd->have_pointer_condtemp)
5704 tree type = TREE_TYPE (condtemp);
5705 memv = create_tmp_var (type);
5706 TREE_ADDRESSABLE (memv) = 1;
5707 unsigned HOST_WIDE_INT sz
5708 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5709 sz *= fd->lastprivate_conditional;
5710 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5711 false);
5712 mem = build_fold_addr_expr (memv);
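/* The GOMP_loop_start call below is emitted only for its side effects: it
   lets the runtime set up the reduction buffer and/or the conditional
   lastprivate buffer (returned through MEM).  Its bound arguments describe
   a degenerate 0..1 loop and its boolean result is discarded.  */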
5714 tree t
5715 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5716 9, t1, t2, t2, t3, t1, null_pointer_node,
5717 null_pointer_node, reductions, mem);
5718 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5719 true, GSI_SAME_STMT);
5720 if (fd->have_pointer_condtemp)
5721 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5722 if (fd->have_reductemp)
5724 gsi_remove (&gsi2, true);
5725 release_ssa_name (gimple_assign_lhs (g));
5728 switch (gimple_omp_for_kind (fd->for_stmt))
5730 case GF_OMP_FOR_KIND_FOR:
5731 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5732 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5733 break;
5734 case GF_OMP_FOR_KIND_DISTRIBUTE:
5735 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5736 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5737 break;
5738 default:
5739 gcc_unreachable ();
5741 nthreads = build_call_expr (nthreads, 0);
5742 nthreads = fold_convert (itype, nthreads);
5743 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5744 true, GSI_SAME_STMT);
5745 threadid = build_call_expr (threadid, 0);
5746 threadid = fold_convert (itype, threadid);
5747 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5748 true, GSI_SAME_STMT);
5750 n1 = fd->loop.n1;
5751 n2 = fd->loop.n2;
5752 step = fd->loop.step;
5753 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5755 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5756 OMP_CLAUSE__LOOPTEMP_);
5757 gcc_assert (innerc);
5758 n1 = OMP_CLAUSE_DECL (innerc);
5759 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5760 OMP_CLAUSE__LOOPTEMP_);
5761 gcc_assert (innerc);
5762 n2 = OMP_CLAUSE_DECL (innerc);
5764 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5765 true, NULL_TREE, true, GSI_SAME_STMT);
5766 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5767 true, NULL_TREE, true, GSI_SAME_STMT);
5768 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5769 true, NULL_TREE, true, GSI_SAME_STMT);
5770 tree chunk_size = fold_convert (itype, fd->chunk_size);
5771 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5772 chunk_size
5773 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5774 GSI_SAME_STMT);
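/* Compute the iteration count n = (adj + N2 - N1) / STEP exactly as in the
   pseudocode above, negating both dividend and divisor for an unsigned type
   counting down.  */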
5776 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5777 t = fold_build2 (PLUS_EXPR, itype, step, t);
5778 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5779 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5780 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5781 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5782 fold_build1 (NEGATE_EXPR, itype, t),
5783 fold_build1 (NEGATE_EXPR, itype, step));
5784 else
5785 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5786 t = fold_convert (itype, t);
5787 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5788 true, GSI_SAME_STMT);
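/* The trip counter is kept in three values -- its initial value, its value
   at the head of the iteration-partitioning loop and its incremented value --
   so that, in SSA form, a PHI node for it can be created in ITER_PART_BB
   below.  */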
5790 trip_var = create_tmp_reg (itype, ".trip");
5791 if (gimple_in_ssa_p (cfun))
5793 trip_init = make_ssa_name (trip_var);
5794 trip_main = make_ssa_name (trip_var);
5795 trip_back = make_ssa_name (trip_var);
5797 else
5799 trip_init = trip_var;
5800 trip_main = trip_var;
5801 trip_back = trip_var;
5804 gassign *assign_stmt
5805 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5806 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5808 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5809 t = fold_build2 (MULT_EXPR, itype, t, step);
5810 if (POINTER_TYPE_P (type))
5811 t = fold_build_pointer_plus (n1, t);
5812 else
5813 t = fold_build2 (PLUS_EXPR, type, t, n1);
5814 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5815 true, GSI_SAME_STMT);
5817 /* Remove the GIMPLE_OMP_FOR. */
5818 gsi_remove (&gsi, true);
5820 gimple_stmt_iterator gsif = gsi;
5822 /* Iteration space partitioning goes in ITER_PART_BB. */
5823 gsi = gsi_last_bb (iter_part_bb);
5825 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5826 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5827 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5828 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5829 false, GSI_CONTINUE_LINKING);
5831 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5832 t = fold_build2 (MIN_EXPR, itype, t, n);
5833 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5834 false, GSI_CONTINUE_LINKING);
5836 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5837 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5839 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5840 gsi = gsi_start_bb (seq_start_bb);
5842 tree startvar = fd->loop.v;
5843 tree endvar = NULL_TREE;
5845 if (gimple_omp_for_combined_p (fd->for_stmt))
5847 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5848 ? gimple_omp_parallel_clauses (inner_stmt)
5849 : gimple_omp_for_clauses (inner_stmt);
5850 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5851 gcc_assert (innerc);
5852 startvar = OMP_CLAUSE_DECL (innerc);
5853 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5854 OMP_CLAUSE__LOOPTEMP_);
5855 gcc_assert (innerc);
5856 endvar = OMP_CLAUSE_DECL (innerc);
5857 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5858 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5860 innerc = find_lastprivate_looptemp (fd, innerc);
5861 if (innerc)
5863 /* If needed (distribute parallel for with lastprivate),
5864 propagate down the total number of iterations. */
5865 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5866 fd->loop.n2);
5867 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5868 GSI_CONTINUE_LINKING);
5869 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5870 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5875 t = fold_convert (itype, s0);
5876 t = fold_build2 (MULT_EXPR, itype, t, step);
5877 if (POINTER_TYPE_P (type))
5879 t = fold_build_pointer_plus (n1, t);
5880 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5881 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5882 t = fold_convert (signed_type_for (type), t);
5884 else
5885 t = fold_build2 (PLUS_EXPR, type, t, n1);
5886 t = fold_convert (TREE_TYPE (startvar), t);
5887 t = force_gimple_operand_gsi (&gsi, t,
5888 DECL_P (startvar)
5889 && TREE_ADDRESSABLE (startvar),
5890 NULL_TREE, false, GSI_CONTINUE_LINKING);
5891 assign_stmt = gimple_build_assign (startvar, t);
5892 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5893 if (cond_var)
5895 tree itype = TREE_TYPE (cond_var);
5896 /* For lastprivate(conditional:) itervar, we need some iteration
5897 counter that starts at unsigned non-zero and increases.
5898 Prefer as few IVs as possible, so if we can use startvar
5899 itself, use that, or startvar + constant (those would be
5900 incremented with step), and as a last resort use s0 + 1,
5901 incremented by 1. */
5902 if (POINTER_TYPE_P (type)
5903 || TREE_CODE (n1) != INTEGER_CST
5904 || fd->loop.cond_code != LT_EXPR)
5905 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5906 build_int_cst (itype, 1));
5907 else if (tree_int_cst_sgn (n1) == 1)
5908 t = fold_convert (itype, t);
5909 else
5911 tree c = fold_convert (itype, n1);
5912 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5913 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5915 t = force_gimple_operand_gsi (&gsi, t, false,
5916 NULL_TREE, false, GSI_CONTINUE_LINKING);
5917 assign_stmt = gimple_build_assign (cond_var, t);
5918 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5921 t = fold_convert (itype, e0);
5922 t = fold_build2 (MULT_EXPR, itype, t, step);
5923 if (POINTER_TYPE_P (type))
5925 t = fold_build_pointer_plus (n1, t);
5926 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5927 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5928 t = fold_convert (signed_type_for (type), t);
5930 else
5931 t = fold_build2 (PLUS_EXPR, type, t, n1);
5932 t = fold_convert (TREE_TYPE (startvar), t);
5933 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5934 false, GSI_CONTINUE_LINKING);
5935 if (endvar)
5937 assign_stmt = gimple_build_assign (endvar, e);
5938 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5939 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5940 assign_stmt = gimple_build_assign (fd->loop.v, e);
5941 else
5942 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5943 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5945 /* Handle linear clause adjustments. */
5946 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5947 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5948 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5949 c; c = OMP_CLAUSE_CHAIN (c))
5950 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5951 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5953 tree d = OMP_CLAUSE_DECL (c);
5954 tree t = d, a, dest;
5955 if (omp_privatize_by_reference (t))
5956 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5957 tree type = TREE_TYPE (t);
5958 if (POINTER_TYPE_P (type))
5959 type = sizetype;
5960 dest = unshare_expr (t);
5961 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5962 expand_omp_build_assign (&gsif, v, t);
5963 if (itercnt == NULL_TREE)
5965 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5967 itercntbias
5968 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5969 fold_convert (itype, fd->loop.n1));
5970 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5971 itercntbias, step);
5972 itercntbias
5973 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5974 NULL_TREE, true,
5975 GSI_SAME_STMT);
5976 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5977 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5978 NULL_TREE, false,
5979 GSI_CONTINUE_LINKING);
5981 else
5982 itercnt = s0;
5984 a = fold_build2 (MULT_EXPR, type,
5985 fold_convert (type, itercnt),
5986 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5987 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5988 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5989 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5990 false, GSI_CONTINUE_LINKING);
5991 expand_omp_build_assign (&gsi, dest, t, true);
5993 if (fd->collapse > 1)
5994 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5996 if (!broken_loop)
5998 /* The code controlling the sequential loop goes in CONT_BB,
5999 replacing the GIMPLE_OMP_CONTINUE. */
6000 gsi = gsi_last_nondebug_bb (cont_bb);
6001 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6002 vmain = gimple_omp_continue_control_use (cont_stmt);
6003 vback = gimple_omp_continue_control_def (cont_stmt);
6005 if (cond_var)
6007 tree itype = TREE_TYPE (cond_var);
6008 tree t2;
6009 if (POINTER_TYPE_P (type)
6010 || TREE_CODE (n1) != INTEGER_CST
6011 || fd->loop.cond_code != LT_EXPR)
6012 t2 = build_int_cst (itype, 1);
6013 else
6014 t2 = fold_convert (itype, step);
6015 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6016 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6017 NULL_TREE, true, GSI_SAME_STMT);
6018 assign_stmt = gimple_build_assign (cond_var, t2);
6019 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6022 if (!gimple_omp_for_combined_p (fd->for_stmt))
6024 if (POINTER_TYPE_P (type))
6025 t = fold_build_pointer_plus (vmain, step);
6026 else
6027 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6028 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6029 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6030 true, GSI_SAME_STMT);
6031 assign_stmt = gimple_build_assign (vback, t);
6032 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6034 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6035 t = build2 (EQ_EXPR, boolean_type_node,
6036 build_int_cst (itype, 0),
6037 build_int_cst (itype, 1));
6038 else
6039 t = build2 (fd->loop.cond_code, boolean_type_node,
6040 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6041 ? t : vback, e);
6042 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6045 /* Remove GIMPLE_OMP_CONTINUE. */
6046 gsi_remove (&gsi, true);
6048 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6049 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6051 /* Trip update code goes into TRIP_UPDATE_BB. */
6052 gsi = gsi_start_bb (trip_update_bb);
6054 t = build_int_cst (itype, 1);
6055 t = build2 (PLUS_EXPR, itype, trip_main, t);
6056 assign_stmt = gimple_build_assign (trip_back, t);
6057 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6060 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6061 gsi = gsi_last_nondebug_bb (exit_bb);
6062 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6064 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6065 if (fd->have_reductemp || fd->have_pointer_condtemp)
6067 tree fn;
6068 if (t)
6069 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6070 else
6071 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6072 gcall *g = gimple_build_call (fn, 0);
6073 if (t)
6075 gimple_call_set_lhs (g, t);
6076 if (fd->have_reductemp)
6077 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6078 NOP_EXPR, t),
6079 GSI_SAME_STMT);
6081 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6083 else
6084 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6086 else if (fd->have_pointer_condtemp)
6088 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6089 gcall *g = gimple_build_call (fn, 0);
6090 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6092 gsi_remove (&gsi, true);
6094 /* Connect the new blocks. */
6095 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6096 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6098 if (!broken_loop)
6100 se = find_edge (cont_bb, body_bb);
6101 if (se == NULL)
6103 se = BRANCH_EDGE (cont_bb);
6104 gcc_assert (single_succ (se->dest) == body_bb);
6106 if (gimple_omp_for_combined_p (fd->for_stmt))
6108 remove_edge (se);
6109 se = NULL;
6111 else if (fd->collapse > 1)
6113 remove_edge (se);
6114 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6116 else
6117 se->flags = EDGE_TRUE_VALUE;
6118 find_edge (cont_bb, trip_update_bb)->flags
6119 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6121 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6122 iter_part_bb);
6125 if (gimple_in_ssa_p (cfun))
6127 gphi_iterator psi;
6128 gphi *phi;
6129 edge re, ene;
6130 edge_var_map *vm;
6131 size_t i;
6133 gcc_assert (fd->collapse == 1 && !broken_loop);
6135 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6136 remove arguments of the phi nodes in fin_bb. We need to create
6137 appropriate phi nodes in iter_part_bb instead. */
6138 se = find_edge (iter_part_bb, fin_bb);
6139 re = single_succ_edge (trip_update_bb);
6140 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6141 ene = single_succ_edge (entry_bb);
6143 psi = gsi_start_phis (fin_bb);
6144 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6145 gsi_next (&psi), ++i)
6147 gphi *nphi;
6148 location_t locus;
6150 phi = psi.phi ();
6151 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6152 redirect_edge_var_map_def (vm), 0))
6153 continue;
6155 t = gimple_phi_result (phi);
6156 gcc_assert (t == redirect_edge_var_map_result (vm));
6158 if (!single_pred_p (fin_bb))
6159 t = copy_ssa_name (t, phi);
6161 nphi = create_phi_node (t, iter_part_bb);
6163 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6164 locus = gimple_phi_arg_location_from_edge (phi, se);
6166 /* A special case -- fd->loop.v is not yet computed in
5167 iter_part_bb, so we need to use vextra instead. */
6168 if (t == fd->loop.v)
6169 t = vextra;
6170 add_phi_arg (nphi, t, ene, locus);
6171 locus = redirect_edge_var_map_location (vm);
6172 tree back_arg = redirect_edge_var_map_def (vm);
6173 add_phi_arg (nphi, back_arg, re, locus);
6174 edge ce = find_edge (cont_bb, body_bb);
6175 if (ce == NULL)
6177 ce = BRANCH_EDGE (cont_bb);
6178 gcc_assert (single_succ (ce->dest) == body_bb);
6179 ce = single_succ_edge (ce->dest);
6181 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6182 gcc_assert (inner_loop_phi != NULL);
6183 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6184 find_edge (seq_start_bb, body_bb), locus);
6186 if (!single_pred_p (fin_bb))
6187 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6189 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6190 redirect_edge_var_map_clear (re);
6191 if (single_pred_p (fin_bb))
6192 while (1)
6194 psi = gsi_start_phis (fin_bb);
6195 if (gsi_end_p (psi))
6196 break;
6197 remove_phi_node (&psi, false);
6200 /* Make phi node for trip. */
6201 phi = create_phi_node (trip_main, iter_part_bb);
6202 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6203 UNKNOWN_LOCATION);
6204 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6205 UNKNOWN_LOCATION);
6208 if (!broken_loop)
6209 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6210 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6211 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6212 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6213 recompute_dominator (CDI_DOMINATORS, fin_bb));
6214 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6215 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6216 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6217 recompute_dominator (CDI_DOMINATORS, body_bb));
6219 if (!broken_loop)
6221 class loop *loop = body_bb->loop_father;
6222 class loop *trip_loop = alloc_loop ();
6223 trip_loop->header = iter_part_bb;
6224 trip_loop->latch = trip_update_bb;
6225 add_loop (trip_loop, iter_part_bb->loop_father);
6227 if (loop != entry_bb->loop_father)
6229 gcc_assert (loop->header == body_bb);
6230 gcc_assert (loop->latch == region->cont
6231 || single_pred (loop->latch) == region->cont);
6232 trip_loop->inner = loop;
6233 return;
6236 if (!gimple_omp_for_combined_p (fd->for_stmt))
6238 loop = alloc_loop ();
6239 loop->header = body_bb;
6240 if (collapse_bb == NULL)
6241 loop->latch = cont_bb;
6242 add_loop (loop, trip_loop);
6247 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6248 loop. Given parameters:
6250 for (V = N1; V cond N2; V += STEP) BODY;
6252 where COND is "<" or ">", we generate pseudocode
6254 V = N1;
6255 goto L1;
6256 L0:
6257 BODY;
6258 V += STEP;
6259 L1:
6260 if (V cond N2) goto L0; else goto L2;
6261 L2:
6263 For collapsed loops, emit the outer loops as scalar
6264 and only try to vectorize the innermost loop. */
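/* A rough illustration (an assumed example, not from the testsuite): for

	#pragma omp simd safelen(8)
	for (i = 0; i < n; i++)
	  a[i] += b[i];

   the body keeps its scalar GIMPLE form here; this function mainly shapes
   the CFG as above and records the safelen/simduid information so that the
   vectorizer can later vectorize the loop with a vectorization factor of
   at most 8.  */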
6266 static void
6267 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6269 tree type, t;
6270 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6271 gimple_stmt_iterator gsi;
6272 gimple *stmt;
6273 gcond *cond_stmt;
6274 bool broken_loop = region->cont == NULL;
6275 edge e, ne;
6276 tree *counts = NULL;
6277 int i;
6278 int safelen_int = INT_MAX;
6279 bool dont_vectorize = false;
6280 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6281 OMP_CLAUSE_SAFELEN);
6282 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6283 OMP_CLAUSE__SIMDUID_);
6284 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6285 OMP_CLAUSE_IF);
6286 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6287 OMP_CLAUSE_SIMDLEN);
6288 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6289 OMP_CLAUSE__CONDTEMP_);
6290 tree n1, n2;
6291 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6293 if (safelen)
6295 poly_uint64 val;
6296 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6297 if (!poly_int_tree_p (safelen, &val))
6298 safelen_int = 0;
6299 else
6300 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6301 if (safelen_int == 1)
6302 safelen_int = 0;
6304 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6305 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6307 safelen_int = 0;
6308 dont_vectorize = true;
6310 type = TREE_TYPE (fd->loop.v);
6311 entry_bb = region->entry;
6312 cont_bb = region->cont;
6313 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6314 gcc_assert (broken_loop
6315 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6316 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6317 if (!broken_loop)
6319 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6320 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6321 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6322 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6324 else
6326 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6327 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6328 l2_bb = single_succ (l1_bb);
6330 exit_bb = region->exit;
6331 l2_dom_bb = NULL;
6333 gsi = gsi_last_nondebug_bb (entry_bb);
6335 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6336 /* Not needed in SSA form right now. */
6337 gcc_assert (!gimple_in_ssa_p (cfun));
6338 if (fd->collapse > 1
6339 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6340 || broken_loop))
6342 int first_zero_iter = -1, dummy = -1;
6343 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6345 counts = XALLOCAVEC (tree, fd->collapse);
6346 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6347 zero_iter_bb, first_zero_iter,
6348 dummy_bb, dummy, l2_dom_bb);
6350 if (l2_dom_bb == NULL)
6351 l2_dom_bb = l1_bb;
6353 n1 = fd->loop.n1;
6354 n2 = fd->loop.n2;
6355 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6357 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6358 OMP_CLAUSE__LOOPTEMP_);
6359 gcc_assert (innerc);
6360 n1 = OMP_CLAUSE_DECL (innerc);
6361 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6362 OMP_CLAUSE__LOOPTEMP_);
6363 gcc_assert (innerc);
6364 n2 = OMP_CLAUSE_DECL (innerc);
6366 tree step = fd->loop.step;
6367 tree orig_step = step; /* May be different from step if is_simt. */
6369 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6370 OMP_CLAUSE__SIMT_);
6371 if (is_simt)
6373 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6374 is_simt = safelen_int > 1;
6376 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
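/* Under SIMT each lane gets a disjoint subset of iterations: the IV is
   started at N1 + STEP * lane and advanced by STEP * vf, where vf is the
   number of lanes (possibly clamped through SIMT_MAXLANE below).  */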
6377 if (is_simt)
6379 simt_lane = create_tmp_var (unsigned_type_node);
6380 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6381 gimple_call_set_lhs (g, simt_lane);
6382 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6383 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6384 fold_convert (TREE_TYPE (step), simt_lane));
6385 n1 = fold_convert (type, n1);
6386 if (POINTER_TYPE_P (type))
6387 n1 = fold_build_pointer_plus (n1, offset);
6388 else
6389 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6391 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6392 if (fd->collapse > 1)
6393 simt_maxlane = build_one_cst (unsigned_type_node);
6394 else if (safelen_int < omp_max_simt_vf ())
6395 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6396 tree vf
6397 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6398 unsigned_type_node, 0);
6399 if (simt_maxlane)
6400 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6401 vf = fold_convert (TREE_TYPE (step), vf);
6402 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6405 tree n2var = NULL_TREE;
6406 tree n2v = NULL_TREE;
6407 tree *nonrect_bounds = NULL;
6408 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6409 if (fd->collapse > 1)
6411 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6413 if (fd->non_rect)
6415 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6416 memset (nonrect_bounds, 0,
6417 sizeof (tree) * (fd->last_nonrect + 1));
6419 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6420 gcc_assert (entry_bb == gsi_bb (gsi));
6421 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6422 gsi_prev (&gsi);
6423 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6424 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6425 NULL, n1);
6426 gsi = gsi_for_stmt (fd->for_stmt);
6428 if (broken_loop)
6430 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6432 /* Compute in n2var the limit for the first innermost loop,
6433 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6434 where cnt is how many iterations the loop would have if
6435 all further iterations were assigned to the current task. */
6436 n2var = create_tmp_var (type);
6437 i = fd->collapse - 1;
6438 tree itype = TREE_TYPE (fd->loops[i].v);
6439 if (POINTER_TYPE_P (itype))
6440 itype = signed_type_for (itype);
6441 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6442 ? -1 : 1));
6443 t = fold_build2 (PLUS_EXPR, itype,
6444 fold_convert (itype, fd->loops[i].step), t);
6445 t = fold_build2 (PLUS_EXPR, itype, t,
6446 fold_convert (itype, fd->loops[i].n2));
6447 if (fd->loops[i].m2)
6449 tree t2 = fold_convert (itype,
6450 fd->loops[i - fd->loops[i].outer].v);
6451 tree t3 = fold_convert (itype, fd->loops[i].m2);
6452 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6453 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6455 t = fold_build2 (MINUS_EXPR, itype, t,
6456 fold_convert (itype, fd->loops[i].v));
6457 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6458 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6459 fold_build1 (NEGATE_EXPR, itype, t),
6460 fold_build1 (NEGATE_EXPR, itype,
6461 fold_convert (itype,
6462 fd->loops[i].step)));
6463 else
6464 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6465 fold_convert (itype, fd->loops[i].step));
6466 t = fold_convert (type, t);
6467 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6468 min_arg1 = create_tmp_var (type);
6469 expand_omp_build_assign (&gsi, min_arg1, t2);
6470 min_arg2 = create_tmp_var (type);
6471 expand_omp_build_assign (&gsi, min_arg2, t);
6473 else
6475 if (TREE_CODE (n2) == INTEGER_CST)
6477 /* Indicate for lastprivate handling that at least one iteration
6478 has been performed, without wasting runtime. */
6479 if (integer_nonzerop (n2))
6480 expand_omp_build_assign (&gsi, fd->loop.v,
6481 fold_convert (type, n2));
6482 else
6483 /* Indicate that no iteration has been performed. */
6484 expand_omp_build_assign (&gsi, fd->loop.v,
6485 build_one_cst (type));
6487 else
6489 expand_omp_build_assign (&gsi, fd->loop.v,
6490 build_zero_cst (type));
6491 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6493 for (i = 0; i < fd->collapse; i++)
6495 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6496 if (fd->loops[i].m1)
6498 tree t2
6499 = fold_convert (TREE_TYPE (t),
6500 fd->loops[i - fd->loops[i].outer].v);
6501 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6502 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6503 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6505 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6506 /* For normal non-combined collapsed loops just initialize
6507 the outermost iterator in the entry_bb. */
6508 if (!broken_loop)
6509 break;
6513 else
6514 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6515 tree altv = NULL_TREE, altn2 = NULL_TREE;
6516 if (fd->collapse == 1
6517 && !broken_loop
6518 && TREE_CODE (orig_step) != INTEGER_CST)
6520 /* The vectorizer currently punts on loops with non-constant steps
6521 for the main IV (can't compute number of iterations and gives up
6522 because of that). Since for OpenMP loops it is always possible to
6523 compute the number of iterations upfront, use an alternate IV
6524 as the loop iterator:
6525 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6526 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6527 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6528 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6529 tree itype = TREE_TYPE (fd->loop.v);
6530 if (POINTER_TYPE_P (itype))
6531 itype = signed_type_for (itype);
6532 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6533 t = fold_build2 (PLUS_EXPR, itype,
6534 fold_convert (itype, step), t);
6535 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6536 t = fold_build2 (MINUS_EXPR, itype, t,
6537 fold_convert (itype, fd->loop.v));
6538 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6539 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6540 fold_build1 (NEGATE_EXPR, itype, t),
6541 fold_build1 (NEGATE_EXPR, itype,
6542 fold_convert (itype, step)));
6543 else
6544 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6545 fold_convert (itype, step));
6546 t = fold_convert (TREE_TYPE (altv), t);
6547 altn2 = create_tmp_var (TREE_TYPE (altv));
6548 expand_omp_build_assign (&gsi, altn2, t);
6549 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6550 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6551 true, GSI_SAME_STMT);
6552 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6553 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6554 build_zero_cst (TREE_TYPE (altv)));
6555 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6557 else if (fd->collapse > 1
6558 && !broken_loop
6559 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6560 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6562 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6563 altn2 = create_tmp_var (TREE_TYPE (altv));
6565 if (cond_var)
6567 if (POINTER_TYPE_P (type)
6568 || TREE_CODE (n1) != INTEGER_CST
6569 || fd->loop.cond_code != LT_EXPR
6570 || tree_int_cst_sgn (n1) != 1)
6571 expand_omp_build_assign (&gsi, cond_var,
6572 build_one_cst (TREE_TYPE (cond_var)));
6573 else
6574 expand_omp_build_assign (&gsi, cond_var,
6575 fold_convert (TREE_TYPE (cond_var), n1));
6578 /* Remove the GIMPLE_OMP_FOR statement. */
6579 gsi_remove (&gsi, true);
6581 if (!broken_loop)
6583 /* Code to control the increment goes in the CONT_BB. */
6584 gsi = gsi_last_nondebug_bb (cont_bb);
6585 stmt = gsi_stmt (gsi);
6586 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6588 if (fd->collapse == 1
6589 || gimple_omp_for_combined_into_p (fd->for_stmt))
6591 if (POINTER_TYPE_P (type))
6592 t = fold_build_pointer_plus (fd->loop.v, step);
6593 else
6594 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6595 expand_omp_build_assign (&gsi, fd->loop.v, t);
6597 else if (TREE_CODE (n2) != INTEGER_CST)
6598 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6599 if (altv)
6601 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6602 build_one_cst (TREE_TYPE (altv)));
6603 expand_omp_build_assign (&gsi, altv, t);
6606 if (fd->collapse > 1)
6608 i = fd->collapse - 1;
6609 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6611 t = fold_convert (sizetype, fd->loops[i].step);
6612 t = fold_build_pointer_plus (fd->loops[i].v, t);
6614 else
6616 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6617 fd->loops[i].step);
6618 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6619 fd->loops[i].v, t);
6621 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6623 if (cond_var)
6625 if (POINTER_TYPE_P (type)
6626 || TREE_CODE (n1) != INTEGER_CST
6627 || fd->loop.cond_code != LT_EXPR
6628 || tree_int_cst_sgn (n1) != 1)
6629 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6630 build_one_cst (TREE_TYPE (cond_var)));
6631 else
6632 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6633 fold_convert (TREE_TYPE (cond_var), step));
6634 expand_omp_build_assign (&gsi, cond_var, t);
6637 /* Remove GIMPLE_OMP_CONTINUE. */
6638 gsi_remove (&gsi, true);
6641 /* Emit the condition in L1_BB. */
6642 gsi = gsi_start_bb (l1_bb);
6644 if (altv)
6645 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6646 else if (fd->collapse > 1
6647 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6648 && !broken_loop)
6650 i = fd->collapse - 1;
6651 tree itype = TREE_TYPE (fd->loops[i].v);
6652 if (fd->loops[i].m2)
6653 t = n2v = create_tmp_var (itype);
6654 else
6655 t = fold_convert (itype, fd->loops[i].n2);
6656 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6657 false, GSI_CONTINUE_LINKING);
6658 tree v = fd->loops[i].v;
6659 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6660 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6661 false, GSI_CONTINUE_LINKING);
6662 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6664 else
6666 if (fd->collapse > 1 && !broken_loop)
6667 t = n2var;
6668 else
6669 t = fold_convert (type, n2);
6670 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6671 false, GSI_CONTINUE_LINKING);
6672 tree v = fd->loop.v;
6673 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6674 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6675 false, GSI_CONTINUE_LINKING);
6676 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6678 cond_stmt = gimple_build_cond_empty (t);
6679 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6680 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6681 NULL, NULL)
6682 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6683 NULL, NULL))
6685 gsi = gsi_for_stmt (cond_stmt);
6686 gimple_regimplify_operands (cond_stmt, &gsi);
6689 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6690 if (is_simt)
6692 gsi = gsi_start_bb (l2_bb);
6693 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6694 if (POINTER_TYPE_P (type))
6695 t = fold_build_pointer_plus (fd->loop.v, step);
6696 else
6697 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6698 expand_omp_build_assign (&gsi, fd->loop.v, t);
6701 /* Remove GIMPLE_OMP_RETURN. */
6702 gsi = gsi_last_nondebug_bb (exit_bb);
6703 gsi_remove (&gsi, true);
6705 /* Connect the new blocks. */
6706 remove_edge (FALLTHRU_EDGE (entry_bb));
6708 if (!broken_loop)
6710 remove_edge (BRANCH_EDGE (entry_bb));
6711 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6713 e = BRANCH_EDGE (l1_bb);
6714 ne = FALLTHRU_EDGE (l1_bb);
6715 e->flags = EDGE_TRUE_VALUE;
6717 else
6719 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6721 ne = single_succ_edge (l1_bb);
6722 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6725 ne->flags = EDGE_FALSE_VALUE;
6726 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6727 ne->probability = e->probability.invert ();
6729 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6730 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6732 if (simt_maxlane)
6734 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6735 NULL_TREE, NULL_TREE);
6736 gsi = gsi_last_bb (entry_bb);
6737 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6738 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6739 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6740 FALLTHRU_EDGE (entry_bb)->probability
6741 = profile_probability::guessed_always ().apply_scale (7, 8);
6742 BRANCH_EDGE (entry_bb)->probability
6743 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6744 l2_dom_bb = entry_bb;
6746 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6748 if (!broken_loop && fd->collapse > 1)
6750 basic_block last_bb = l1_bb;
6751 basic_block init_bb = NULL;
6752 for (i = fd->collapse - 2; i >= 0; i--)
6754 tree nextn2v = NULL_TREE;
6755 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6756 e = EDGE_SUCC (last_bb, 0);
6757 else
6758 e = EDGE_SUCC (last_bb, 1);
6759 basic_block bb = split_edge (e);
6760 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6762 t = fold_convert (sizetype, fd->loops[i].step);
6763 t = fold_build_pointer_plus (fd->loops[i].v, t);
6765 else
6767 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6768 fd->loops[i].step);
6769 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6770 fd->loops[i].v, t);
6772 gsi = gsi_after_labels (bb);
6773 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6775 bb = split_block (bb, last_stmt (bb))->dest;
6776 gsi = gsi_start_bb (bb);
6777 tree itype = TREE_TYPE (fd->loops[i].v);
6778 if (fd->loops[i].m2)
6779 t = nextn2v = create_tmp_var (itype);
6780 else
6781 t = fold_convert (itype, fd->loops[i].n2);
6782 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6783 false, GSI_CONTINUE_LINKING);
6784 tree v = fd->loops[i].v;
6785 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6786 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6787 false, GSI_CONTINUE_LINKING);
6788 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6789 cond_stmt = gimple_build_cond_empty (t);
6790 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6791 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6792 expand_omp_regimplify_p, NULL, NULL)
6793 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6794 expand_omp_regimplify_p, NULL, NULL))
6796 gsi = gsi_for_stmt (cond_stmt);
6797 gimple_regimplify_operands (cond_stmt, &gsi);
6799 ne = single_succ_edge (bb);
6800 ne->flags = EDGE_FALSE_VALUE;
6802 init_bb = create_empty_bb (bb);
6803 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6804 add_bb_to_loop (init_bb, bb->loop_father);
6805 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6806 e->probability
6807 = profile_probability::guessed_always ().apply_scale (7, 8);
6808 ne->probability = e->probability.invert ();
6810 gsi = gsi_after_labels (init_bb);
6811 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6812 fd->loops[i + 1].n1);
6813 if (fd->loops[i + 1].m1)
6815 tree t2 = fold_convert (TREE_TYPE (t),
6816 fd->loops[i + 1
6817 - fd->loops[i + 1].outer].v);
6818 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6819 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6820 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6822 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6823 if (fd->loops[i + 1].m2)
6825 if (i + 2 == fd->collapse && (n2var || altv))
6827 gcc_assert (n2v == NULL_TREE);
6828 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6830 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6831 fd->loops[i + 1].n2);
6832 tree t2 = fold_convert (TREE_TYPE (t),
6833 fd->loops[i + 1
6834 - fd->loops[i + 1].outer].v);
6835 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6836 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6837 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6838 expand_omp_build_assign (&gsi, n2v, t);
6840 if (i + 2 == fd->collapse && n2var)
6842 /* For composite simd, n2 is the first iteration the current
6843 task shouldn't already handle, so we effectively want to use
6844 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6845 as the vectorized loop. Except the vectorizer will not
6846 vectorize that, so instead compute N2VAR as
6847 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6848 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6849 as the loop to vectorize. */
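/* Worked example with hypothetical values: for V = 10, N2 = 100 and
   COUNTS3 = 16, N2VAR = 10 + MIN (90, 16) = 26, so the vectorizable
   inner loop runs V from 10 up to (but not including) 26 without
   stepping past N2.  */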
6850 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6851 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6853 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6854 == LT_EXPR ? -1 : 1));
6855 t = fold_build2 (PLUS_EXPR, itype,
6856 fold_convert (itype,
6857 fd->loops[i + 1].step), t);
6858 if (fd->loops[i + 1].m2)
6859 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6860 else
6861 t = fold_build2 (PLUS_EXPR, itype, t,
6862 fold_convert (itype,
6863 fd->loops[i + 1].n2));
6864 t = fold_build2 (MINUS_EXPR, itype, t,
6865 fold_convert (itype, fd->loops[i + 1].v));
6866 tree step = fold_convert (itype, fd->loops[i + 1].step);
6867 if (TYPE_UNSIGNED (itype)
6868 && fd->loops[i + 1].cond_code == GT_EXPR)
6869 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6870 fold_build1 (NEGATE_EXPR, itype, t),
6871 fold_build1 (NEGATE_EXPR, itype, step));
6872 else
6873 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6874 t = fold_convert (type, t);
6876 else
6877 t = counts[i + 1];
6878 expand_omp_build_assign (&gsi, min_arg1, t2);
6879 expand_omp_build_assign (&gsi, min_arg2, t);
6880 e = split_block (init_bb, last_stmt (init_bb));
6881 gsi = gsi_after_labels (e->dest);
6882 init_bb = e->dest;
6883 remove_edge (FALLTHRU_EDGE (entry_bb));
6884 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6885 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6886 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6887 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6888 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6889 expand_omp_build_assign (&gsi, n2var, t);
6891 if (i + 2 == fd->collapse && altv)
6893 /* The vectorizer currently punts on loops with non-constant
6894 steps for the main IV (can't compute number of iterations
6895 and gives up because of that). As it is always possible for
6896 OpenMP loops to compute the number of iterations upfront, use
6897 an alternate IV as the loop iterator. */
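/* Sketch of the intended shape (illustrative, not literal emitted
   GIMPLE): the innermost loop is effectively driven as
     for (ALTV = 0; ALTV < ALTN2; ALTV++)
       { BODY; V3 += STEP3; }
   where ALTN2, the precomputed iteration count, is set just below.  */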
6898 expand_omp_build_assign (&gsi, altv,
6899 build_zero_cst (TREE_TYPE (altv)));
6900 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6901 if (POINTER_TYPE_P (itype))
6902 itype = signed_type_for (itype);
6903 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6904 ? -1 : 1));
6905 t = fold_build2 (PLUS_EXPR, itype,
6906 fold_convert (itype, fd->loops[i + 1].step), t);
6907 t = fold_build2 (PLUS_EXPR, itype, t,
6908 fold_convert (itype,
6909 fd->loops[i + 1].m2
6910 ? n2v : fd->loops[i + 1].n2));
6911 t = fold_build2 (MINUS_EXPR, itype, t,
6912 fold_convert (itype, fd->loops[i + 1].v));
6913 tree step = fold_convert (itype, fd->loops[i + 1].step);
6914 if (TYPE_UNSIGNED (itype)
6915 && fd->loops[i + 1].cond_code == GT_EXPR)
6916 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6917 fold_build1 (NEGATE_EXPR, itype, t),
6918 fold_build1 (NEGATE_EXPR, itype, step));
6919 else
6920 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6921 t = fold_convert (TREE_TYPE (altv), t);
6922 expand_omp_build_assign (&gsi, altn2, t);
6923 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6924 fd->loops[i + 1].m2
6925 ? n2v : fd->loops[i + 1].n2);
6926 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6927 true, GSI_SAME_STMT);
6928 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6929 fd->loops[i + 1].v, t2);
6930 gassign *g
6931 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6932 build_zero_cst (TREE_TYPE (altv)));
6933 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6935 n2v = nextn2v;
6937 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6938 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6940 e = find_edge (entry_bb, last_bb);
6941 redirect_edge_succ (e, bb);
6942 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6943 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6946 last_bb = bb;
6949 if (!broken_loop)
6951 class loop *loop = alloc_loop ();
6952 loop->header = l1_bb;
6953 loop->latch = cont_bb;
6954 add_loop (loop, l1_bb->loop_father);
6955 loop->safelen = safelen_int;
6956 if (simduid)
6958 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6959 cfun->has_simduid_loops = true;
6961 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6962 the loop. */
6963 if ((flag_tree_loop_vectorize
6964 || !OPTION_SET_P (flag_tree_loop_vectorize))
6965 && flag_tree_loop_optimize
6966 && loop->safelen > 1)
6968 loop->force_vectorize = true;
6969 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6971 unsigned HOST_WIDE_INT v
6972 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6973 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6974 loop->simdlen = v;
6976 cfun->has_force_vectorize_loops = true;
6978 else if (dont_vectorize)
6979 loop->dont_vectorize = true;
6981 else if (simduid)
6982 cfun->has_simduid_loops = true;
6985 /* A taskloop construct is represented after gimplification with
6986 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6987 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6988 which should just compute all the needed loop temporaries
6989 for GIMPLE_OMP_TASK. */
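/* Illustrative shape of the IL this routine operates on (simplified,
   not literal):

     GIMPLE_OMP_FOR (outer)   <- expanded here; only computes the
       GIMPLE_OMP_TASK           _looptemp_ start/end (and, if needed,
         GIMPLE_OMP_FOR (inner)   the iteration count) for the task.
           BODY                   The inner loop is expanded separately
                                  by expand_omp_taskloop_for_inner.  */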
6991 static void
6992 expand_omp_taskloop_for_outer (struct omp_region *region,
6993 struct omp_for_data *fd,
6994 gimple *inner_stmt)
6996 tree type, bias = NULL_TREE;
6997 basic_block entry_bb, cont_bb, exit_bb;
6998 gimple_stmt_iterator gsi;
6999 gassign *assign_stmt;
7000 tree *counts = NULL;
7001 int i;
7003 gcc_assert (inner_stmt);
7004 gcc_assert (region->cont);
7005 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7006 && gimple_omp_task_taskloop_p (inner_stmt));
7007 type = TREE_TYPE (fd->loop.v);
7009 /* See if we need to bias by LLONG_MIN. */
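/* Illustrative example (hypothetical bounds): for a signed 64-bit IV
   running from -5 to 5 while iter_type is unsigned long long, BIAS is
   LLONG_MIN reinterpreted as unsigned; adding it maps -5 and 5 to
   0x7fff...fffb and 0x8000...0005 respectively, preserving their order
   under the unsigned comparisons done by the GOMP_taskloop_ull
   interface.  */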
7010 if (fd->iter_type == long_long_unsigned_type_node
7011 && TREE_CODE (type) == INTEGER_TYPE
7012 && !TYPE_UNSIGNED (type))
7014 tree n1, n2;
7016 if (fd->loop.cond_code == LT_EXPR)
7018 n1 = fd->loop.n1;
7019 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7021 else
7023 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7024 n2 = fd->loop.n1;
7026 if (TREE_CODE (n1) != INTEGER_CST
7027 || TREE_CODE (n2) != INTEGER_CST
7028 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7029 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7032 entry_bb = region->entry;
7033 cont_bb = region->cont;
7034 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7035 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7036 exit_bb = region->exit;
7038 gsi = gsi_last_nondebug_bb (entry_bb);
7039 gimple *for_stmt = gsi_stmt (gsi);
7040 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7041 if (fd->collapse > 1)
7043 int first_zero_iter = -1, dummy = -1;
7044 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7046 counts = XALLOCAVEC (tree, fd->collapse);
7047 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7048 zero_iter_bb, first_zero_iter,
7049 dummy_bb, dummy, l2_dom_bb);
7051 if (zero_iter_bb)
7053 /* Some counts[i] vars might be uninitialized if
7054 some loop has zero iterations. But the body shouldn't
7055 be executed in that case, so just avoid uninit warnings. */
7056 for (i = first_zero_iter; i < fd->collapse; i++)
7057 if (SSA_VAR_P (counts[i]))
7058 suppress_warning (counts[i], OPT_Wuninitialized);
7059 gsi_prev (&gsi);
7060 edge e = split_block (entry_bb, gsi_stmt (gsi));
7061 entry_bb = e->dest;
7062 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7063 gsi = gsi_last_bb (entry_bb);
7064 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7065 get_immediate_dominator (CDI_DOMINATORS,
7066 zero_iter_bb));
7070 tree t0, t1;
7071 t1 = fd->loop.n2;
7072 t0 = fd->loop.n1;
7073 if (POINTER_TYPE_P (TREE_TYPE (t0))
7074 && TYPE_PRECISION (TREE_TYPE (t0))
7075 != TYPE_PRECISION (fd->iter_type))
7077 /* Avoid casting pointers to integer of a different size. */
7078 tree itype = signed_type_for (type);
7079 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7080 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7082 else
7084 t1 = fold_convert (fd->iter_type, t1);
7085 t0 = fold_convert (fd->iter_type, t0);
7087 if (bias)
7089 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7090 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7093 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7094 OMP_CLAUSE__LOOPTEMP_);
7095 gcc_assert (innerc);
7096 tree startvar = OMP_CLAUSE_DECL (innerc);
7097 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7098 gcc_assert (innerc);
7099 tree endvar = OMP_CLAUSE_DECL (innerc);
7100 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7102 innerc = find_lastprivate_looptemp (fd, innerc);
7103 if (innerc)
7105 /* If needed (inner taskloop has lastprivate clause), propagate
7106 down the total number of iterations. */
7107 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7108 NULL_TREE, false,
7109 GSI_CONTINUE_LINKING);
7110 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7111 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7115 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7116 GSI_CONTINUE_LINKING);
7117 assign_stmt = gimple_build_assign (startvar, t0);
7118 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7120 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7121 GSI_CONTINUE_LINKING);
7122 assign_stmt = gimple_build_assign (endvar, t1);
7123 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7124 if (fd->collapse > 1)
7125 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7127 /* Remove the GIMPLE_OMP_FOR statement. */
7128 gsi = gsi_for_stmt (for_stmt);
7129 gsi_remove (&gsi, true);
7131 gsi = gsi_last_nondebug_bb (cont_bb);
7132 gsi_remove (&gsi, true);
7134 gsi = gsi_last_nondebug_bb (exit_bb);
7135 gsi_remove (&gsi, true);
7137 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7138 remove_edge (BRANCH_EDGE (entry_bb));
7139 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7140 remove_edge (BRANCH_EDGE (cont_bb));
7141 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7142 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7143 recompute_dominator (CDI_DOMINATORS, region->entry));
7146 /* A taskloop construct is represented after gimplification with
7147 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7148 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7149 GOMP_taskloop{,_ull} function arranges for each task to be given just
7150 a single range of iterations. */
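/* Illustratively, each task created by GOMP_taskloop{,_ull} then
   executes roughly

     for (V = <task start>; V cond <task end>; V += STEP)
       BODY;

   with the start/end pair delivered through the _looptemp_ clauses
   picked up below.  */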
7152 static void
7153 expand_omp_taskloop_for_inner (struct omp_region *region,
7154 struct omp_for_data *fd,
7155 gimple *inner_stmt)
7157 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7158 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7159 basic_block fin_bb;
7160 gimple_stmt_iterator gsi;
7161 edge ep;
7162 bool broken_loop = region->cont == NULL;
7163 tree *counts = NULL;
7164 tree n1, n2, step;
7166 itype = type = TREE_TYPE (fd->loop.v);
7167 if (POINTER_TYPE_P (type))
7168 itype = signed_type_for (type);
7170 /* See if we need to bias by LLONG_MIN. */
7171 if (fd->iter_type == long_long_unsigned_type_node
7172 && TREE_CODE (type) == INTEGER_TYPE
7173 && !TYPE_UNSIGNED (type))
7175 tree n1, n2;
7177 if (fd->loop.cond_code == LT_EXPR)
7179 n1 = fd->loop.n1;
7180 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7182 else
7184 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7185 n2 = fd->loop.n1;
7187 if (TREE_CODE (n1) != INTEGER_CST
7188 || TREE_CODE (n2) != INTEGER_CST
7189 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7190 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7193 entry_bb = region->entry;
7194 cont_bb = region->cont;
7195 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7196 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7197 gcc_assert (broken_loop
7198 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7199 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7200 if (!broken_loop)
7202 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7203 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7205 exit_bb = region->exit;
7207 /* Iteration space partitioning goes in ENTRY_BB. */
7208 gsi = gsi_last_nondebug_bb (entry_bb);
7209 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7211 if (fd->collapse > 1)
7213 int first_zero_iter = -1, dummy = -1;
7214 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7216 counts = XALLOCAVEC (tree, fd->collapse);
7217 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7218 fin_bb, first_zero_iter,
7219 dummy_bb, dummy, l2_dom_bb);
7220 t = NULL_TREE;
7222 else
7223 t = integer_one_node;
7225 step = fd->loop.step;
7226 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7227 OMP_CLAUSE__LOOPTEMP_);
7228 gcc_assert (innerc);
7229 n1 = OMP_CLAUSE_DECL (innerc);
7230 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7231 gcc_assert (innerc);
7232 n2 = OMP_CLAUSE_DECL (innerc);
7233 if (bias)
7235 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7236 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7238 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7239 true, NULL_TREE, true, GSI_SAME_STMT);
7240 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7241 true, NULL_TREE, true, GSI_SAME_STMT);
7242 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7243 true, NULL_TREE, true, GSI_SAME_STMT);
7245 tree startvar = fd->loop.v;
7246 tree endvar = NULL_TREE;
7248 if (gimple_omp_for_combined_p (fd->for_stmt))
7250 tree clauses = gimple_omp_for_clauses (inner_stmt);
7251 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7252 gcc_assert (innerc);
7253 startvar = OMP_CLAUSE_DECL (innerc);
7254 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7255 OMP_CLAUSE__LOOPTEMP_);
7256 gcc_assert (innerc);
7257 endvar = OMP_CLAUSE_DECL (innerc);
7259 t = fold_convert (TREE_TYPE (startvar), n1);
7260 t = force_gimple_operand_gsi (&gsi, t,
7261 DECL_P (startvar)
7262 && TREE_ADDRESSABLE (startvar),
7263 NULL_TREE, false, GSI_CONTINUE_LINKING);
7264 gimple *assign_stmt = gimple_build_assign (startvar, t);
7265 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7267 t = fold_convert (TREE_TYPE (startvar), n2);
7268 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7269 false, GSI_CONTINUE_LINKING);
7270 if (endvar)
7272 assign_stmt = gimple_build_assign (endvar, e);
7273 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7274 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7275 assign_stmt = gimple_build_assign (fd->loop.v, e);
7276 else
7277 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7278 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7281 tree *nonrect_bounds = NULL;
7282 if (fd->collapse > 1)
7284 if (fd->non_rect)
7286 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7287 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7289 gcc_assert (gsi_bb (gsi) == entry_bb);
7290 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7291 startvar);
7292 entry_bb = gsi_bb (gsi);
7295 if (!broken_loop)
7297 /* The code controlling the sequential loop replaces the
7298 GIMPLE_OMP_CONTINUE. */
7299 gsi = gsi_last_nondebug_bb (cont_bb);
7300 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7301 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7302 vmain = gimple_omp_continue_control_use (cont_stmt);
7303 vback = gimple_omp_continue_control_def (cont_stmt);
7305 if (!gimple_omp_for_combined_p (fd->for_stmt))
7307 if (POINTER_TYPE_P (type))
7308 t = fold_build_pointer_plus (vmain, step);
7309 else
7310 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7311 t = force_gimple_operand_gsi (&gsi, t,
7312 DECL_P (vback)
7313 && TREE_ADDRESSABLE (vback),
7314 NULL_TREE, true, GSI_SAME_STMT);
7315 assign_stmt = gimple_build_assign (vback, t);
7316 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7318 t = build2 (fd->loop.cond_code, boolean_type_node,
7319 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7320 ? t : vback, e);
7321 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7324 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7325 gsi_remove (&gsi, true);
7327 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7328 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7329 cont_bb, body_bb);
7332 /* Remove the GIMPLE_OMP_FOR statement. */
7333 gsi = gsi_for_stmt (fd->for_stmt);
7334 gsi_remove (&gsi, true);
7336 /* Remove the GIMPLE_OMP_RETURN statement. */
7337 gsi = gsi_last_nondebug_bb (exit_bb);
7338 gsi_remove (&gsi, true);
7340 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7341 if (!broken_loop)
7342 remove_edge (BRANCH_EDGE (entry_bb));
7343 else
7345 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7346 region->outer->cont = NULL;
7349 /* Connect all the blocks. */
7350 if (!broken_loop)
7352 ep = find_edge (cont_bb, body_bb);
7353 if (gimple_omp_for_combined_p (fd->for_stmt))
7355 remove_edge (ep);
7356 ep = NULL;
7358 else if (fd->collapse > 1)
7360 remove_edge (ep);
7361 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7363 else
7364 ep->flags = EDGE_TRUE_VALUE;
7365 find_edge (cont_bb, fin_bb)->flags
7366 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7369 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7370 recompute_dominator (CDI_DOMINATORS, body_bb));
7371 if (!broken_loop)
7372 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7373 recompute_dominator (CDI_DOMINATORS, fin_bb));
7375 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7377 class loop *loop = alloc_loop ();
7378 loop->header = body_bb;
7379 if (collapse_bb == NULL)
7380 loop->latch = cont_bb;
7381 add_loop (loop, body_bb->loop_father);
7385 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7386 partitioned loop. The lowering here is abstracted, in that the
7387 loop parameters are passed through internal functions, which are
7388 further lowered by oacc_device_lower, once we get to the target
7389 compiler. The loop is of the form:
7391 for (V = B; V LTGT E; V += S) {BODY}
7393 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7394 (constant 0 for no chunking), and we will have a GWV partitioning
7395 mask, specifying dimensions over which the loop is to be
7396 partitioned (see note below). We generate code that looks like
7397 (this ignores tiling):
7399 <entry_bb> [incoming FALL->body, BRANCH->exit]
7400 typedef signedintify (typeof (V)) T; // underlying signed integral type
7401 T range = E - B;
7402 T chunk_no = 0;
7403 T DIR = LTGT == '<' ? +1 : -1;
7404 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7405 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7407 <head_bb> [created by splitting end of entry_bb]
7408 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7409 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7410 if (!(offset LTGT bound)) goto bottom_bb;
7412 <body_bb> [incoming]
7413 V = B + offset;
7414 {BODY}
7416 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7417 offset += step;
7418 if (offset LTGT bound) goto body_bb; [*]
7420 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7421 chunk_no++;
7422 if (chunk_no < chunk_max) goto head_bb;
7424 <exit_bb> [incoming]
7425 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7427 [*] Needed if V live at end of loop. */
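/* Concrete (illustrative) special case: when the 'parloops' kernels
   parallelizer has put the function into SSA form, the code below uses
   no chunking (CHUNK_SIZE is 0, so no bottom_bb chunk loop is created)
   and a GWV mask of just GOMP_DIM_MASK (GOMP_DIM_GANG), i.e. the loop
   is partitioned over gangs only.  */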
7429 static void
7430 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7432 bool is_oacc_kernels_parallelized
7433 = (lookup_attribute ("oacc kernels parallelized",
7434 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7436 bool is_oacc_kernels
7437 = (lookup_attribute ("oacc kernels",
7438 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7439 if (is_oacc_kernels_parallelized)
7440 gcc_checking_assert (is_oacc_kernels);
7442 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7443 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7444 for SSA specifics, and some are for 'parloops' OpenACC
7445 'kernels'-parallelized specifics. */
7447 tree v = fd->loop.v;
7448 enum tree_code cond_code = fd->loop.cond_code;
7449 enum tree_code plus_code = PLUS_EXPR;
7451 tree chunk_size = integer_minus_one_node;
7452 tree gwv = integer_zero_node;
7453 tree iter_type = TREE_TYPE (v);
7454 tree diff_type = iter_type;
7455 tree plus_type = iter_type;
7456 struct oacc_collapse *counts = NULL;
7458 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7459 == GF_OMP_FOR_KIND_OACC_LOOP);
7460 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7461 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7463 if (POINTER_TYPE_P (iter_type))
7465 plus_code = POINTER_PLUS_EXPR;
7466 plus_type = sizetype;
7468 for (int ix = fd->collapse; ix--;)
7470 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7471 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7472 diff_type = diff_type2;
7474 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7475 diff_type = signed_type_for (diff_type);
7476 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7477 diff_type = integer_type_node;
7479 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7480 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7481 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7482 basic_block bottom_bb = NULL;
7484 /* entry_bb has two successors; the branch edge is to the exit
7485 block, fallthrough edge to body. */
7486 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7487 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7489 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7490 body_bb, or to a block whose only successor is the body_bb. Its
7491 fallthrough successor is the final block (same as the branch
7492 successor of the entry_bb). */
7493 if (cont_bb)
7495 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7496 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7498 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7499 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7501 else
7502 gcc_assert (!gimple_in_ssa_p (cfun));
7504 /* The exit block only has entry_bb and cont_bb as predecessors. */
7505 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7507 tree chunk_no;
7508 tree chunk_max = NULL_TREE;
7509 tree bound, offset;
7510 tree step = create_tmp_var (diff_type, ".step");
7511 bool up = cond_code == LT_EXPR;
7512 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7513 bool chunking = !gimple_in_ssa_p (cfun);
7514 bool negating;
7516 /* Tiling vars. */
7517 tree tile_size = NULL_TREE;
7518 tree element_s = NULL_TREE;
7519 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7520 basic_block elem_body_bb = NULL;
7521 basic_block elem_cont_bb = NULL;
7523 /* SSA instances. */
7524 tree offset_incr = NULL_TREE;
7525 tree offset_init = NULL_TREE;
7527 gimple_stmt_iterator gsi;
7528 gassign *ass;
7529 gcall *call;
7530 gimple *stmt;
7531 tree expr;
7532 location_t loc;
7533 edge split, be, fte;
7535 /* Split the end of entry_bb to create head_bb. */
7536 split = split_block (entry_bb, last_stmt (entry_bb));
7537 basic_block head_bb = split->dest;
7538 entry_bb = split->src;
7540 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7541 gsi = gsi_last_nondebug_bb (entry_bb);
7542 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7543 loc = gimple_location (for_stmt);
7545 if (gimple_in_ssa_p (cfun))
7547 offset_init = gimple_omp_for_index (for_stmt, 0);
7548 gcc_assert (integer_zerop (fd->loop.n1));
7549 /* The SSA parallelizer does gang parallelism. */
7550 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7553 if (fd->collapse > 1 || fd->tiling)
7555 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7556 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7557 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7558 TREE_TYPE (fd->loop.n2), loc);
7560 if (SSA_VAR_P (fd->loop.n2))
7562 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7563 true, GSI_SAME_STMT);
7564 ass = gimple_build_assign (fd->loop.n2, total);
7565 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7569 tree b = fd->loop.n1;
7570 tree e = fd->loop.n2;
7571 tree s = fd->loop.step;
7573 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7574 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7576 /* Convert the step, avoiding possible unsigned->signed overflow. */
7577 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7578 if (negating)
7579 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7580 s = fold_convert (diff_type, s);
7581 if (negating)
7582 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7583 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7585 if (!chunking)
7586 chunk_size = integer_zero_node;
7587 expr = fold_convert (diff_type, chunk_size);
7588 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7589 NULL_TREE, true, GSI_SAME_STMT);
7591 if (fd->tiling)
7593 /* Determine the tile size and element step,
7594 modify the outer loop step size. */
7595 tile_size = create_tmp_var (diff_type, ".tile_size");
7596 expr = build_int_cst (diff_type, 1);
7597 for (int ix = 0; ix < fd->collapse; ix++)
7598 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7599 expr = force_gimple_operand_gsi (&gsi, expr, true,
7600 NULL_TREE, true, GSI_SAME_STMT);
7601 ass = gimple_build_assign (tile_size, expr);
7602 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7604 element_s = create_tmp_var (diff_type, ".element_s");
7605 ass = gimple_build_assign (element_s, s);
7606 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7608 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7609 s = force_gimple_operand_gsi (&gsi, expr, true,
7610 NULL_TREE, true, GSI_SAME_STMT);
7613 /* Determine the range, avoiding possible unsigned->signed overflow. */
7614 negating = !up && TYPE_UNSIGNED (iter_type);
7615 expr = fold_build2 (MINUS_EXPR, plus_type,
7616 fold_convert (plus_type, negating ? b : e),
7617 fold_convert (plus_type, negating ? e : b));
7618 expr = fold_convert (diff_type, expr);
7619 if (negating)
7620 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7621 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7622 NULL_TREE, true, GSI_SAME_STMT);
7624 chunk_no = build_int_cst (diff_type, 0);
7625 if (chunking)
7627 gcc_assert (!gimple_in_ssa_p (cfun));
7629 expr = chunk_no;
7630 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7631 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7633 ass = gimple_build_assign (chunk_no, expr);
7634 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7636 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7637 build_int_cst (integer_type_node,
7638 IFN_GOACC_LOOP_CHUNKS),
7639 dir, range, s, chunk_size, gwv);
7640 gimple_call_set_lhs (call, chunk_max);
7641 gimple_set_location (call, loc);
7642 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7644 else
7645 chunk_size = chunk_no;
7647 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7648 build_int_cst (integer_type_node,
7649 IFN_GOACC_LOOP_STEP),
7650 dir, range, s, chunk_size, gwv);
7651 gimple_call_set_lhs (call, step);
7652 gimple_set_location (call, loc);
7653 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7655 /* Remove the GIMPLE_OMP_FOR. */
7656 gsi_remove (&gsi, true);
7658 /* Fixup edges from head_bb. */
7659 be = BRANCH_EDGE (head_bb);
7660 fte = FALLTHRU_EDGE (head_bb);
7661 be->flags |= EDGE_FALSE_VALUE;
7662 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7664 basic_block body_bb = fte->dest;
7666 if (gimple_in_ssa_p (cfun))
7668 gsi = gsi_last_nondebug_bb (cont_bb);
7669 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7671 offset = gimple_omp_continue_control_use (cont_stmt);
7672 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7674 else
7676 offset = create_tmp_var (diff_type, ".offset");
7677 offset_init = offset_incr = offset;
7679 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7681 /* Loop offset & bound go into head_bb. */
7682 gsi = gsi_start_bb (head_bb);
7684 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7685 build_int_cst (integer_type_node,
7686 IFN_GOACC_LOOP_OFFSET),
7687 dir, range, s,
7688 chunk_size, gwv, chunk_no);
7689 gimple_call_set_lhs (call, offset_init);
7690 gimple_set_location (call, loc);
7691 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7693 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7694 build_int_cst (integer_type_node,
7695 IFN_GOACC_LOOP_BOUND),
7696 dir, range, s,
7697 chunk_size, gwv, offset_init);
7698 gimple_call_set_lhs (call, bound);
7699 gimple_set_location (call, loc);
7700 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7702 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7703 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7704 GSI_CONTINUE_LINKING);
7706 /* V assignment goes into body_bb. */
7707 if (!gimple_in_ssa_p (cfun))
7709 gsi = gsi_start_bb (body_bb);
7711 expr = build2 (plus_code, iter_type, b,
7712 fold_convert (plus_type, offset));
7713 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7714 true, GSI_SAME_STMT);
7715 ass = gimple_build_assign (v, expr);
7716 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7718 if (fd->collapse > 1 || fd->tiling)
7719 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7721 if (fd->tiling)
7723 /* Determine the range of the element loop -- usually simply
7724 the tile_size, but could be smaller if the final
7725 iteration of the outer loop is a partial tile. */
7726 tree e_range = create_tmp_var (diff_type, ".e_range");
7728 expr = build2 (MIN_EXPR, diff_type,
7729 build2 (MINUS_EXPR, diff_type, bound, offset),
7730 build2 (MULT_EXPR, diff_type, tile_size,
7731 element_s));
7732 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7733 true, GSI_SAME_STMT);
7734 ass = gimple_build_assign (e_range, expr);
7735 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7737 /* Determine bound, offset & step of inner loop. */
7738 e_bound = create_tmp_var (diff_type, ".e_bound");
7739 e_offset = create_tmp_var (diff_type, ".e_offset");
7740 e_step = create_tmp_var (diff_type, ".e_step");
7742 /* Mark these as element loops. */
7743 tree t, e_gwv = integer_minus_one_node;
7744 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7746 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7747 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7748 element_s, chunk, e_gwv, chunk);
7749 gimple_call_set_lhs (call, e_offset);
7750 gimple_set_location (call, loc);
7751 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7753 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7754 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7755 element_s, chunk, e_gwv, e_offset);
7756 gimple_call_set_lhs (call, e_bound);
7757 gimple_set_location (call, loc);
7758 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7760 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7761 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7762 element_s, chunk, e_gwv);
7763 gimple_call_set_lhs (call, e_step);
7764 gimple_set_location (call, loc);
7765 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7767 /* Add test and split block. */
7768 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7769 stmt = gimple_build_cond_empty (expr);
7770 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7771 split = split_block (body_bb, stmt);
7772 elem_body_bb = split->dest;
7773 if (cont_bb == body_bb)
7774 cont_bb = elem_body_bb;
7775 body_bb = split->src;
7777 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7779 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7780 if (cont_bb == NULL)
7782 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7783 e->probability = profile_probability::even ();
7784 split->probability = profile_probability::even ();
7787 /* Initialize the user's loop vars. */
7788 gsi = gsi_start_bb (elem_body_bb);
7789 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7790 diff_type);
7794 /* Loop increment goes into cont_bb. If this is not a loop, we
7795 will have spawned threads as if it were, and each one will
7796 execute one iteration. The specification is not explicit about
7797 whether such constructs are ill-formed or not, and they can
7798 occur, especially when noreturn routines are involved. */
7799 if (cont_bb)
7801 gsi = gsi_last_nondebug_bb (cont_bb);
7802 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7803 loc = gimple_location (cont_stmt);
7805 if (fd->tiling)
7807 /* Insert element loop increment and test. */
7808 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7809 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7810 true, GSI_SAME_STMT);
7811 ass = gimple_build_assign (e_offset, expr);
7812 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7813 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7815 stmt = gimple_build_cond_empty (expr);
7816 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7817 split = split_block (cont_bb, stmt);
7818 elem_cont_bb = split->src;
7819 cont_bb = split->dest;
7821 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7822 split->probability = profile_probability::unlikely ().guessed ();
7823 edge latch_edge
7824 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7825 latch_edge->probability = profile_probability::likely ().guessed ();
7827 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7828 skip_edge->probability = profile_probability::unlikely ().guessed ();
7829 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7830 loop_entry_edge->probability
7831 = profile_probability::likely ().guessed ();
7833 gsi = gsi_for_stmt (cont_stmt);
7836 /* Increment offset. */
7837 if (gimple_in_ssa_p (cfun))
7838 expr = build2 (plus_code, iter_type, offset,
7839 fold_convert (plus_type, step));
7840 else
7841 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7842 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7843 true, GSI_SAME_STMT);
7844 ass = gimple_build_assign (offset_incr, expr);
7845 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7846 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7847 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7849 /* Remove the GIMPLE_OMP_CONTINUE. */
7850 gsi_remove (&gsi, true);
7852 /* Fixup edges from cont_bb. */
7853 be = BRANCH_EDGE (cont_bb);
7854 fte = FALLTHRU_EDGE (cont_bb);
7855 be->flags |= EDGE_TRUE_VALUE;
7856 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7858 if (chunking)
7860 /* Split the beginning of exit_bb to make bottom_bb. We
7861 need to insert a nop at the start, because splitting is
7862 after a stmt, not before. */
7863 gsi = gsi_start_bb (exit_bb);
7864 stmt = gimple_build_nop ();
7865 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7866 split = split_block (exit_bb, stmt);
7867 bottom_bb = split->src;
7868 exit_bb = split->dest;
7869 gsi = gsi_last_bb (bottom_bb);
7871 /* Chunk increment and test goes into bottom_bb. */
7872 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7873 build_int_cst (diff_type, 1));
7874 ass = gimple_build_assign (chunk_no, expr);
7875 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7877 /* Chunk test at end of bottom_bb. */
7878 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7879 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7880 GSI_CONTINUE_LINKING);
7882 /* Fixup edges from bottom_bb. */
7883 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7884 split->probability = profile_probability::unlikely ().guessed ();
7885 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7886 latch_edge->probability = profile_probability::likely ().guessed ();
7890 gsi = gsi_last_nondebug_bb (exit_bb);
7891 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7892 loc = gimple_location (gsi_stmt (gsi));
7894 if (!gimple_in_ssa_p (cfun))
7896 /* Insert the final value of V, in case it is live. This is the
7897 value for the only thread that survives past the join. */
7898 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7899 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7900 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7901 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7902 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7903 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7904 true, GSI_SAME_STMT);
7905 ass = gimple_build_assign (v, expr);
7906 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7909 /* Remove the OMP_RETURN. */
7910 gsi_remove (&gsi, true);
7912 if (cont_bb)
7914 /* We now have one, two or three nested loops. Update the loop
7915 structures. */
7916 class loop *parent = entry_bb->loop_father;
7917 class loop *body = body_bb->loop_father;
7919 if (chunking)
7921 class loop *chunk_loop = alloc_loop ();
7922 chunk_loop->header = head_bb;
7923 chunk_loop->latch = bottom_bb;
7924 add_loop (chunk_loop, parent);
7925 parent = chunk_loop;
7927 else if (parent != body)
7929 gcc_assert (body->header == body_bb);
7930 gcc_assert (body->latch == cont_bb
7931 || single_pred (body->latch) == cont_bb);
7932 parent = NULL;
7935 if (parent)
7937 class loop *body_loop = alloc_loop ();
7938 body_loop->header = body_bb;
7939 body_loop->latch = cont_bb;
7940 add_loop (body_loop, parent);
7942 if (fd->tiling)
7944 /* Insert tiling's element loop. */
7945 class loop *inner_loop = alloc_loop ();
7946 inner_loop->header = elem_body_bb;
7947 inner_loop->latch = elem_cont_bb;
7948 add_loop (inner_loop, body_loop);
7954 /* Expand the OMP loop defined by REGION. */
7956 static void
7957 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7959 struct omp_for_data fd;
7960 struct omp_for_data_loop *loops;
7962 loops = XALLOCAVEC (struct omp_for_data_loop,
7963 gimple_omp_for_collapse (last_stmt (region->entry)));
7964 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7965 &fd, loops);
7966 region->sched_kind = fd.sched_kind;
7967 region->sched_modifiers = fd.sched_modifiers;
7968 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7969 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7971 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7972 if ((loops[i].m1 || loops[i].m2)
7973 && (loops[i].m1 == NULL_TREE
7974 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7975 && (loops[i].m2 == NULL_TREE
7976 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7977 && TREE_CODE (loops[i].step) == INTEGER_CST
7978 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7980 tree t;
7981 tree itype = TREE_TYPE (loops[i].v);
7982 if (loops[i].m1 && loops[i].m2)
7983 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7984 else if (loops[i].m1)
7985 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7986 else
7987 t = loops[i].m2;
7988 t = fold_build2 (MULT_EXPR, itype, t,
7989 fold_convert (itype,
7990 loops[i - loops[i].outer].step));
7991 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7992 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7993 fold_build1 (NEGATE_EXPR, itype, t),
7994 fold_build1 (NEGATE_EXPR, itype,
7995 fold_convert (itype,
7996 loops[i].step)));
7997 else
7998 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7999 fold_convert (itype, loops[i].step));
8000 if (integer_nonzerop (t))
8001 error_at (gimple_location (fd.for_stmt),
8002 "invalid OpenMP non-rectangular loop step; "
8003 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8004 "step %qE",
8005 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8006 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8007 loops[i - loops[i].outer].step, i + 1,
8008 loops[i].step);
8012 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8013 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8014 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8015 if (region->cont)
8017 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8018 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8019 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8021 else
8022 /* If there isn't a continue then this is a degenerate case where
8023 the introduction of abnormal edges during lowering will prevent
8024 original loops from being detected. Fix that up. */
8025 loops_state_set (LOOPS_NEED_FIXUP);
8027 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8028 expand_omp_simd (region, &fd);
8029 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8031 gcc_assert (!inner_stmt && !fd.non_rect);
8032 expand_oacc_for (region, &fd);
8034 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8036 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8037 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8038 else
8039 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8041 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8042 && !fd.have_ordered)
8044 if (fd.chunk_size == NULL)
8045 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8046 else
8047 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8049 else
8051 int fn_index, start_ix, next_ix;
8052 unsigned HOST_WIDE_INT sched = 0;
8053 tree sched_arg = NULL_TREE;
8055 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8056 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8057 if (fd.chunk_size == NULL
8058 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8059 fd.chunk_size = integer_zero_node;
8060 switch (fd.sched_kind)
8062 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8063 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8064 && fd.lastprivate_conditional == 0)
8066 gcc_assert (!fd.have_ordered);
8067 fn_index = 6;
8068 sched = 4;
8070 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8071 && !fd.have_ordered
8072 && fd.lastprivate_conditional == 0)
8073 fn_index = 7;
8074 else
8076 fn_index = 3;
8077 sched = (HOST_WIDE_INT_1U << 31);
8079 break;
8080 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8081 case OMP_CLAUSE_SCHEDULE_GUIDED:
8082 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8083 && !fd.have_ordered
8084 && fd.lastprivate_conditional == 0)
8086 fn_index = 3 + fd.sched_kind;
8087 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8088 break;
8090 fn_index = fd.sched_kind;
8091 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8092 sched += (HOST_WIDE_INT_1U << 31);
8093 break;
8094 case OMP_CLAUSE_SCHEDULE_STATIC:
8095 gcc_assert (fd.have_ordered);
8096 fn_index = 0;
8097 sched = (HOST_WIDE_INT_1U << 31) + 1;
8098 break;
8099 default:
8100 gcc_unreachable ();
8102 if (!fd.ordered)
8103 fn_index += fd.have_ordered * 8;
8104 if (fd.ordered)
8105 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8106 else
8107 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8108 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8109 if (fd.have_reductemp || fd.have_pointer_condtemp)
8111 if (fd.ordered)
8112 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8113 else if (fd.have_ordered)
8114 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8115 else
8116 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8117 sched_arg = build_int_cstu (long_integer_type_node, sched);
8118 if (!fd.chunk_size)
8119 fd.chunk_size = integer_zero_node;
8121 if (fd.iter_type == long_long_unsigned_type_node)
8123 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8124 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8125 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8126 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8128 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8129 (enum built_in_function) next_ix, sched_arg,
8130 inner_stmt);
8133 if (gimple_in_ssa_p (cfun))
8134 update_ssa (TODO_update_ssa_only_virtuals);
8137 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8139 v = GOMP_sections_start (n);
8141 switch (v)
8143 case 0:
8144 goto L2;
8145 case 1:
8146 section 1;
8147 goto L1;
8148 case 2:
8150 case n:
8152 default:
8153 abort ();
8156 v = GOMP_sections_next ();
8157 goto L0;
8159 reduction;
8161 If this is a combined parallel sections, replace the call to
8162 GOMP_sections_start with call to GOMP_sections_next. */
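/* User-level illustration (hypothetical input, not produced by this
   pass):

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   maps onto the dispatch loop above with 'case 1: foo (); goto L1;'
   and 'case 2: bar (); goto L1;', while case 0 exits to L2.  */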
8164 static void
8165 expand_omp_sections (struct omp_region *region)
8167 tree t, u, vin = NULL, vmain, vnext, l2;
8168 unsigned len;
8169 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8170 gimple_stmt_iterator si, switch_si;
8171 gomp_sections *sections_stmt;
8172 gimple *stmt;
8173 gomp_continue *cont;
8174 edge_iterator ei;
8175 edge e;
8176 struct omp_region *inner;
8177 unsigned i, casei;
8178 bool exit_reachable = region->cont != NULL;
8180 gcc_assert (region->exit != NULL);
8181 entry_bb = region->entry;
8182 l0_bb = single_succ (entry_bb);
8183 l1_bb = region->cont;
8184 l2_bb = region->exit;
8185 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8186 l2 = gimple_block_label (l2_bb);
8187 else
8189 /* This can happen if there are reductions. */
8190 len = EDGE_COUNT (l0_bb->succs);
8191 gcc_assert (len > 0);
8192 e = EDGE_SUCC (l0_bb, len - 1);
8193 si = gsi_last_nondebug_bb (e->dest);
8194 l2 = NULL_TREE;
8195 if (gsi_end_p (si)
8196 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8197 l2 = gimple_block_label (e->dest);
8198 else
8199 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8201 si = gsi_last_nondebug_bb (e->dest);
8202 if (gsi_end_p (si)
8203 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8205 l2 = gimple_block_label (e->dest);
8206 break;
8210 if (exit_reachable)
8211 default_bb = create_empty_bb (l1_bb->prev_bb);
8212 else
8213 default_bb = create_empty_bb (l0_bb);
8215 /* We will build a switch() with enough cases for all the
8216 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
8217 and a default case to abort if something goes wrong. */
8218 len = EDGE_COUNT (l0_bb->succs);
8220 /* Use vec::quick_push on label_vec throughout, since we know the size
8221 in advance. */
8222 auto_vec<tree> label_vec (len);
8224 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8225 GIMPLE_OMP_SECTIONS statement. */
8226 si = gsi_last_nondebug_bb (entry_bb);
8227 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8228 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8229 vin = gimple_omp_sections_control (sections_stmt);
8230 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8231 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8232 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8233 tree cond_var = NULL_TREE;
8234 if (reductmp || condtmp)
8236 tree reductions = null_pointer_node, mem = null_pointer_node;
8237 tree memv = NULL_TREE, condtemp = NULL_TREE;
8238 gimple_stmt_iterator gsi = gsi_none ();
8239 gimple *g = NULL;
8240 if (reductmp)
8242 reductions = OMP_CLAUSE_DECL (reductmp);
8243 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8244 g = SSA_NAME_DEF_STMT (reductions);
8245 reductions = gimple_assign_rhs1 (g);
8246 OMP_CLAUSE_DECL (reductmp) = reductions;
8247 gsi = gsi_for_stmt (g);
8249 else
8250 gsi = si;
8251 if (condtmp)
8253 condtemp = OMP_CLAUSE_DECL (condtmp);
8254 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8255 OMP_CLAUSE__CONDTEMP_);
8256 cond_var = OMP_CLAUSE_DECL (c);
8257 tree type = TREE_TYPE (condtemp);
8258 memv = create_tmp_var (type);
8259 TREE_ADDRESSABLE (memv) = 1;
8260 unsigned cnt = 0;
8261 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8262 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8263 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8264 ++cnt;
8265 unsigned HOST_WIDE_INT sz
8266 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8267 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8268 false);
8269 mem = build_fold_addr_expr (memv);
8271 t = build_int_cst (unsigned_type_node, len - 1);
8272 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8273 stmt = gimple_build_call (u, 3, t, reductions, mem);
8274 gimple_call_set_lhs (stmt, vin);
8275 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8276 if (condtmp)
8278 expand_omp_build_assign (&gsi, condtemp, memv, false);
8279 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8280 vin, build_one_cst (TREE_TYPE (cond_var)));
8281 expand_omp_build_assign (&gsi, cond_var, t, false);
8283 if (reductmp)
8285 gsi_remove (&gsi, true);
8286 release_ssa_name (gimple_assign_lhs (g));
8289 else if (!is_combined_parallel (region))
8291 /* If we are not inside a combined parallel+sections region,
8292 call GOMP_sections_start. */
8293 t = build_int_cst (unsigned_type_node, len - 1);
8294 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8295 stmt = gimple_build_call (u, 1, t);
8297 else
8299 /* Otherwise, call GOMP_sections_next. */
8300 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8301 stmt = gimple_build_call (u, 0);
8303 if (!reductmp && !condtmp)
8305 gimple_call_set_lhs (stmt, vin);
8306 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8308 gsi_remove (&si, true);
8310 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8311 L0_BB. */
8312 switch_si = gsi_last_nondebug_bb (l0_bb);
8313 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8314 if (exit_reachable)
8316 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8317 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8318 vmain = gimple_omp_continue_control_use (cont);
8319 vnext = gimple_omp_continue_control_def (cont);
8321 else
8323 vmain = vin;
8324 vnext = NULL_TREE;
8327 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8328 label_vec.quick_push (t);
8329 i = 1;
8331 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8332 for (inner = region->inner, casei = 1;
8333 inner;
8334 inner = inner->next, i++, casei++)
8336 basic_block s_entry_bb, s_exit_bb;
8338 /* Skip optional reduction region. */
8339 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8341 --i;
8342 --casei;
8343 continue;
8346 s_entry_bb = inner->entry;
8347 s_exit_bb = inner->exit;
8349 t = gimple_block_label (s_entry_bb);
8350 u = build_int_cst (unsigned_type_node, casei);
8351 u = build_case_label (u, NULL, t);
8352 label_vec.quick_push (u);
8354 si = gsi_last_nondebug_bb (s_entry_bb);
8355 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8356 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8357 gsi_remove (&si, true);
8358 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8360 if (s_exit_bb == NULL)
8361 continue;
8363 si = gsi_last_nondebug_bb (s_exit_bb);
8364 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8365 gsi_remove (&si, true);
8367 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8370 /* Error handling code goes in DEFAULT_BB. */
8371 t = gimple_block_label (default_bb);
8372 u = build_case_label (NULL, NULL, t);
8373 make_edge (l0_bb, default_bb, 0);
8374 add_bb_to_loop (default_bb, current_loops->tree_root);
8376 stmt = gimple_build_switch (vmain, u, label_vec);
8377 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8378 gsi_remove (&switch_si, true);
8380 si = gsi_start_bb (default_bb);
8381 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8382 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8384 if (exit_reachable)
8386 tree bfn_decl;
8388 /* Code to get the next section goes in L1_BB. */
8389 si = gsi_last_nondebug_bb (l1_bb);
8390 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8392 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8393 stmt = gimple_build_call (bfn_decl, 0);
8394 gimple_call_set_lhs (stmt, vnext);
8395 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8396 if (cond_var)
8398 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8399 vnext, build_one_cst (TREE_TYPE (cond_var)));
8400 expand_omp_build_assign (&si, cond_var, t, false);
8402 gsi_remove (&si, true);
8404 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8407 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8408 si = gsi_last_nondebug_bb (l2_bb);
8409 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8410 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8411 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8412 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8413 else
8414 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8415 stmt = gimple_build_call (t, 0);
8416 if (gimple_omp_return_lhs (gsi_stmt (si)))
8417 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8418 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8419 gsi_remove (&si, true);
8421 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8424 /* Expand code for an OpenMP single or scope directive. We've already expanded
8425 much of the code; here we simply place the GOMP_barrier call. */
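/* E.g. (illustrative): for a '#pragma omp single' without 'nowait',
   the GIMPLE_OMP_RETURN at the region exit is replaced below by a
   GOMP_barrier call built via omp_build_barrier; with 'nowait', or for
   a 'scope' region that has no exit block, no barrier is emitted.  */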
8427 static void
8428 expand_omp_single (struct omp_region *region)
8430 basic_block entry_bb, exit_bb;
8431 gimple_stmt_iterator si;
8433 entry_bb = region->entry;
8434 exit_bb = region->exit;
8436 si = gsi_last_nondebug_bb (entry_bb);
8437 enum gimple_code code = gimple_code (gsi_stmt (si));
8438 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8439 gsi_remove (&si, true);
8440 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8442 if (exit_bb == NULL)
8444 gcc_assert (code == GIMPLE_OMP_SCOPE);
8445 return;
8448 si = gsi_last_nondebug_bb (exit_bb);
8449 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8451 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8452 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8454 gsi_remove (&si, true);
8455 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8458 /* Generic expansion for OpenMP synchronization directives: master,
8459 masked, taskgroup, ordered, critical and teams. All we need to do
8460 here is remove the entry and exit markers for REGION. */
8462 static void
8463 expand_omp_synch (struct omp_region *region)
8465 basic_block entry_bb, exit_bb;
8466 gimple_stmt_iterator si;
8468 entry_bb = region->entry;
8469 exit_bb = region->exit;
8471 si = gsi_last_nondebug_bb (entry_bb);
8472 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8473 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8474 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8475 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8476 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8477 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8478 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8479 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8480 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8482 expand_omp_taskreg (region);
8483 return;
8485 gsi_remove (&si, true);
8486 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8488 if (exit_bb)
8490 si = gsi_last_nondebug_bb (exit_bb);
8491 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8492 gsi_remove (&si, true);
8493 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8497 /* Translate enum omp_memory_order to enum memmodel for the embedded
8498 fail clause in there. */
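/* For instance, under the mapping below an atomic directive marked acq_rel
   whose compare form has no explicit fail clause gets MEMMODEL_ACQUIRE as
   its fail ordering, since the failure path performs no store and so
   cannot usefully carry release semantics.  */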
8500 static enum memmodel
8501 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8503 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8505 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8506 switch (mo & OMP_MEMORY_ORDER_MASK)
8508 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8509 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8510 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8511 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8512 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8513 default: break;
8515 gcc_unreachable ();
8516 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8517 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8518 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8519 default: gcc_unreachable ();
8523 /* Translate enum omp_memory_order to enum memmodel. The two enums
8524 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8525 is 0 and omp_memory_order has the fail mode encoded in it too. */
8527 static enum memmodel
8528 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8530 enum memmodel ret, fail_ret;
8531 switch (mo & OMP_MEMORY_ORDER_MASK)
8533 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8534 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8535 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8536 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8537 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8538 default: gcc_unreachable ();
8540 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8541 we can just return ret here unconditionally. Otherwise, work around
8542 it here and make sure fail memmodel is not stronger. */
8543 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8544 return ret;
8545 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8546 if (fail_ret > ret)
8547 return fail_ret;
8548 return ret;
8551 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8552 operation as a normal volatile load. */
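/* A sketch of the intended result, assuming a 4-byte int X (INDEX == 2)
   and relaxed ordering:

     #pragma omp atomic read
       v = x;

   is expanded into roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   using the __atomic_load_N builtin picked via INDEX below.  */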
8554 static bool
8555 expand_omp_atomic_load (basic_block load_bb, tree addr,
8556 tree loaded_val, int index)
8558 enum built_in_function tmpbase;
8559 gimple_stmt_iterator gsi;
8560 basic_block store_bb;
8561 location_t loc;
8562 gimple *stmt;
8563 tree decl, call, type, itype;
8565 gsi = gsi_last_nondebug_bb (load_bb);
8566 stmt = gsi_stmt (gsi);
8567 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8568 loc = gimple_location (stmt);
8570 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8571 is smaller than word size, then expand_atomic_load assumes that the load
8572 is atomic. We could avoid the builtin entirely in this case. */
8574 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8575 decl = builtin_decl_explicit (tmpbase);
8576 if (decl == NULL_TREE)
8577 return false;
8579 type = TREE_TYPE (loaded_val);
8580 itype = TREE_TYPE (TREE_TYPE (decl));
8582 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8583 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8584 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8585 if (!useless_type_conversion_p (type, itype))
8586 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8587 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8589 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8590 gsi_remove (&gsi, true);
8592 store_bb = single_succ (load_bb);
8593 gsi = gsi_last_nondebug_bb (store_bb);
8594 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8595 gsi_remove (&gsi, true);
8597 if (gimple_in_ssa_p (cfun))
8598 update_ssa (TODO_update_ssa_no_phi);
8600 return true;
8603 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8604 operation as a normal volatile store. */
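/* A sketch, again assuming a 4-byte int X:

     #pragma omp atomic write
       x = expr;

   becomes roughly

     __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   and if the old value is also needed this turns into an exchange using
   the __atomic_exchange_N builtin instead, as detected below.  */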
8606 static bool
8607 expand_omp_atomic_store (basic_block load_bb, tree addr,
8608 tree loaded_val, tree stored_val, int index)
8610 enum built_in_function tmpbase;
8611 gimple_stmt_iterator gsi;
8612 basic_block store_bb = single_succ (load_bb);
8613 location_t loc;
8614 gimple *stmt;
8615 tree decl, call, type, itype;
8616 machine_mode imode;
8617 bool exchange;
8619 gsi = gsi_last_nondebug_bb (load_bb);
8620 stmt = gsi_stmt (gsi);
8621 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8623 /* If the load value is needed, then this isn't a store but an exchange. */
8624 exchange = gimple_omp_atomic_need_value_p (stmt);
8626 gsi = gsi_last_nondebug_bb (store_bb);
8627 stmt = gsi_stmt (gsi);
8628 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8629 loc = gimple_location (stmt);
8631 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8632 is smaller than word size, then expand_atomic_store assumes that the store
8633 is atomic. We could avoid the builtin entirely in this case. */
8635 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8636 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8637 decl = builtin_decl_explicit (tmpbase);
8638 if (decl == NULL_TREE)
8639 return false;
8641 type = TREE_TYPE (stored_val);
8643 /* Dig out the type of the function's second argument. */
8644 itype = TREE_TYPE (decl);
8645 itype = TYPE_ARG_TYPES (itype);
8646 itype = TREE_CHAIN (itype);
8647 itype = TREE_VALUE (itype);
8648 imode = TYPE_MODE (itype);
8650 if (exchange && !can_atomic_exchange_p (imode, true))
8651 return false;
8653 if (!useless_type_conversion_p (itype, type))
8654 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8655 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8656 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8657 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8658 if (exchange)
8660 if (!useless_type_conversion_p (type, itype))
8661 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8662 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8665 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8666 gsi_remove (&gsi, true);
8668 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8669 gsi = gsi_last_nondebug_bb (load_bb);
8670 gsi_remove (&gsi, true);
8672 if (gimple_in_ssa_p (cfun))
8673 update_ssa (TODO_update_ssa_no_phi);
8675 return true;
8678 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8679 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8680 size of the data type, and thus usable to find the index of the builtin
8681 decl. Returns false if the expression is not of the proper form. */
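/* A sketch, assuming a 4-byte int X:

     #pragma omp atomic
       x += n;

   matches the PLUS_EXPR case below and is expanded into roughly

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   with MEMMODEL_RELAXED being the default when no seq_cst or similar
   clause was given, and with the call's result used only when a capture
   needs the old or new value.  */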
8683 static bool
8684 expand_omp_atomic_fetch_op (basic_block load_bb,
8685 tree addr, tree loaded_val,
8686 tree stored_val, int index)
8688 enum built_in_function oldbase, newbase, tmpbase;
8689 tree decl, itype, call;
8690 tree lhs, rhs;
8691 basic_block store_bb = single_succ (load_bb);
8692 gimple_stmt_iterator gsi;
8693 gimple *stmt;
8694 location_t loc;
8695 enum tree_code code;
8696 bool need_old, need_new;
8697 machine_mode imode;
8699 /* We expect to find the following sequences:
8701 load_bb:
8702 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8704 store_bb:
8705 val = tmp OP something; (or: something OP tmp)
8706 GIMPLE_OMP_ATOMIC_STORE (val)
8708 ???FIXME: Allow a more flexible sequence.
8709 Perhaps use data flow to pick the statements. */
8713 gsi = gsi_after_labels (store_bb);
8714 stmt = gsi_stmt (gsi);
8715 if (is_gimple_debug (stmt))
8717 gsi_next_nondebug (&gsi);
8718 if (gsi_end_p (gsi))
8719 return false;
8720 stmt = gsi_stmt (gsi);
8722 loc = gimple_location (stmt);
8723 if (!is_gimple_assign (stmt))
8724 return false;
8725 gsi_next_nondebug (&gsi);
8726 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8727 return false;
8728 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8729 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8730 enum omp_memory_order omo
8731 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8732 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8733 gcc_checking_assert (!need_old || !need_new);
8735 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8736 return false;
8738 /* Check for one of the supported fetch-op operations. */
8739 code = gimple_assign_rhs_code (stmt);
8740 switch (code)
8742 case PLUS_EXPR:
8743 case POINTER_PLUS_EXPR:
8744 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8745 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8746 break;
8747 case MINUS_EXPR:
8748 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8749 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8750 break;
8751 case BIT_AND_EXPR:
8752 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8753 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8754 break;
8755 case BIT_IOR_EXPR:
8756 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8757 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8758 break;
8759 case BIT_XOR_EXPR:
8760 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8761 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8762 break;
8763 default:
8764 return false;
8767 /* Make sure the expression is of the proper form. */
8768 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8769 rhs = gimple_assign_rhs2 (stmt);
8770 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8771 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8772 rhs = gimple_assign_rhs1 (stmt);
8773 else
8774 return false;
8776 tmpbase = ((enum built_in_function)
8777 ((need_new ? newbase : oldbase) + index + 1));
8778 decl = builtin_decl_explicit (tmpbase);
8779 if (decl == NULL_TREE)
8780 return false;
8781 itype = TREE_TYPE (TREE_TYPE (decl));
8782 imode = TYPE_MODE (itype);
8784 /* We could test all of the various optabs involved, but the fact of the
8785 matter is that (with the exception of i486 vs i586 and xadd) all targets
8786 that support any atomic operation optab also implement compare-and-swap.
8787 Let optabs.c take care of expanding any compare-and-swap loop. */
8788 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8789 return false;
8791 gsi = gsi_last_nondebug_bb (load_bb);
8792 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8794 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8795 It only requires that the operation happen atomically. Thus we can
8796 use the RELAXED memory model. */
8797 call = build_call_expr_loc (loc, decl, 3, addr,
8798 fold_convert_loc (loc, itype, rhs),
8799 build_int_cst (NULL, mo));
8801 if (need_old || need_new)
8803 lhs = need_old ? loaded_val : stored_val;
8804 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8805 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8807 else
8808 call = fold_convert_loc (loc, void_type_node, call);
8809 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8810 gsi_remove (&gsi, true);
8812 gsi = gsi_last_nondebug_bb (store_bb);
8813 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8814 gsi_remove (&gsi, true);
8815 gsi = gsi_last_nondebug_bb (store_bb);
8816 stmt = gsi_stmt (gsi);
8817 gsi_remove (&gsi, true);
8819 if (gimple_in_ssa_p (cfun))
8821 release_defs (stmt);
8822 update_ssa (TODO_update_ssa_no_phi);
8825 return true;
8828 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8829 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8830 Returns false if the expression is not of the proper form. */
8832 static bool
8833 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8834 tree loaded_val, tree stored_val, int index)
8836 /* We expect to find the following sequences:
8838 load_bb:
8839 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8841 store_bb:
8842 val = tmp == e ? d : tmp;
8843 GIMPLE_OMP_ATOMIC_STORE (val)
8845 or in store_bb instead:
8846 tmp2 = tmp == e;
8847 val = tmp2 ? d : tmp;
8848 GIMPLE_OMP_ATOMIC_STORE (val)
8851 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8852 val = e == tmp3 ? d : tmp;
8853 GIMPLE_OMP_ATOMIC_STORE (val)
8855 etc. */
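/* A sketch of the replacement: for an integral X the matched form

     #pragma omp atomic compare
       if (x == e) { x = d; }

   collapses below into a single IFN_ATOMIC_COMPARE_EXCHANGE call whose
   complex result carries the old value in its real part and the success
   flag in its imaginary part.  */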
8858 basic_block store_bb = single_succ (load_bb);
8859 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8860 gimple *store_stmt = gsi_stmt (gsi);
8861 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8862 return false;
8863 gsi_prev_nondebug (&gsi);
8864 if (gsi_end_p (gsi))
8865 return false;
8866 gimple *condexpr_stmt = gsi_stmt (gsi);
8867 if (!is_gimple_assign (condexpr_stmt)
8868 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8869 return false;
8870 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8871 return false;
8872 gimple *cond_stmt = NULL;
8873 gimple *vce_stmt = NULL;
8874 gsi_prev_nondebug (&gsi);
8875 if (!gsi_end_p (gsi))
8877 cond_stmt = gsi_stmt (gsi);
8878 if (!is_gimple_assign (cond_stmt))
8879 return false;
8880 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8882 gsi_prev_nondebug (&gsi);
8883 if (!gsi_end_p (gsi))
8885 vce_stmt = gsi_stmt (gsi);
8886 if (!is_gimple_assign (vce_stmt)
8887 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8888 return false;
8891 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8892 std::swap (vce_stmt, cond_stmt);
8893 else
8894 return false;
8895 if (vce_stmt)
8897 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8898 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8899 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8900 return false;
8901 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8902 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8903 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8904 TYPE_SIZE (TREE_TYPE (loaded_val))))
8905 return false;
8906 gsi_prev_nondebug (&gsi);
8907 if (!gsi_end_p (gsi))
8908 return false;
8911 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8912 tree cond_op1, cond_op2;
8913 if (cond_stmt)
8915 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8916 return false;
8917 cond_op1 = gimple_assign_rhs1 (cond_stmt);
8918 cond_op2 = gimple_assign_rhs2 (cond_stmt);
8920 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
8921 return false;
8922 else
8924 cond_op1 = TREE_OPERAND (cond, 0);
8925 cond_op2 = TREE_OPERAND (cond, 1);
8927 tree d;
8928 if (TREE_CODE (cond) == NE_EXPR)
8930 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
8931 return false;
8932 d = gimple_assign_rhs3 (condexpr_stmt);
8934 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
8935 return false;
8936 else
8937 d = gimple_assign_rhs2 (condexpr_stmt);
8938 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
8939 if (operand_equal_p (e, cond_op1))
8940 e = cond_op2;
8941 else if (operand_equal_p (e, cond_op2))
8942 e = cond_op1;
8943 else
8944 return false;
8946 location_t loc = gimple_location (store_stmt);
8947 gimple *load_stmt = last_stmt (load_bb);
8948 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
8949 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
8950 bool weak = gimple_omp_atomic_weak_p (load_stmt);
8951 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
8952 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8953 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
8954 gcc_checking_assert (!need_old || !need_new);
8956 enum built_in_function fncode
8957 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8958 + index + 1);
8959 tree cmpxchg = builtin_decl_explicit (fncode);
8960 if (cmpxchg == NULL_TREE)
8961 return false;
8962 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8964 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8965 || !can_atomic_load_p (TYPE_MODE (itype)))
8966 return false;
8968 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8969 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
8970 return false;
8972 gsi = gsi_for_stmt (store_stmt);
8973 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
8975 tree ne = create_tmp_reg (itype);
8976 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
8977 gimple_set_location (g, loc);
8978 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8979 e = ne;
8981 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
8983 tree nd = create_tmp_reg (itype);
8984 enum tree_code code;
8985 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
8987 code = VIEW_CONVERT_EXPR;
8988 d = build1 (VIEW_CONVERT_EXPR, itype, d);
8990 else
8991 code = NOP_EXPR;
8992 gimple *g = gimple_build_assign (nd, code, d);
8993 gimple_set_location (g, loc);
8994 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8995 d = nd;
8998 tree ctype = build_complex_type (itype);
8999 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9000 gimple *g
9001 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9002 build_int_cst (integer_type_node, flag),
9003 mo, fmo);
9004 tree cres = create_tmp_reg (ctype);
9005 gimple_call_set_lhs (g, cres);
9006 gimple_set_location (g, loc);
9007 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9009 if (cond_stmt || need_old || need_new)
9011 tree im = create_tmp_reg (itype);
9012 g = gimple_build_assign (im, IMAGPART_EXPR,
9013 build1 (IMAGPART_EXPR, itype, cres));
9014 gimple_set_location (g, loc);
9015 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9017 tree re = NULL_TREE;
9018 if (need_old || need_new)
9020 re = create_tmp_reg (itype);
9021 g = gimple_build_assign (re, REALPART_EXPR,
9022 build1 (REALPART_EXPR, itype, cres));
9023 gimple_set_location (g, loc);
9024 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9027 if (cond_stmt)
9029 g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9030 NOP_EXPR, im);
9031 gimple_set_location (g, loc);
9032 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9034 else if (need_new)
9036 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9037 build2 (NE_EXPR, boolean_type_node,
9038 im, build_zero_cst (itype)),
9039 d, re);
9040 gimple_set_location (g, loc);
9041 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9042 re = gimple_assign_lhs (g);
9045 if (need_old || need_new)
9047 tree v = need_old ? loaded_val : stored_val;
9048 enum tree_code code;
9049 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9051 code = VIEW_CONVERT_EXPR;
9052 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9054 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9055 code = NOP_EXPR;
9056 else
9057 code = TREE_CODE (re);
9058 g = gimple_build_assign (v, code, re);
9059 gimple_set_location (g, loc);
9060 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9064 gsi_remove (&gsi, true);
9065 gsi = gsi_for_stmt (load_stmt);
9066 gsi_remove (&gsi, true);
9067 gsi = gsi_for_stmt (condexpr_stmt);
9068 gsi_remove (&gsi, true);
9069 if (cond_stmt)
9071 gsi = gsi_for_stmt (cond_stmt);
9072 gsi_remove (&gsi, true);
9074 if (vce_stmt)
9076 gsi = gsi_for_stmt (vce_stmt);
9077 gsi_remove (&gsi, true);
9080 return true;
9083 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9085 oldval = *addr;
9086 repeat:
9087 newval = rhs; // with oldval replacing *addr in rhs
9088 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9089 if (oldval != newval)
9090 goto repeat;
9092 INDEX is log2 of the size of the data type, and thus usable to find the
9093 index of the builtin decl. */
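/* In CFG terms the code below builds a small loop: LOAD_BB feeds a loop
   header holding the PHI for LOADEDI, STORE_BB is the latch, and the back
   edge is taken whenever the compare-and-swap observed an intervening
   store.  */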
9095 static bool
9096 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9097 tree addr, tree loaded_val, tree stored_val,
9098 int index)
9100 tree loadedi, storedi, initial, new_storedi, old_vali;
9101 tree type, itype, cmpxchg, iaddr, atype;
9102 gimple_stmt_iterator si;
9103 basic_block loop_header = single_succ (load_bb);
9104 gimple *phi, *stmt;
9105 edge e;
9106 enum built_in_function fncode;
9108 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9109 + index + 1);
9110 cmpxchg = builtin_decl_explicit (fncode);
9111 if (cmpxchg == NULL_TREE)
9112 return false;
9113 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9114 atype = type;
9115 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9117 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9118 || !can_atomic_load_p (TYPE_MODE (itype)))
9119 return false;
9121 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9122 si = gsi_last_nondebug_bb (load_bb);
9123 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9124 location_t loc = gimple_location (gsi_stmt (si));
9125 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9126 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9127 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9129 /* For floating-point values, we'll need to view-convert them to integers
9130 so that we can perform the atomic compare and swap. Simplify the
9131 following code by always setting up the "i"ntegral variables. */
9132 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9134 tree iaddr_val;
9136 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9137 true));
9138 atype = itype;
9139 iaddr_val
9140 = force_gimple_operand_gsi (&si,
9141 fold_convert (TREE_TYPE (iaddr), addr),
9142 false, NULL_TREE, true, GSI_SAME_STMT);
9143 stmt = gimple_build_assign (iaddr, iaddr_val);
9144 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9145 loadedi = create_tmp_var (itype);
9146 if (gimple_in_ssa_p (cfun))
9147 loadedi = make_ssa_name (loadedi);
9149 else
9151 iaddr = addr;
9152 loadedi = loaded_val;
9155 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9156 tree loaddecl = builtin_decl_explicit (fncode);
9157 if (loaddecl)
9158 initial
9159 = fold_convert (atype,
9160 build_call_expr (loaddecl, 2, iaddr,
9161 build_int_cst (NULL_TREE,
9162 MEMMODEL_RELAXED)));
9163 else
9165 tree off
9166 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9167 true), 0);
9168 initial = build2 (MEM_REF, atype, iaddr, off);
9171 initial
9172 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9173 GSI_SAME_STMT);
9175 /* Move the value to the LOADEDI temporary. */
9176 if (gimple_in_ssa_p (cfun))
9178 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9179 phi = create_phi_node (loadedi, loop_header);
9180 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9181 initial);
9183 else
9184 gsi_insert_before (&si,
9185 gimple_build_assign (loadedi, initial),
9186 GSI_SAME_STMT);
9187 if (loadedi != loaded_val)
9189 gimple_stmt_iterator gsi2;
9190 tree x;
9192 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9193 gsi2 = gsi_start_bb (loop_header);
9194 if (gimple_in_ssa_p (cfun))
9196 gassign *stmt;
9197 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9198 true, GSI_SAME_STMT);
9199 stmt = gimple_build_assign (loaded_val, x);
9200 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9202 else
9204 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9205 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9206 true, GSI_SAME_STMT);
9209 gsi_remove (&si, true);
9211 si = gsi_last_nondebug_bb (store_bb);
9212 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9214 if (iaddr == addr)
9215 storedi = stored_val;
9216 else
9217 storedi
9218 = force_gimple_operand_gsi (&si,
9219 build1 (VIEW_CONVERT_EXPR, itype,
9220 stored_val), true, NULL_TREE, true,
9221 GSI_SAME_STMT);
9223 /* Build the compare&swap statement. */
9224 tree ctype = build_complex_type (itype);
9225 int flag = int_size_in_bytes (itype);
9226 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9227 ctype, 6, iaddr, loadedi,
9228 storedi,
9229 build_int_cst (integer_type_node,
9230 flag),
9231 mo, fmo);
9232 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9233 new_storedi = force_gimple_operand_gsi (&si,
9234 fold_convert (TREE_TYPE (loadedi),
9235 new_storedi),
9236 true, NULL_TREE,
9237 true, GSI_SAME_STMT);
9239 if (gimple_in_ssa_p (cfun))
9240 old_vali = loadedi;
9241 else
9243 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9244 stmt = gimple_build_assign (old_vali, loadedi);
9245 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9247 stmt = gimple_build_assign (loadedi, new_storedi);
9248 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9251 /* Note that we always perform the comparison as an integer, even for
9252 floating point. This allows the atomic operation to properly
9253 succeed even with NaNs and -0.0. */
9254 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9255 stmt = gimple_build_cond_empty (ne);
9256 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9258 /* Update cfg. */
9259 e = single_succ_edge (store_bb);
9260 e->flags &= ~EDGE_FALLTHRU;
9261 e->flags |= EDGE_FALSE_VALUE;
9262 /* Expect no looping. */
9263 e->probability = profile_probability::guessed_always ();
9265 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9266 e->probability = profile_probability::guessed_never ();
9268 /* Copy the new value to loadedi (we already did that before the condition
9269 if we are not in SSA). */
9270 if (gimple_in_ssa_p (cfun))
9272 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9273 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9276 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9277 gsi_remove (&si, true);
9279 class loop *loop = alloc_loop ();
9280 loop->header = loop_header;
9281 loop->latch = store_bb;
9282 add_loop (loop, loop_header->loop_father);
9284 if (gimple_in_ssa_p (cfun))
9285 update_ssa (TODO_update_ssa_no_phi);
9287 return true;
9290 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9292 GOMP_atomic_start ();
9293 *addr = rhs;
9294 GOMP_atomic_end ();
9296 The result is not globally atomic, but works so long as all parallel
9297 references are within #pragma omp atomic directives. According to
9298 responses received from omp@openmp.org, this appears to be within spec,
9299 which makes sense, since that's how several other compilers handle
9300 this situation as well.
9301 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9302 expanding. STORED_VAL is the operand of the matching
9303 GIMPLE_OMP_ATOMIC_STORE.
9305 We replace
9306 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9307 loaded_val = *addr;
9309 and replace
9310 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9311 *addr = stored_val;
9314 static bool
9315 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9316 tree addr, tree loaded_val, tree stored_val)
9318 gimple_stmt_iterator si;
9319 gassign *stmt;
9320 tree t;
9322 si = gsi_last_nondebug_bb (load_bb);
9323 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9325 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9326 t = build_call_expr (t, 0);
9327 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9329 tree mem = build_simple_mem_ref (addr);
9330 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9331 TREE_OPERAND (mem, 1)
9332 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9333 true),
9334 TREE_OPERAND (mem, 1));
9335 stmt = gimple_build_assign (loaded_val, mem);
9336 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9337 gsi_remove (&si, true);
9339 si = gsi_last_nondebug_bb (store_bb);
9340 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9342 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9343 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9345 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9346 t = build_call_expr (t, 0);
9347 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9348 gsi_remove (&si, true);
9350 if (gimple_in_ssa_p (cfun))
9351 update_ssa (TODO_update_ssa_no_phi);
9352 return true;
9355 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
9356 using expand_omp_atomic_fetch_op. If that fails, we try to
9357 call expand_omp_atomic_pipeline, and if that fails too, the
9358 ultimate fallback is wrapping the operation in a mutex
9359 (expand_omp_atomic_mutex). REGION is the atomic region built
9360 by build_omp_regions_1(). */
9362 static void
9363 expand_omp_atomic (struct omp_region *region)
9365 basic_block load_bb = region->entry, store_bb = region->exit;
9366 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9367 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9368 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9369 tree addr = gimple_omp_atomic_load_rhs (load);
9370 tree stored_val = gimple_omp_atomic_store_val (store);
9371 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9372 HOST_WIDE_INT index;
9374 /* Make sure the type is one of the supported sizes. */
9375 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9376 index = exact_log2 (index);
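/* INDEX is now 0..4 for 1-, 2-, 4-, 8- or 16-byte types; anything else
   falls through to the mutex fallback at the end.  */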
9377 if (index >= 0 && index <= 4)
9379 unsigned int align = TYPE_ALIGN_UNIT (type);
9381 /* __sync builtins require strict data alignment. */
9382 if (exact_log2 (align) >= index)
9384 /* Atomic load. */
9385 scalar_mode smode;
9386 if (loaded_val == stored_val
9387 && (is_int_mode (TYPE_MODE (type), &smode)
9388 || is_float_mode (TYPE_MODE (type), &smode))
9389 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9390 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9391 return;
9393 /* Atomic store. */
9394 if ((is_int_mode (TYPE_MODE (type), &smode)
9395 || is_float_mode (TYPE_MODE (type), &smode))
9396 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9397 && store_bb == single_succ (load_bb)
9398 && first_stmt (store_bb) == store
9399 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9400 stored_val, index))
9401 return;
9403 /* When possible, use specialized atomic update functions. */
9404 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9405 && store_bb == single_succ (load_bb)
9406 && expand_omp_atomic_fetch_op (load_bb, addr,
9407 loaded_val, stored_val, index))
9408 return;
9410 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9411 if (store_bb == single_succ (load_bb)
9412 && !gimple_in_ssa_p (cfun)
9413 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9414 index))
9415 return;
9417 /* If we don't have specialized __sync builtins, try to implement
9418 as a compare and swap loop. */
9419 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9420 loaded_val, stored_val, index))
9421 return;
9425 /* The ultimate fallback is wrapping the operation in a mutex. */
9426 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9429 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9430 at REGION_EXIT. */
9432 static void
9433 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9434 basic_block region_exit)
9436 class loop *outer = region_entry->loop_father;
9437 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9439 /* Don't parallelize the kernels region if it contains more than one outer
9440 loop. */
9441 unsigned int nr_outer_loops = 0;
9442 class loop *single_outer = NULL;
9443 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9445 gcc_assert (loop_outer (loop) == outer);
9447 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9448 continue;
9450 if (region_exit != NULL
9451 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9452 continue;
9454 nr_outer_loops++;
9455 single_outer = loop;
9457 if (nr_outer_loops != 1)
9458 return;
9460 for (class loop *loop = single_outer->inner;
9461 loop != NULL;
9462 loop = loop->inner)
9463 if (loop->next)
9464 return;
9466 /* Mark the loops in the region. */
9467 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9468 loop->in_oacc_kernels_region = true;
9471 /* Build target argument identifier from the DEVICE identifier, value
9472 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
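/* A sketch of the encoding produced below: the identifier is simply

     DEVICE | (SUBSEQUENT_PARAM ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0) | ID

   i.e. the three fields are OR-ed together into one integer constant.  */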
9474 static tree
9475 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9477 tree t = build_int_cst (integer_type_node, device);
9478 if (subsequent_param)
9479 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9480 build_int_cst (integer_type_node,
9481 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9482 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9483 build_int_cst (integer_type_node, id));
9484 return t;
9487 /* Like above but return it in a type that can be directly stored as an element
9488 of the argument array. */
9490 static tree
9491 get_target_argument_identifier (int device, bool subsequent_param, int id)
9493 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9494 return fold_convert (ptr_type_node, t);
9497 /* Return a target argument consisting of DEVICE identifier, value identifier
9498 ID, and the actual VALUE. */
9500 static tree
9501 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9502 tree value)
9504 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9505 fold_convert (integer_type_node, value),
9506 build_int_cst (unsigned_type_node,
9507 GOMP_TARGET_ARG_VALUE_SHIFT));
9508 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9509 get_target_argument_identifier_1 (device, false, id));
9510 t = fold_convert (ptr_type_node, t);
9511 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9514 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9515 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9516 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9517 arguments. */
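/* E.g. a num_teams or thread_limit expression that is not a small constant
   is pushed as two consecutive array slots: an identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value itself.  */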
9519 static void
9520 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9521 int id, tree value, vec <tree> *args)
9523 if (tree_fits_shwi_p (value)
9524 && tree_to_shwi (value) > -(1 << 15)
9525 && tree_to_shwi (value) < (1 << 15))
9526 args->quick_push (get_target_argument_value (gsi, device, id, value));
9527 else
9529 args->quick_push (get_target_argument_identifier (device, true, id));
9530 value = fold_convert (ptr_type_node, value);
9531 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9532 GSI_SAME_STMT);
9533 args->quick_push (value);
9537 /* Create an array of arguments that is then passed to GOMP_target. */
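/* A sketch of the resulting NULL-terminated array:

     .omp_target_args[0] = encoded num_teams (or -1 if absent)
     .omp_target_args[1] = encoded thread_limit (or -1 if absent)
     .omp_target_args[2] = NULL

   with values too large for the inline encoding split into
   identifier/value pairs as described above.  */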
9539 static tree
9540 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9542 auto_vec <tree, 6> args;
9543 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9544 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9545 if (c)
9546 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9547 else
9548 t = integer_minus_one_node;
9549 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9550 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9552 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9553 if (c)
9554 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9555 else
9556 t = integer_minus_one_node;
9557 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9558 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9559 &args);
9561 /* Produce more, perhaps device specific, arguments here. */
9563 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9564 args.length () + 1),
9565 ".omp_target_args");
9566 for (unsigned i = 0; i < args.length (); i++)
9568 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9569 build_int_cst (integer_type_node, i),
9570 NULL_TREE, NULL_TREE);
9571 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9572 GSI_SAME_STMT);
9574 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9575 build_int_cst (integer_type_node, args.length ()),
9576 NULL_TREE, NULL_TREE);
9577 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9578 GSI_SAME_STMT);
9579 TREE_ADDRESSABLE (argarray) = 1;
9580 return build_fold_addr_expr (argarray);
9583 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
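/* In outline: for offloaded kinds the region body is moved into CHILD_FN
   and registered for offloading, and in every case the directive itself is
   replaced by a single runtime call (one of the GOMP_target* or GOACC_*
   entry points) selected via START_IX below.  */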
9585 static void
9586 expand_omp_target (struct omp_region *region)
9588 basic_block entry_bb, exit_bb, new_bb;
9589 struct function *child_cfun;
9590 tree child_fn, block, t;
9591 gimple_stmt_iterator gsi;
9592 gomp_target *entry_stmt;
9593 gimple *stmt;
9594 edge e;
9595 bool offloaded;
9596 int target_kind;
9598 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9599 target_kind = gimple_omp_target_kind (entry_stmt);
9600 new_bb = region->entry;
9602 offloaded = is_gimple_omp_offloaded (entry_stmt);
9603 switch (target_kind)
9605 case GF_OMP_TARGET_KIND_REGION:
9606 case GF_OMP_TARGET_KIND_UPDATE:
9607 case GF_OMP_TARGET_KIND_ENTER_DATA:
9608 case GF_OMP_TARGET_KIND_EXIT_DATA:
9609 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9610 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9611 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9612 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9613 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9614 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9615 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9616 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9617 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9618 case GF_OMP_TARGET_KIND_DATA:
9619 case GF_OMP_TARGET_KIND_OACC_DATA:
9620 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9621 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9622 break;
9623 default:
9624 gcc_unreachable ();
9627 child_fn = NULL_TREE;
9628 child_cfun = NULL;
9629 if (offloaded)
9631 child_fn = gimple_omp_target_child_fn (entry_stmt);
9632 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9635 /* Supported by expand_omp_taskreg, but not here. */
9636 if (child_cfun != NULL)
9637 gcc_checking_assert (!child_cfun->cfg);
9638 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9640 entry_bb = region->entry;
9641 exit_bb = region->exit;
9643 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9644 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9646 /* Going on, all OpenACC compute constructs are mapped to
9647 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9648 To distinguish between them, we attach attributes. */
9649 switch (target_kind)
9651 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9652 DECL_ATTRIBUTES (child_fn)
9653 = tree_cons (get_identifier ("oacc parallel"),
9654 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9655 break;
9656 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9657 DECL_ATTRIBUTES (child_fn)
9658 = tree_cons (get_identifier ("oacc kernels"),
9659 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9660 break;
9661 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9662 DECL_ATTRIBUTES (child_fn)
9663 = tree_cons (get_identifier ("oacc serial"),
9664 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9665 break;
9666 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9667 DECL_ATTRIBUTES (child_fn)
9668 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9669 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9670 break;
9671 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9672 DECL_ATTRIBUTES (child_fn)
9673 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9674 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9675 break;
9676 default:
9677 /* Make sure we don't miss any. */
9678 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9679 && is_gimple_omp_offloaded (entry_stmt)));
9680 break;
9683 if (offloaded)
9685 unsigned srcidx, dstidx, num;
9687 /* If the offloading region needs data sent from the parent
9688 function, then the very first statement (except possible
9689 tree profile counter updates) of the offloading body
9690 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9691 &.OMP_DATA_O is passed as an argument to the child function,
9692 we need to replace it with the argument as seen by the child
9693 function.
9695 In most cases, this will end up being the identity assignment
9696 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9697 a function call that has been inlined, the original PARM_DECL
9698 .OMP_DATA_I may have been converted into a different local
9699 variable. In which case, we need to keep the assignment. */
9700 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9701 if (data_arg)
9703 basic_block entry_succ_bb = single_succ (entry_bb);
9704 gimple_stmt_iterator gsi;
9705 tree arg;
9706 gimple *tgtcopy_stmt = NULL;
9707 tree sender = TREE_VEC_ELT (data_arg, 0);
9709 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9711 gcc_assert (!gsi_end_p (gsi));
9712 stmt = gsi_stmt (gsi);
9713 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9714 continue;
9716 if (gimple_num_ops (stmt) == 2)
9718 tree arg = gimple_assign_rhs1 (stmt);
9720 /* We're ignoring the subcode because we're
9721 effectively doing a STRIP_NOPS. */
9723 if (TREE_CODE (arg) == ADDR_EXPR
9724 && TREE_OPERAND (arg, 0) == sender)
9726 tgtcopy_stmt = stmt;
9727 break;
9732 gcc_assert (tgtcopy_stmt != NULL);
9733 arg = DECL_ARGUMENTS (child_fn);
9735 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9736 gsi_remove (&gsi, true);
9739 /* Declare local variables needed in CHILD_CFUN. */
9740 block = DECL_INITIAL (child_fn);
9741 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9742 /* The gimplifier could record temporaries in the offloading block
9743 rather than in containing function's local_decls chain,
9744 which would mean cgraph missed finalizing them. Do it now. */
9745 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9746 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9747 varpool_node::finalize_decl (t);
9748 DECL_SAVED_TREE (child_fn) = NULL;
9749 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9750 gimple_set_body (child_fn, NULL);
9751 TREE_USED (block) = 1;
9753 /* Reset DECL_CONTEXT on function arguments. */
9754 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9755 DECL_CONTEXT (t) = child_fn;
9757 /* Split ENTRY_BB at GIMPLE_*,
9758 so that it can be moved to the child function. */
9759 gsi = gsi_last_nondebug_bb (entry_bb);
9760 stmt = gsi_stmt (gsi);
9761 gcc_assert (stmt
9762 && gimple_code (stmt) == gimple_code (entry_stmt));
9763 e = split_block (entry_bb, stmt);
9764 gsi_remove (&gsi, true);
9765 entry_bb = e->dest;
9766 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9768 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9769 if (exit_bb)
9771 gsi = gsi_last_nondebug_bb (exit_bb);
9772 gcc_assert (!gsi_end_p (gsi)
9773 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9774 stmt = gimple_build_return (NULL);
9775 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9776 gsi_remove (&gsi, true);
9779 /* Move the offloading region into CHILD_CFUN. */
9781 block = gimple_block (entry_stmt);
9783 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9784 if (exit_bb)
9785 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9786 /* When the OMP expansion process cannot guarantee an up-to-date
9787 loop tree, arrange for the child function to fix up loops. */
9788 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9789 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9791 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9792 num = vec_safe_length (child_cfun->local_decls);
9793 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9795 t = (*child_cfun->local_decls)[srcidx];
9796 if (DECL_CONTEXT (t) == cfun->decl)
9797 continue;
9798 if (srcidx != dstidx)
9799 (*child_cfun->local_decls)[dstidx] = t;
9800 dstidx++;
9802 if (dstidx != num)
9803 vec_safe_truncate (child_cfun->local_decls, dstidx);
9805 /* Inform the callgraph about the new function. */
9806 child_cfun->curr_properties = cfun->curr_properties;
9807 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9808 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9809 cgraph_node *node = cgraph_node::get_create (child_fn);
9810 node->parallelized_function = 1;
9811 cgraph_node::add_new_function (child_fn, true);
9813 /* Add the new function to the offload table. */
9814 if (ENABLE_OFFLOADING)
9816 if (in_lto_p)
9817 DECL_PRESERVE_P (child_fn) = 1;
9818 vec_safe_push (offload_funcs, child_fn);
9821 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9822 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9824 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9825 fixed in a following pass. */
9826 push_cfun (child_cfun);
9827 if (need_asm)
9828 assign_assembler_name_if_needed (child_fn);
9829 cgraph_edge::rebuild_edges ();
9831 /* Some EH regions might become dead, see PR34608. If
9832 pass_cleanup_cfg isn't the first pass to happen with the
9833 new child, these dead EH edges might cause problems.
9834 Clean them up now. */
9835 if (flag_exceptions)
9837 basic_block bb;
9838 bool changed = false;
9840 FOR_EACH_BB_FN (bb, cfun)
9841 changed |= gimple_purge_dead_eh_edges (bb);
9842 if (changed)
9843 cleanup_tree_cfg ();
9845 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9846 verify_loop_structure ();
9847 pop_cfun ();
9849 if (dump_file && !gimple_in_ssa_p (cfun))
9851 omp_any_child_fn_dumped = true;
9852 dump_function_header (dump_file, child_fn, dump_flags);
9853 dump_function_to_file (child_fn, dump_file, dump_flags);
9856 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9859 /* Emit a library call to launch the offloading region, or do data
9860 transfers. */
9861 tree t1, t2, t3, t4, depend, c, clauses;
9862 enum built_in_function start_ix;
9863 unsigned int flags_i = 0;
9865 switch (gimple_omp_target_kind (entry_stmt))
9867 case GF_OMP_TARGET_KIND_REGION:
9868 start_ix = BUILT_IN_GOMP_TARGET;
9869 break;
9870 case GF_OMP_TARGET_KIND_DATA:
9871 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9872 break;
9873 case GF_OMP_TARGET_KIND_UPDATE:
9874 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9875 break;
9876 case GF_OMP_TARGET_KIND_ENTER_DATA:
9877 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9878 break;
9879 case GF_OMP_TARGET_KIND_EXIT_DATA:
9880 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9881 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9882 break;
9883 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9884 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9885 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9886 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9887 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9888 start_ix = BUILT_IN_GOACC_PARALLEL;
9889 break;
9890 case GF_OMP_TARGET_KIND_OACC_DATA:
9891 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9892 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9893 start_ix = BUILT_IN_GOACC_DATA_START;
9894 break;
9895 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9896 start_ix = BUILT_IN_GOACC_UPDATE;
9897 break;
9898 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9899 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9900 break;
9901 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9902 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9903 break;
9904 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9905 start_ix = BUILT_IN_GOACC_DECLARE;
9906 break;
9907 default:
9908 gcc_unreachable ();
9911 clauses = gimple_omp_target_clauses (entry_stmt);
9913 tree device = NULL_TREE;
9914 location_t device_loc = UNKNOWN_LOCATION;
9915 tree goacc_flags = NULL_TREE;
9916 if (is_gimple_omp_oacc (entry_stmt))
9918 /* By default, no GOACC_FLAGs are set. */
9919 goacc_flags = integer_zero_node;
9921 else
9923 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9924 if (c)
9926 device = OMP_CLAUSE_DEVICE_ID (c);
9927 device_loc = OMP_CLAUSE_LOCATION (c);
9928 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
9929 sorry_at (device_loc, "%<ancestor%> not yet supported");
9931 else
9933 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
9934 library choose). */
9935 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9936 device_loc = gimple_location (entry_stmt);
9939 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9940 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
9941 nowait doesn't appear. */
9942 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
9943 c = NULL;
9944 if (c)
9945 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9948 /* By default, there is no conditional. */
9949 tree cond = NULL_TREE;
9950 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9951 if (c)
9952 cond = OMP_CLAUSE_IF_EXPR (c);
9953 /* If we found the clause 'if (cond)', build:
9954 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9955 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9956 if (cond)
9958 tree *tp;
9959 if (is_gimple_omp_oacc (entry_stmt))
9960 tp = &goacc_flags;
9961 else
9963 /* Ensure 'device' is of the correct type. */
9964 device = fold_convert_loc (device_loc, integer_type_node, device);
9966 tp = &device;
9969 cond = gimple_boolify (cond);
9971 basic_block cond_bb, then_bb, else_bb;
9972 edge e;
9973 tree tmp_var;
9975 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9976 if (offloaded)
9977 e = split_block_after_labels (new_bb);
9978 else
9980 gsi = gsi_last_nondebug_bb (new_bb);
9981 gsi_prev (&gsi);
9982 e = split_block (new_bb, gsi_stmt (gsi));
9984 cond_bb = e->src;
9985 new_bb = e->dest;
9986 remove_edge (e);
9988 then_bb = create_empty_bb (cond_bb);
9989 else_bb = create_empty_bb (then_bb);
9990 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9991 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9993 stmt = gimple_build_cond_empty (cond);
9994 gsi = gsi_last_bb (cond_bb);
9995 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9997 gsi = gsi_start_bb (then_bb);
9998 stmt = gimple_build_assign (tmp_var, *tp);
9999 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10001 gsi = gsi_start_bb (else_bb);
10002 if (is_gimple_omp_oacc (entry_stmt))
10003 stmt = gimple_build_assign (tmp_var,
10004 BIT_IOR_EXPR,
10005 *tp,
10006 build_int_cst (integer_type_node,
10007 GOACC_FLAG_HOST_FALLBACK));
10008 else
10009 stmt = gimple_build_assign (tmp_var,
10010 build_int_cst (integer_type_node,
10011 GOMP_DEVICE_HOST_FALLBACK));
10012 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10014 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10015 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10016 add_bb_to_loop (then_bb, cond_bb->loop_father);
10017 add_bb_to_loop (else_bb, cond_bb->loop_father);
10018 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10019 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10021 *tp = tmp_var;
10023 gsi = gsi_last_nondebug_bb (new_bb);
10025 else
10027 gsi = gsi_last_nondebug_bb (new_bb);
10029 if (device != NULL_TREE)
10030 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10031 true, GSI_SAME_STMT);
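/* T1 below becomes the number of map entries, while T2, T3 and T4 are the
   addresses of the host-address, size and kind arrays prepared during OMP
   lowering (or zero constants when there is nothing to map).  */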
10034 t = gimple_omp_target_data_arg (entry_stmt);
10035 if (t == NULL)
10037 t1 = size_zero_node;
10038 t2 = build_zero_cst (ptr_type_node);
10039 t3 = t2;
10040 t4 = t2;
10042 else
10044 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10045 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10046 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10047 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10048 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10051 gimple *g;
10052 bool tagging = false;
10053 /* The maximum number of arguments used by any start_ix, without varargs. */
10054 auto_vec<tree, 11> args;
10055 if (is_gimple_omp_oacc (entry_stmt))
10057 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10058 TREE_TYPE (goacc_flags), goacc_flags);
10059 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10060 NULL_TREE, true,
10061 GSI_SAME_STMT);
10062 args.quick_push (goacc_flags_m);
10064 else
10065 args.quick_push (device);
10066 if (offloaded)
10067 args.quick_push (build_fold_addr_expr (child_fn));
10068 args.quick_push (t1);
10069 args.quick_push (t2);
10070 args.quick_push (t3);
10071 args.quick_push (t4);
10072 switch (start_ix)
10074 case BUILT_IN_GOACC_DATA_START:
10075 case BUILT_IN_GOACC_DECLARE:
10076 case BUILT_IN_GOMP_TARGET_DATA:
10077 break;
10078 case BUILT_IN_GOMP_TARGET:
10079 case BUILT_IN_GOMP_TARGET_UPDATE:
10080 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10081 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10082 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10083 if (c)
10084 depend = OMP_CLAUSE_DECL (c);
10085 else
10086 depend = build_int_cst (ptr_type_node, 0);
10087 args.quick_push (depend);
10088 if (start_ix == BUILT_IN_GOMP_TARGET)
10089 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10090 break;
10091 case BUILT_IN_GOACC_PARALLEL:
10092 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10094 tree dims = NULL_TREE;
10095 unsigned int ix;
10097 /* For serial constructs we set all dimensions to 1. */
10098 for (ix = GOMP_DIM_MAX; ix--;)
10099 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10100 oacc_replace_fn_attrib (child_fn, dims);
10102 else
10103 oacc_set_fn_attrib (child_fn, clauses, &args);
10104 tagging = true;
10105 /* FALLTHRU */
10106 case BUILT_IN_GOACC_ENTER_DATA:
10107 case BUILT_IN_GOACC_EXIT_DATA:
10108 case BUILT_IN_GOACC_UPDATE:
10110 tree t_async = NULL_TREE;
10112 /* If present, use the value specified by the respective
10113 clause, making sure that it is of the correct type. */
10114 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10115 if (c)
10116 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10117 integer_type_node,
10118 OMP_CLAUSE_ASYNC_EXPR (c));
10119 else if (!tagging)
10120 /* Default values for t_async. */
10121 t_async = fold_convert_loc (gimple_location (entry_stmt),
10122 integer_type_node,
10123 build_int_cst (integer_type_node,
10124 GOMP_ASYNC_SYNC));
10125 if (tagging && t_async)
10127 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10129 if (TREE_CODE (t_async) == INTEGER_CST)
10131 /* See if we can pack the async arg into the tag's
10132 operand. */
10133 i_async = TREE_INT_CST_LOW (t_async);
10134 if (i_async < GOMP_LAUNCH_OP_MAX)
10135 t_async = NULL_TREE;
10136 else
10137 i_async = GOMP_LAUNCH_OP_MAX;
10139 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10140 i_async));
10142 if (t_async)
10143 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10144 NULL_TREE, true,
10145 GSI_SAME_STMT));
10147 /* Save the argument index, and ... */
10148 unsigned t_wait_idx = args.length ();
10149 unsigned num_waits = 0;
10150 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10151 if (!tagging || c)
10152 /* ... push a placeholder. */
10153 args.safe_push (integer_zero_node);
10155 for (; c; c = OMP_CLAUSE_CHAIN (c))
10156 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10158 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10159 integer_type_node,
10160 OMP_CLAUSE_WAIT_EXPR (c));
10161 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10162 GSI_SAME_STMT);
10163 args.safe_push (arg);
10164 num_waits++;
10167 if (!tagging || num_waits)
10169 tree len;
10171 /* Now that we know the number, update the placeholder. */
10172 if (tagging)
10173 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10174 else
10175 len = build_int_cst (integer_type_node, num_waits);
10176 len = fold_convert_loc (gimple_location (entry_stmt),
10177 unsigned_type_node, len);
10178 args[t_wait_idx] = len;
10181 break;
10182 default:
10183 gcc_unreachable ();
10185 if (tagging)
10186 /* Push terminal marker - zero. */
10187 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10189 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10190 gimple_set_location (g, gimple_location (entry_stmt));
10191 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10192 if (!offloaded)
10194 g = gsi_stmt (gsi);
10195 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10196 gsi_remove (&gsi, true);
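/* Illustrative sketch (editorial addition, not part of the original
   sources): for an OpenACC construct such as

	 void
	 scale (float *restrict a, int n)
	 {
	 #pragma acc parallel loop async(2) wait(1) copy(a[0:n])
	   for (int i = 0; i < n; i++)
	     a[i] *= 2.0f;
	 }

   the code above appends GOMP_LAUNCH_* tag words after the mapping
   arguments of the launch call.  Because async(2) and wait(1) are small
   compile-time constants, the async value is packed straight into the
   GOMP_LAUNCH_ASYNC tag operand (see the GOMP_LAUNCH_OP_MAX check),
   while the wait queue values follow a GOMP_LAUNCH_WAIT tag whose
   operand records how many there are; a zero tag word terminates the
   list.  Conceptually the trailing arguments look like

	 ..., GOMP_LAUNCH_ASYNC tag (op = 2),
	      GOMP_LAUNCH_WAIT tag (op = 1), 1,
	      0

   The exact libgomp entry point and the leading argument layout are
   version-dependent; this is only meant as a picture of how the tags
   built here end up in the call.  */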
10200 /* Expand the parallel region tree rooted at REGION. Expansion
10201 proceeds in depth-first order. Innermost regions are expanded
10202 first. This way, parallel regions that require a new function to
10203 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10204 internal dependencies in their body. */
10206 static void
10207 expand_omp (struct omp_region *region)
10209 omp_any_child_fn_dumped = false;
10210 while (region)
10212 location_t saved_location;
10213 gimple *inner_stmt = NULL;
10215 /* First, determine whether this is a combined parallel+workshare
10216 region. */
10217 if (region->type == GIMPLE_OMP_PARALLEL)
10218 determine_parallel_type (region);
10220 if (region->type == GIMPLE_OMP_FOR
10221 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10222 inner_stmt = last_stmt (region->inner->entry);
10224 if (region->inner)
10225 expand_omp (region->inner);
10227 saved_location = input_location;
10228 if (gimple_has_location (last_stmt (region->entry)))
10229 input_location = gimple_location (last_stmt (region->entry));
10231 switch (region->type)
10233 case GIMPLE_OMP_PARALLEL:
10234 case GIMPLE_OMP_TASK:
10235 expand_omp_taskreg (region);
10236 break;
10238 case GIMPLE_OMP_FOR:
10239 expand_omp_for (region, inner_stmt);
10240 break;
10242 case GIMPLE_OMP_SECTIONS:
10243 expand_omp_sections (region);
10244 break;
10246 case GIMPLE_OMP_SECTION:
10247 /* Individual omp sections are handled together with their
10248 parent GIMPLE_OMP_SECTIONS region. */
10249 break;
10251 case GIMPLE_OMP_SINGLE:
10252 case GIMPLE_OMP_SCOPE:
10253 expand_omp_single (region);
10254 break;
10256 case GIMPLE_OMP_ORDERED:
10258 gomp_ordered *ord_stmt
10259 = as_a <gomp_ordered *> (last_stmt (region->entry));
10260 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10261 OMP_CLAUSE_DEPEND))
10263 /* We'll expand these when expanding the corresponding
10264 worksharing region with an ordered(n) clause. */
10265 gcc_assert (region->outer
10266 && region->outer->type == GIMPLE_OMP_FOR);
10267 region->ord_stmt = ord_stmt;
10268 break;
10271 /* FALLTHRU */
10272 case GIMPLE_OMP_MASTER:
10273 case GIMPLE_OMP_MASKED:
10274 case GIMPLE_OMP_TASKGROUP:
10275 case GIMPLE_OMP_CRITICAL:
10276 case GIMPLE_OMP_TEAMS:
10277 expand_omp_synch (region);
10278 break;
10280 case GIMPLE_OMP_ATOMIC_LOAD:
10281 expand_omp_atomic (region);
10282 break;
10284 case GIMPLE_OMP_TARGET:
10285 expand_omp_target (region);
10286 break;
10288 default:
10289 gcc_unreachable ();
10292 input_location = saved_location;
10293 region = region->next;
10295 if (omp_any_child_fn_dumped)
10297 if (dump_file)
10298 dump_function_header (dump_file, current_function_decl, dump_flags);
10299 omp_any_child_fn_dumped = false;
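/* Illustrative sketch (editorial addition): expand_omp above recurses
   into region->inner before handling the region itself, so for a nest
   such as

	 void
	 work (int *a, int n)
	 {
	 #pragma omp parallel
	 #pragma omp for
	   for (int i = 0; i < n; i++)
	     a[i] = i;
	 }

   the GIMPLE_OMP_FOR region is rewritten into explicit loop and
   scheduling code first, and only then is the enclosing
   GIMPLE_OMP_PARALLEL region outlined into its child function, so the
   outlined body contains no unexpanded OMP directives.  */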
10303 /* Helper for build_omp_regions. Scan the dominator tree starting at
10304 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10305 true, the function ends once a single tree is built (otherwise, a whole
10306 forest of OMP constructs may be built). */
10308 static void
10309 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10310 bool single_tree)
10312 gimple_stmt_iterator gsi;
10313 gimple *stmt;
10314 basic_block son;
10316 gsi = gsi_last_nondebug_bb (bb);
10317 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10319 struct omp_region *region;
10320 enum gimple_code code;
10322 stmt = gsi_stmt (gsi);
10323 code = gimple_code (stmt);
10324 if (code == GIMPLE_OMP_RETURN)
10326 /* STMT is the return point out of region PARENT. Mark it
10327 as the exit point and make PARENT the immediately
10328 enclosing region. */
10329 gcc_assert (parent);
10330 region = parent;
10331 region->exit = bb;
10332 parent = parent->outer;
10334 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10336 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10337 GIMPLE_OMP_RETURN, but matches with
10338 GIMPLE_OMP_ATOMIC_LOAD. */
10339 gcc_assert (parent);
10340 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10341 region = parent;
10342 region->exit = bb;
10343 parent = parent->outer;
10345 else if (code == GIMPLE_OMP_CONTINUE)
10347 gcc_assert (parent);
10348 parent->cont = bb;
10350 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10352 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10353 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10355 else
10357 region = new_omp_region (bb, code, parent);
10358 /* Otherwise... */
10359 if (code == GIMPLE_OMP_TARGET)
10361 switch (gimple_omp_target_kind (stmt))
10363 case GF_OMP_TARGET_KIND_REGION:
10364 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10365 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10366 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10367 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10368 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10369 break;
10370 case GF_OMP_TARGET_KIND_UPDATE:
10371 case GF_OMP_TARGET_KIND_ENTER_DATA:
10372 case GF_OMP_TARGET_KIND_EXIT_DATA:
10373 case GF_OMP_TARGET_KIND_DATA:
10374 case GF_OMP_TARGET_KIND_OACC_DATA:
10375 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10376 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10377 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10378 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10379 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10380 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10381 /* ..., other than for those stand-alone directives... */
10382 region = NULL;
10383 break;
10384 default:
10385 gcc_unreachable ();
10388 else if (code == GIMPLE_OMP_ORDERED
10389 && omp_find_clause (gimple_omp_ordered_clauses
10390 (as_a <gomp_ordered *> (stmt)),
10391 OMP_CLAUSE_DEPEND))
10392 /* #pragma omp ordered depend is also just a stand-alone
10393 directive. */
10394 region = NULL;
10395 else if (code == GIMPLE_OMP_TASK
10396 && gimple_omp_task_taskwait_p (stmt))
10397 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10398 region = NULL;
10399 /* ..., this directive becomes the parent for a new region. */
10400 if (region)
10401 parent = region;
10405 if (single_tree && !parent)
10406 return;
10408 for (son = first_dom_son (CDI_DOMINATORS, bb);
10409 son;
10410 son = next_dom_son (CDI_DOMINATORS, son))
10411 build_omp_regions_1 (son, parent, single_tree);
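/* Illustrative sketch (editorial addition): for a parallel region that
   contains a worksharing loop, build_omp_regions_1 produces a small
   tree of omp_region nodes, roughly

	 GIMPLE_OMP_PARALLEL region
	   inner: GIMPLE_OMP_FOR region
		  entry = block ending in GIMPLE_OMP_FOR
		  cont  = block ending in GIMPLE_OMP_CONTINUE
		  exit  = block ending in GIMPLE_OMP_RETURN

   Stand-alone directives (the target data/update kinds, "omp ordered
   depend" and "omp taskwait depend" handled above) set region to NULL
   and therefore never become the parent of a new region.  */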
10414 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10415 root_omp_region. */
10417 static void
10418 build_omp_regions_root (basic_block root)
10420 gcc_assert (root_omp_region == NULL);
10421 build_omp_regions_1 (root, NULL, true);
10422 gcc_assert (root_omp_region != NULL);
10425 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
10427 void
10428 omp_expand_local (basic_block head)
10430 build_omp_regions_root (head);
10431 if (dump_file && (dump_flags & TDF_DETAILS))
10433 fprintf (dump_file, "\nOMP region tree\n\n");
10434 dump_omp_region (dump_file, root_omp_region, 0);
10435 fprintf (dump_file, "\n");
10438 remove_exit_barriers (root_omp_region);
10439 expand_omp (root_omp_region);
10441 omp_free_regions ();
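/* Editorial note: omp_expand_local above expands just the region tree
   rooted at one block, which suits passes that create OMP directives
   after the main ompexp pass has run.  The usual example (an assumption
   about callers, not something enforced here) is loop
   auto-parallelization, e.g.

	 gcc -O2 -ftree-parallelize-loops=4 init.c

	 void
	 init (int *a, int n)
	 {
	   for (int i = 0; i < n; i++)
	     a[i] = 0;
	 }

   where the parallelizing pass emits GIMPLE_OMP_PARALLEL/GIMPLE_OMP_FOR
   for the loop and then expands the newly created subtree via this
   entry point instead of rerunning the whole-function pass.  */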
10444 /* Scan the CFG and build a tree of OMP regions, storing the root of
10445 the tree in root_omp_region. */
10447 static void
10448 build_omp_regions (void)
10450 gcc_assert (root_omp_region == NULL);
10451 calculate_dominance_info (CDI_DOMINATORS);
10452 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10455 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10457 static unsigned int
10458 execute_expand_omp (void)
10460 build_omp_regions ();
10462 if (!root_omp_region)
10463 return 0;
10465 if (dump_file)
10467 fprintf (dump_file, "\nOMP region tree\n\n");
10468 dump_omp_region (dump_file, root_omp_region, 0);
10469 fprintf (dump_file, "\n");
10472 remove_exit_barriers (root_omp_region);
10474 expand_omp (root_omp_region);
10476 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10477 verify_loop_structure ();
10478 cleanup_tree_cfg ();
10480 omp_free_regions ();
10482 return 0;
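/* Illustrative sketch (editorial addition): the result of
   execute_expand_omp can be inspected with -fdump-tree-ompexp.  For

	 extern void g (void);

	 void
	 f (void)
	 {
	 #pragma omp parallel
	   g ();
	 }

   compiled with -fopenmp, the dump typically shows the region replaced
   by an outlined child function plus a runtime call along the lines of

	 __builtin_GOMP_parallel (f._omp_fn.0, 0B, 0, 0);

   The exact builtin chosen and its arguments depend on the clauses
   present and on whether the parallel is combined with a worksharing
   construct, so the dump line above is indicative only.  */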
10485 /* OMP expansion -- the default pass, run before creation of SSA form. */
10487 namespace {
10489 const pass_data pass_data_expand_omp =
10491 GIMPLE_PASS, /* type */
10492 "ompexp", /* name */
10493 OPTGROUP_OMP, /* optinfo_flags */
10494 TV_NONE, /* tv_id */
10495 PROP_gimple_any, /* properties_required */
10496 PROP_gimple_eomp, /* properties_provided */
10497 0, /* properties_destroyed */
10498 0, /* todo_flags_start */
10499 0, /* todo_flags_finish */
10502 class pass_expand_omp : public gimple_opt_pass
10504 public:
10505 pass_expand_omp (gcc::context *ctxt)
10506 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10509 /* opt_pass methods: */
10510 virtual unsigned int execute (function *)
10512 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10513 || flag_openmp_simd != 0)
10514 && !seen_error ());
10516 /* This pass always runs, to provide PROP_gimple_eomp.
10517 But often, there is nothing to do. */
10518 if (!gate)
10519 return 0;
10521 return execute_expand_omp ();
10524 }; // class pass_expand_omp
10526 } // anon namespace
10528 gimple_opt_pass *
10529 make_pass_expand_omp (gcc::context *ctxt)
10531 return new pass_expand_omp (ctxt);
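/* Editorial note: the execute hook above does real work only when one
   of the OMP-related front-end flags was given, e.g.

	 gcc -fopenmp      file.c
	 gcc -fopenacc     file.c
	 gcc -fopenmp-simd file.c

   Without any of them the pass still runs, so that PROP_gimple_eomp is
   still provided to later passes, but it returns immediately.  */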
10534 namespace {
10536 const pass_data pass_data_expand_omp_ssa =
10538 GIMPLE_PASS, /* type */
10539 "ompexpssa", /* name */
10540 OPTGROUP_OMP, /* optinfo_flags */
10541 TV_NONE, /* tv_id */
10542 PROP_cfg | PROP_ssa, /* properties_required */
10543 PROP_gimple_eomp, /* properties_provided */
10544 0, /* properties_destroyed */
10545 0, /* todo_flags_start */
10546 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10549 class pass_expand_omp_ssa : public gimple_opt_pass
10551 public:
10552 pass_expand_omp_ssa (gcc::context *ctxt)
10553 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10556 /* opt_pass methods: */
10557 virtual bool gate (function *fun)
10559 return !(fun->curr_properties & PROP_gimple_eomp);
10561 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10562 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10564 }; // class pass_expand_omp_ssa
10566 } // anon namespace
10568 gimple_opt_pass *
10569 make_pass_expand_omp_ssa (gcc::context *ctxt)
10571 return new pass_expand_omp_ssa (ctxt);
10574 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10575 GIMPLE_* codes. */
10577 bool
10578 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10579 int *region_idx)
10581 gimple *last = last_stmt (bb);
10582 enum gimple_code code = gimple_code (last);
10583 struct omp_region *cur_region = *region;
10584 bool fallthru = false;
10586 switch (code)
10588 case GIMPLE_OMP_PARALLEL:
10589 case GIMPLE_OMP_FOR:
10590 case GIMPLE_OMP_SINGLE:
10591 case GIMPLE_OMP_TEAMS:
10592 case GIMPLE_OMP_MASTER:
10593 case GIMPLE_OMP_MASKED:
10594 case GIMPLE_OMP_SCOPE:
10595 case GIMPLE_OMP_TASKGROUP:
10596 case GIMPLE_OMP_CRITICAL:
10597 case GIMPLE_OMP_SECTION:
10598 cur_region = new_omp_region (bb, code, cur_region);
10599 fallthru = true;
10600 break;
10602 case GIMPLE_OMP_TASK:
10603 cur_region = new_omp_region (bb, code, cur_region);
10604 fallthru = true;
10605 if (gimple_omp_task_taskwait_p (last))
10606 cur_region = cur_region->outer;
10607 break;
10609 case GIMPLE_OMP_ORDERED:
10610 cur_region = new_omp_region (bb, code, cur_region);
10611 fallthru = true;
10612 if (omp_find_clause (gimple_omp_ordered_clauses
10613 (as_a <gomp_ordered *> (last)),
10614 OMP_CLAUSE_DEPEND))
10615 cur_region = cur_region->outer;
10616 break;
10618 case GIMPLE_OMP_TARGET:
10619 cur_region = new_omp_region (bb, code, cur_region);
10620 fallthru = true;
10621 switch (gimple_omp_target_kind (last))
10623 case GF_OMP_TARGET_KIND_REGION:
10624 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10625 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10626 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10627 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10628 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10629 break;
10630 case GF_OMP_TARGET_KIND_UPDATE:
10631 case GF_OMP_TARGET_KIND_ENTER_DATA:
10632 case GF_OMP_TARGET_KIND_EXIT_DATA:
10633 case GF_OMP_TARGET_KIND_DATA:
10634 case GF_OMP_TARGET_KIND_OACC_DATA:
10635 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10636 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10637 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10638 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10639 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10640 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10641 cur_region = cur_region->outer;
10642 break;
10643 default:
10644 gcc_unreachable ();
10646 break;
10648 case GIMPLE_OMP_SECTIONS:
10649 cur_region = new_omp_region (bb, code, cur_region);
10650 fallthru = true;
10651 break;
10653 case GIMPLE_OMP_SECTIONS_SWITCH:
10654 fallthru = false;
10655 break;
10657 case GIMPLE_OMP_ATOMIC_LOAD:
10658 case GIMPLE_OMP_ATOMIC_STORE:
10659 fallthru = true;
10660 break;
10662 case GIMPLE_OMP_RETURN:
10663 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10664 somewhere other than the next block. This will be
10665 created later. */
10666 cur_region->exit = bb;
10667 if (cur_region->type == GIMPLE_OMP_TASK)
10668 /* Add an edge corresponding to not scheduling the task
10669 immediately. */
10670 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10671 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10672 cur_region = cur_region->outer;
10673 break;
10675 case GIMPLE_OMP_CONTINUE:
10676 cur_region->cont = bb;
10677 switch (cur_region->type)
10679 case GIMPLE_OMP_FOR:
10680 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10681 successor edges as abnormal to prevent splitting
10682 them. */
10683 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10684 /* Make the loopback edge. */
10685 make_edge (bb, single_succ (cur_region->entry),
10686 EDGE_ABNORMAL);
10688 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10689 corresponds to the case that the body of the loop
10690 is not executed at all. */
10691 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10692 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10693 fallthru = false;
10694 break;
10696 case GIMPLE_OMP_SECTIONS:
10697 /* Wire up the edges into and out of the nested sections. */
10699 basic_block switch_bb = single_succ (cur_region->entry);
10701 struct omp_region *i;
10702 for (i = cur_region->inner; i ; i = i->next)
10704 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10705 make_edge (switch_bb, i->entry, 0);
10706 make_edge (i->exit, bb, EDGE_FALLTHRU);
10709 /* Make the loopback edge to the block with
10710 GIMPLE_OMP_SECTIONS_SWITCH. */
10711 make_edge (bb, switch_bb, 0);
10713 /* Make the edge from the switch to exit. */
10714 make_edge (switch_bb, bb->next_bb, 0);
10715 fallthru = false;
10717 break;
10719 case GIMPLE_OMP_TASK:
10720 fallthru = true;
10721 break;
10723 default:
10724 gcc_unreachable ();
10726 break;
10728 default:
10729 gcc_unreachable ();
10732 if (*region != cur_region)
10734 *region = cur_region;
10735 if (cur_region)
10736 *region_idx = cur_region->entry->index;
10737 else
10738 *region_idx = 0;
10741 return fallthru;
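/* Illustrative sketch (editorial addition): for a worksharing loop, the
   GIMPLE_OMP_CONTINUE handling above wires up roughly these edges (the
   abnormal flags keep later passes from splitting them; the real
   branches are generated when the region is expanded):

	 entry block (GIMPLE_OMP_FOR)     -> loop body        (existing edge, marked abnormal)
	 entry block (GIMPLE_OMP_FOR)     -> block after cont  (abnormal; body may run zero times)
	 cont block (GIMPLE_OMP_CONTINUE) -> loop body        (abnormal loopback edge)
	 cont block (GIMPLE_OMP_CONTINUE) -> block after cont  (fallthru + abnormal)

   For "#pragma omp sections", the switch block is connected to every
   section entry, each section exit is connected to the block holding
   GIMPLE_OMP_CONTINUE, that block loops back to the switch, and the
   switch also gets an edge to the block after the continue.  For
   GIMPLE_OMP_TASK, the extra abnormal edge from the task entry to the
   block with GIMPLE_OMP_RETURN models the task not being scheduled
   immediately.  */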