[official-gcc.git] / gcc / omp-expand.cc
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2022 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
121 /* Return true if REGION is a combined parallel+workshare region. */
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
126 return region->is_combined_parallel;
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130 is the immediate dominator of PAR_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
143 Is lowered into:
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
202 return true;
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
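/* Worked example (assuming a vectorization factor of 8): a chunk size of
   10 becomes (10 + 7) & -8 == 16, i.e. the chunk size is rounded up to
   the next multiple of the VF so that a SIMD chunk never ends
   mid-vector.  */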
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. PAR_STMT is the enclosing parallel directive
227 and WS_STMT is the workshare directive being expanded. */
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
238 struct omp_for_data fd;
239 tree n1, n2;
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
245 if (gimple_omp_for_combined_into_p (for_stmt))
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
269 if (fd.chunk_size)
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
276 return ws_args;
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
290 gcc_unreachable ();
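/* Worked example (assumed clause values): for
     #pragma omp parallel for schedule (dynamic, 4)
   the vector built above holds the long-converted values
   { n1, n2, step, 4 }, which expand_parallel_call later splices into the
   GOMP_parallel_loop_dynamic call between NUM_THREADS and FLAGS.  */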
293 /* Discover whether REGION is a combined parallel+workshare region. */
295 static void
296 determine_parallel_type (struct omp_region *region)
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
319 /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
336 if (region->inner->type == GIMPLE_OMP_FOR)
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
376 /* Dump the parallel region tree rooted at REGION. */
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
387 if (region->cont)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
406 dump_omp_region (stderr, region, 0);
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
412 dump_omp_region (stderr, root_omp_region, 0);
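/* Example dump shape (hypothetical region): a parallel containing a
   worksharing loop prints roughly as
       bb 2: gimple_omp_parallel
           bb 3: gimple_omp_for
           bb 4: GIMPLE_OMP_CONTINUE
           bb 5: GIMPLE_OMP_RETURN
       bb 6: GIMPLE_OMP_RETURN
   with inner regions indented by four columns.  */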
415 /* Create a new parallel region starting at STMT inside region PARENT. */
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
421 struct omp_region *region = XCNEW (struct omp_region);
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
427 if (parent)
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
434 else
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
442 return region;
445 /* Release the memory associated with the region tree rooted at REGION. */
447 static void
448 free_omp_region_1 (struct omp_region *region)
450 struct omp_region *i, *n;
452 for (i = region->inner; i ; i = n)
454 n = i->next;
455 free_omp_region_1 (i);
458 free (region);
461 /* Release the memory for the entire omp region tree. */
463 void
464 omp_free_regions (void)
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
469 n = r->next;
470 free_omp_region_1 (r);
472 root_omp_region = NULL;
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
478 static gcond *
479 gimple_build_cond_empty (tree cond)
481 enum tree_code pred_code;
482 tree lhs, rhs;
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502 /* OMP expansion expands inner regions before outer ones, so if
503 we e.g. have explicit task region nested in parallel region, when
504 expanding the task region current_function_decl will be the original
505 source function, but we actually want to use as context the child
506 function of the parallel. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543 being expanded. BB is the block where to insert the code. WS_ARGS
544 will be set if this is a call to a combined parallel+workshare
545 construct, it contains the list of additional arguments needed by
546 the workshare construct. */
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
571 switch (region->inner->type)
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
632 else
633 clause_loc = gimple_location (entry_stmt);
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
646 cond = gimple_boolify (cond);
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
665 else
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
701 if (gimple_in_ssa_p (cfun))
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
708 val = tmp_join;
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
736 if (rtmp)
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
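/* Worked example (assumed directive): a plain
     #pragma omp parallel num_threads (4)
   region is emitted roughly as
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);
   while a combined parallel+for with a non-static schedule instead calls
   the matching GOMP_parallel_loop_* entry point, with WS_ARGS spliced in
   before FLAGS.  */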
747 /* Build the function call to GOMP_task to actually
748 generate the task operation. BB is the block where to insert the code. */
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
800 else
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
810 else
811 num_tasks = integer_zero_node;
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
822 else
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
830 tree flags = build_int_cst (unsigned_type_node, iflags);
832 tree cond = boolean_true_node;
833 if (ifc)
835 if (taskloop_p)
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
849 if (finalc)
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
868 gsi = gsi_last_nondebug_bb (bb);
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
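/* Worked example (assumed clauses): for
     #pragma omp task untied mergeable
   IFLAGS is GOMP_TASK_FLAG_UNTIED | GOMP_TASK_FLAG_MERGEABLE and the call
   built above is the ten-argument GOMP_task variant; taskloops use
   GOMP_taskloop{,_ull} with the extra NUM_TASKS/START/END/STEP arguments
   instead.  */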
905 /* Build the function call to GOMP_taskwait_depend to actually
906 generate the taskwait operation. BB is the block where to insert the
907 code. */
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
917 depend = OMP_CLAUSE_DECL (depend);
919 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920 tree t
921 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922 1, depend);
924 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925 false, GSI_CONTINUE_LINKING);
928 /* Build the function call to GOMP_teams_reg to actually
929 generate the host teams operation. REGION is the teams region
930 being expanded. BB is the block where to insert the code. */
932 static void
933 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
935 tree clauses = gimple_omp_teams_clauses (entry_stmt);
936 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937 if (num_teams == NULL_TREE)
938 num_teams = build_int_cst (unsigned_type_node, 0);
939 else
941 num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
942 num_teams = fold_convert (unsigned_type_node, num_teams);
944 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945 if (thread_limit == NULL_TREE)
946 thread_limit = build_int_cst (unsigned_type_node, 0);
947 else
949 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950 thread_limit = fold_convert (unsigned_type_node, thread_limit);
953 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955 if (t == NULL)
956 t1 = null_pointer_node;
957 else
958 t1 = build_fold_addr_expr (t);
959 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960 tree t2 = build_fold_addr_expr (child_fndecl);
962 vec<tree, va_gc> *args;
963 vec_alloc (args, 5);
964 args->quick_push (t2);
965 args->quick_push (t1);
966 args->quick_push (num_teams);
967 args->quick_push (thread_limit);
968 /* For future extensibility. */
969 args->quick_push (build_zero_cst (unsigned_type_node));
971 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973 args);
975 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976 false, GSI_CONTINUE_LINKING);
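/* Worked example (assumed clauses): a host
     #pragma omp teams num_teams (8) thread_limit (16)
   becomes roughly
     GOMP_teams_reg (foo._omp_fn.0, &.omp_data_o, 8, 16, 0);
   with the trailing zero reserved for future flags.  */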
979 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
981 static tree
982 vec2chain (vec<tree, va_gc> *v)
984 tree chain = NULL_TREE, t;
985 unsigned ix;
987 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
989 DECL_CHAIN (t) = chain;
990 chain = t;
993 return chain;
996 /* Remove barriers in REGION->EXIT's block. Note that this is only
997 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
998 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
999 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1000 removed. */
1002 static void
1003 remove_exit_barrier (struct omp_region *region)
1005 gimple_stmt_iterator gsi;
1006 basic_block exit_bb;
1007 edge_iterator ei;
1008 edge e;
1009 gimple *stmt;
1010 int any_addressable_vars = -1;
1012 exit_bb = region->exit;
1014 /* If the parallel region doesn't return, we don't have REGION->EXIT
1015 block at all. */
1016 if (! exit_bb)
1017 return;
1019 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1020 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1021 statements that can appear in between are extremely limited -- no
1022 memory operations at all. Here, we allow nothing at all, so the
1023 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1024 gsi = gsi_last_nondebug_bb (exit_bb);
1025 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026 gsi_prev_nondebug (&gsi);
1027 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028 return;
1030 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1032 gsi = gsi_last_nondebug_bb (e->src);
1033 if (gsi_end_p (gsi))
1034 continue;
1035 stmt = gsi_stmt (gsi);
1036 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037 && !gimple_omp_return_nowait_p (stmt))
1039 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040 in many cases. If there could be tasks queued, the barrier
1041 might be needed to let the tasks run before some local
1042 variable of the parallel that the task uses as shared
1043 runs out of scope. The task can be spawned either
1044 from within current function (this would be easy to check)
1045 or from some function it calls and gets passed an address
1046 of such a variable. */
1047 if (any_addressable_vars < 0)
1049 gomp_parallel *parallel_stmt
1050 = as_a <gomp_parallel *> (last_stmt (region->entry));
1051 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052 tree local_decls, block, decl;
1053 unsigned ix;
1055 any_addressable_vars = 0;
1056 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057 if (TREE_ADDRESSABLE (decl))
1059 any_addressable_vars = 1;
1060 break;
1062 for (block = gimple_block (stmt);
1063 !any_addressable_vars
1064 && block
1065 && TREE_CODE (block) == BLOCK;
1066 block = BLOCK_SUPERCONTEXT (block))
1068 for (local_decls = BLOCK_VARS (block);
1069 local_decls;
1070 local_decls = DECL_CHAIN (local_decls))
1071 if (TREE_ADDRESSABLE (local_decls))
1073 any_addressable_vars = 1;
1074 break;
1076 if (block == gimple_block (parallel_stmt))
1077 break;
1080 if (!any_addressable_vars)
1081 gimple_omp_return_set_nowait (stmt);
1086 static void
1087 remove_exit_barriers (struct omp_region *region)
1089 if (region->type == GIMPLE_OMP_PARALLEL)
1090 remove_exit_barrier (region);
1092 if (region->inner)
1094 region = region->inner;
1095 remove_exit_barriers (region);
1096 while (region->next)
1098 region = region->next;
1099 remove_exit_barriers (region);
1104 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105 calls. These can't be declared as const functions, but
1106 within one parallel body they are constant, so they can be
1107 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108 which are declared const. Similarly for task body, except
1109 that in untied task omp_get_thread_num () can change at any task
1110 scheduling point. */
1112 static void
1113 optimize_omp_library_calls (gimple *entry_stmt)
1115 basic_block bb;
1116 gimple_stmt_iterator gsi;
1117 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123 OMP_CLAUSE_UNTIED) != NULL);
1125 FOR_EACH_BB_FN (bb, cfun)
1126 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1128 gimple *call = gsi_stmt (gsi);
1129 tree decl;
1131 if (is_gimple_call (call)
1132 && (decl = gimple_call_fndecl (call))
1133 && DECL_EXTERNAL (decl)
1134 && TREE_PUBLIC (decl)
1135 && DECL_INITIAL (decl) == NULL)
1137 tree built_in;
1139 if (DECL_NAME (decl) == thr_num_id)
1141 /* In #pragma omp task untied, omp_get_thread_num () can change
1142 during the execution of the task region. */
1143 if (untied_task)
1144 continue;
1145 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1147 else if (DECL_NAME (decl) == num_thr_id)
1148 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149 else
1150 continue;
1152 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153 || gimple_call_num_args (call) != 0)
1154 continue;
1156 if (flag_exceptions && !TREE_NOTHROW (decl))
1157 continue;
1159 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161 TREE_TYPE (TREE_TYPE (built_in))))
1162 continue;
1164 gimple_call_set_fndecl (call, built_in);
1169 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1170 regimplified. */
1172 static tree
1173 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1175 tree t = *tp;
1177 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1178 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179 return t;
1181 if (TREE_CODE (t) == ADDR_EXPR)
1182 recompute_tree_invariant_for_addr_expr (t);
1184 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185 return NULL_TREE;
1188 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1190 static void
1191 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192 bool after)
1194 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196 !after, after ? GSI_CONTINUE_LINKING
1197 : GSI_SAME_STMT);
1198 gimple *stmt = gimple_build_assign (to, from);
1199 if (after)
1200 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201 else
1202 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gimple_regimplify_operands (stmt, &gsi);
1211 /* Prepend or append LHS CODE RHS condition before or after *GSI_P. */
1213 static gcond *
1214 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1215 tree lhs, tree rhs, bool after = false)
1217 gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1218 if (after)
1219 gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1220 else
1221 gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1222 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1223 NULL, NULL)
1224 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225 NULL, NULL))
1227 gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1228 gimple_regimplify_operands (cond_stmt, &gsi);
1230 return cond_stmt;
1233 /* Expand the OpenMP parallel or task directive starting at REGION. */
1235 static void
1236 expand_omp_taskreg (struct omp_region *region)
1238 basic_block entry_bb, exit_bb, new_bb;
1239 struct function *child_cfun;
1240 tree child_fn, block, t;
1241 gimple_stmt_iterator gsi;
1242 gimple *entry_stmt, *stmt;
1243 edge e;
1244 vec<tree, va_gc> *ws_args;
1246 entry_stmt = last_stmt (region->entry);
1247 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1248 && gimple_omp_task_taskwait_p (entry_stmt))
1250 new_bb = region->entry;
1251 gsi = gsi_last_nondebug_bb (region->entry);
1252 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1253 gsi_remove (&gsi, true);
1254 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1255 return;
1258 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1259 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1261 entry_bb = region->entry;
1262 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1263 exit_bb = region->cont;
1264 else
1265 exit_bb = region->exit;
1267 if (is_combined_parallel (region))
1268 ws_args = region->ws_args;
1269 else
1270 ws_args = NULL;
1272 if (child_cfun->cfg)
1274 /* Due to inlining, it may happen that we have already outlined
1275 the region, in which case all we need to do is make the
1276 sub-graph unreachable and emit the parallel call. */
1277 edge entry_succ_e, exit_succ_e;
1279 entry_succ_e = single_succ_edge (entry_bb);
1281 gsi = gsi_last_nondebug_bb (entry_bb);
1282 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1283 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1284 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1285 gsi_remove (&gsi, true);
1287 new_bb = entry_bb;
1288 if (exit_bb)
1290 exit_succ_e = single_succ_edge (exit_bb);
1291 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1293 remove_edge_and_dominated_blocks (entry_succ_e);
1295 else
1297 unsigned srcidx, dstidx, num;
1299 /* If the parallel region needs data sent from the parent
1300 function, then the very first statement (except possible
1301 tree profile counter updates) of the parallel body
1302 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1303 &.OMP_DATA_O is passed as an argument to the child function,
1304 we need to replace it with the argument as seen by the child
1305 function.
1307 In most cases, this will end up being the identity assignment
1308 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1309 a function call that has been inlined, the original PARM_DECL
1310 .OMP_DATA_I may have been converted into a different local
1311 variable. In which case, we need to keep the assignment. */
1312 if (gimple_omp_taskreg_data_arg (entry_stmt))
1314 basic_block entry_succ_bb
1315 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1316 : FALLTHRU_EDGE (entry_bb)->dest;
1317 tree arg;
1318 gimple *parcopy_stmt = NULL;
1320 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1322 gimple *stmt;
1324 gcc_assert (!gsi_end_p (gsi));
1325 stmt = gsi_stmt (gsi);
1326 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1327 continue;
1329 if (gimple_num_ops (stmt) == 2)
1331 tree arg = gimple_assign_rhs1 (stmt);
1333 /* We're ignoring the subcode because we're
1334 effectively doing a STRIP_NOPS. */
1336 if (TREE_CODE (arg) == ADDR_EXPR
1337 && (TREE_OPERAND (arg, 0)
1338 == gimple_omp_taskreg_data_arg (entry_stmt)))
1340 parcopy_stmt = stmt;
1341 break;
1346 gcc_assert (parcopy_stmt != NULL);
1347 arg = DECL_ARGUMENTS (child_fn);
1349 if (!gimple_in_ssa_p (cfun))
1351 if (gimple_assign_lhs (parcopy_stmt) == arg)
1352 gsi_remove (&gsi, true);
1353 else
1355 /* ?? Is setting the subcode really necessary ?? */
1356 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1357 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1360 else
1362 tree lhs = gimple_assign_lhs (parcopy_stmt);
1363 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1364 /* We'd like to set the rhs to the default def in the child_fn,
1365 but it's too early to create ssa names in the child_fn.
1366 Instead, we set the rhs to the parm. In
1367 move_sese_region_to_fn, we introduce a default def for the
1368 parm, map the parm to its default def, and once we encounter
1369 this stmt, replace the parm with the default def. */
1370 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1371 update_stmt (parcopy_stmt);
1375 /* Declare local variables needed in CHILD_CFUN. */
1376 block = DECL_INITIAL (child_fn);
1377 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1378 /* The gimplifier could record temporaries in parallel/task block
1379 rather than in containing function's local_decls chain,
1380 which would mean cgraph missed finalizing them. Do it now. */
1381 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1382 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1383 varpool_node::finalize_decl (t);
1384 DECL_SAVED_TREE (child_fn) = NULL;
1385 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1386 gimple_set_body (child_fn, NULL);
1387 TREE_USED (block) = 1;
1389 /* Reset DECL_CONTEXT on function arguments. */
1390 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1391 DECL_CONTEXT (t) = child_fn;
1393 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1394 so that it can be moved to the child function. */
1395 gsi = gsi_last_nondebug_bb (entry_bb);
1396 stmt = gsi_stmt (gsi);
1397 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1398 || gimple_code (stmt) == GIMPLE_OMP_TASK
1399 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1400 e = split_block (entry_bb, stmt);
1401 gsi_remove (&gsi, true);
1402 entry_bb = e->dest;
1403 edge e2 = NULL;
1404 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1405 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1406 else
1408 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1409 gcc_assert (e2->dest == region->exit);
1410 remove_edge (BRANCH_EDGE (entry_bb));
1411 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1412 gsi = gsi_last_nondebug_bb (region->exit);
1413 gcc_assert (!gsi_end_p (gsi)
1414 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1415 gsi_remove (&gsi, true);
1418 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1419 if (exit_bb)
1421 gsi = gsi_last_nondebug_bb (exit_bb);
1422 gcc_assert (!gsi_end_p (gsi)
1423 && (gimple_code (gsi_stmt (gsi))
1424 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1425 stmt = gimple_build_return (NULL);
1426 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1427 gsi_remove (&gsi, true);
1430 /* Move the parallel region into CHILD_CFUN. */
1432 if (gimple_in_ssa_p (cfun))
1434 init_tree_ssa (child_cfun);
1435 init_ssa_operands (child_cfun);
1436 child_cfun->gimple_df->in_ssa_p = true;
1437 block = NULL_TREE;
1439 else
1440 block = gimple_block (entry_stmt);
1442 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1443 if (exit_bb)
1444 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1445 if (e2)
1447 basic_block dest_bb = e2->dest;
1448 if (!exit_bb)
1449 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1450 remove_edge (e2);
1451 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1453 /* When the OMP expansion process cannot guarantee an up-to-date
1454 loop tree, arrange for the child function to fix up loops. */
1455 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1456 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1458 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1459 num = vec_safe_length (child_cfun->local_decls);
1460 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1462 t = (*child_cfun->local_decls)[srcidx];
1463 if (DECL_CONTEXT (t) == cfun->decl)
1464 continue;
1465 if (srcidx != dstidx)
1466 (*child_cfun->local_decls)[dstidx] = t;
1467 dstidx++;
1469 if (dstidx != num)
1470 vec_safe_truncate (child_cfun->local_decls, dstidx);
1472 /* Inform the callgraph about the new function. */
1473 child_cfun->curr_properties = cfun->curr_properties;
1474 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1475 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1476 cgraph_node *node = cgraph_node::get_create (child_fn);
1477 node->parallelized_function = 1;
1478 cgraph_node::add_new_function (child_fn, true);
1480 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1481 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1483 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1484 fixed in a following pass. */
1485 push_cfun (child_cfun);
1486 if (need_asm)
1487 assign_assembler_name_if_needed (child_fn);
1489 if (optimize)
1490 optimize_omp_library_calls (entry_stmt);
1491 update_max_bb_count ();
1492 cgraph_edge::rebuild_edges ();
1494 /* Some EH regions might become dead, see PR34608. If
1495 pass_cleanup_cfg isn't the first pass to happen with the
1496 new child, these dead EH edges might cause problems.
1497 Clean them up now. */
1498 if (flag_exceptions)
1500 basic_block bb;
1501 bool changed = false;
1503 FOR_EACH_BB_FN (bb, cfun)
1504 changed |= gimple_purge_dead_eh_edges (bb);
1505 if (changed)
1506 cleanup_tree_cfg ();
1508 if (gimple_in_ssa_p (cfun))
1509 update_ssa (TODO_update_ssa);
1510 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1511 verify_loop_structure ();
1512 pop_cfun ();
1514 if (dump_file && !gimple_in_ssa_p (cfun))
1516 omp_any_child_fn_dumped = true;
1517 dump_function_header (dump_file, child_fn, dump_flags);
1518 dump_function_to_file (child_fn, dump_file, dump_flags);
1522 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1524 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1525 expand_parallel_call (region, new_bb,
1526 as_a <gomp_parallel *> (entry_stmt), ws_args);
1527 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1528 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1529 else
1530 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1531 if (gimple_in_ssa_p (cfun))
1532 update_ssa (TODO_update_ssa_only_virtuals);
1535 /* Information about members of an OpenACC collapsed loop nest. */
1537 struct oacc_collapse
1539 tree base; /* Base value. */
1540 tree iters; /* Number of steps. */
1541 tree step; /* Step size. */
1542 tree tile; /* Tile increment (if tiled). */
1543 tree outer; /* Tile iterator var. */
1546 /* Helper for expand_oacc_for. Determine collapsed loop information.
1547 Fill in COUNTS array. Emit any initialization code before GSI.
1548 Return the calculated outer loop bound of BOUND_TYPE. */
1550 static tree
1551 expand_oacc_collapse_init (const struct omp_for_data *fd,
1552 gimple_stmt_iterator *gsi,
1553 oacc_collapse *counts, tree diff_type,
1554 tree bound_type, location_t loc)
1556 tree tiling = fd->tiling;
1557 tree total = build_int_cst (bound_type, 1);
1558 int ix;
1560 gcc_assert (integer_onep (fd->loop.step));
1561 gcc_assert (integer_zerop (fd->loop.n1));
1563 /* When tiling, the first operand of the tile clause applies to the
1564 innermost loop, and we work outwards from there. Seems
1565 backwards, but whatever. */
1566 for (ix = fd->collapse; ix--;)
1568 const omp_for_data_loop *loop = &fd->loops[ix];
1570 tree iter_type = TREE_TYPE (loop->v);
1571 tree plus_type = iter_type;
1573 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1575 if (POINTER_TYPE_P (iter_type))
1576 plus_type = sizetype;
1578 if (tiling)
1580 tree num = build_int_cst (integer_type_node, fd->collapse);
1581 tree loop_no = build_int_cst (integer_type_node, ix);
1582 tree tile = TREE_VALUE (tiling);
1583 gcall *call
1584 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585 /* gwv-outer=*/integer_zero_node,
1586 /* gwv-inner=*/integer_zero_node);
1588 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590 gimple_call_set_lhs (call, counts[ix].tile);
1591 gimple_set_location (call, loc);
1592 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1594 tiling = TREE_CHAIN (tiling);
1596 else
1598 counts[ix].tile = NULL;
1599 counts[ix].outer = loop->v;
1602 tree b = loop->n1;
1603 tree e = loop->n2;
1604 tree s = loop->step;
1605 bool up = loop->cond_code == LT_EXPR;
1606 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607 bool negating;
1608 tree expr;
1610 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611 true, GSI_SAME_STMT);
1612 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1615 /* Convert the step, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617 if (negating)
1618 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619 s = fold_convert (diff_type, s);
1620 if (negating)
1621 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623 true, GSI_SAME_STMT);
1625 /* Determine the range, avoiding possible unsigned->signed overflow. */
1626 negating = !up && TYPE_UNSIGNED (iter_type);
1627 expr = fold_build2 (MINUS_EXPR, plus_type,
1628 fold_convert (plus_type, negating ? b : e),
1629 fold_convert (plus_type, negating ? e : b));
1630 expr = fold_convert (diff_type, expr);
1631 if (negating)
1632 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633 tree range = force_gimple_operand_gsi
1634 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1636 /* Determine number of iterations. */
1637 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1641 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642 true, GSI_SAME_STMT);
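/* Worked example (assumed bounds): for a loop
     for (i = 0; i < 10; i += 3)
   the range is 10, so iters = (10 - 1 + 3) / 3 == 4, matching the
   iterations i = 0, 3, 6, 9.  */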
1644 counts[ix].base = b;
1645 counts[ix].iters = iters;
1646 counts[ix].step = s;
1648 total = fold_build2 (MULT_EXPR, bound_type, total,
1649 fold_convert (bound_type, iters));
1652 return total;
1655 /* Emit initializers for collapsed loop members. INNER is true if
1656 this is for the element loop of a TILE. IVAR is the outer
1657 loop iteration variable, from which collapsed loop iteration values
1658 are calculated. COUNTS array has been initialized by
1659 expand_oacc_collapse_init. */
1661 static void
1662 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1663 gimple_stmt_iterator *gsi,
1664 const oacc_collapse *counts, tree ivar,
1665 tree diff_type)
1667 tree ivar_type = TREE_TYPE (ivar);
1669 /* The most rapidly changing iteration variable is the innermost
1670 one. */
1671 for (int ix = fd->collapse; ix--;)
1673 const omp_for_data_loop *loop = &fd->loops[ix];
1674 const oacc_collapse *collapse = &counts[ix];
1675 tree v = inner ? loop->v : collapse->outer;
1676 tree iter_type = TREE_TYPE (v);
1677 tree plus_type = iter_type;
1678 enum tree_code plus_code = PLUS_EXPR;
1679 tree expr;
1681 if (POINTER_TYPE_P (iter_type))
1683 plus_code = POINTER_PLUS_EXPR;
1684 plus_type = sizetype;
1687 expr = ivar;
1688 if (ix)
1690 tree mod = fold_convert (ivar_type, collapse->iters);
1691 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1692 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1693 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1694 true, GSI_SAME_STMT);
1697 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1698 fold_convert (diff_type, collapse->step));
1699 expr = fold_build2 (plus_code, iter_type,
1700 inner ? collapse->outer : collapse->base,
1701 fold_convert (plus_type, expr));
1702 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1703 true, GSI_SAME_STMT);
1704 gassign *ass = gimple_build_assign (v, expr);
1705 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1709 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1710 of the combined collapse > 1 loop constructs, generate code like:
1711 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712 if (cond3 is <)
1713 adj = STEP3 - 1;
1714 else
1715 adj = STEP3 + 1;
1716 count3 = (adj + N32 - N31) / STEP3;
1717 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718 if (cond2 is <)
1719 adj = STEP2 - 1;
1720 else
1721 adj = STEP2 + 1;
1722 count2 = (adj + N22 - N21) / STEP2;
1723 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724 if (cond1 is <)
1725 adj = STEP1 - 1;
1726 else
1727 adj = STEP1 + 1;
1728 count1 = (adj + N12 - N11) / STEP1;
1729 count = count1 * count2 * count3;
1730 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731 count = 0;
1732 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1733 of the combined loop constructs, just initialize COUNTS array
1734 from the _looptemp_ clauses. For loop nests with non-rectangular
1735 loops, do this only for the rectangular loops. Then pick
1736 the loops which reference outer vars in their bound expressions
1737 and the loops which they refer to and for this sub-nest compute
1738 number of iterations. For triangular loops use Faulhaber's formula,
1739 otherwise as a fallback, compute by iterating the loops.
1740 If e.g. the sub-nest is
1741 for (I = N11; I COND1 N12; I += STEP1)
1742 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1745 COUNT = 0;
1746 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747 for (tmpj = M21 * tmpi + N21;
1748 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1750 int tmpk1 = M31 * tmpj + N31;
1751 int tmpk2 = M32 * tmpj + N32;
1752 if (tmpk1 COND3 tmpk2)
1754 if (COND3 is <)
1755 adj = STEP3 - 1;
1756 else
1757 adj = STEP3 + 1;
1758 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1761 and finally multiply the counts of the rectangular loops not
1762 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1763 store number of iterations of the loops from fd->first_nonrect
1764 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765 by the counts of rectangular loops not referenced in any non-rectangular
1766 loops sandwiched in between those. */
1768 /* NOTE: It *could* be better to moosh all of the BBs together,
1769 creating one larger BB with all the computation and the unexpected
1770 jump at the end. I.e.
1772 bool zero3, zero2, zero1, zero;
1774 zero3 = N32 c3 N31;
1775 count3 = (N32 - N31) /[cl] STEP3;
1776 zero2 = N22 c2 N21;
1777 count2 = (N22 - N21) /[cl] STEP2;
1778 zero1 = N12 c1 N11;
1779 count1 = (N12 - N11) /[cl] STEP1;
1780 zero = zero3 || zero2 || zero1;
1781 count = count1 * count2 * count3;
1782 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1784 After all, we expect the zero=false, and thus we expect to have to
1785 evaluate all of the comparison expressions, so short-circuiting
1786 oughtn't be a win. Since the condition isn't protecting a
1787 denominator, we're not concerned about divide-by-zero, so we can
1788 fully evaluate count even if a numerator turned out to be wrong.
1790 It seems like putting this all together would create much better
1791 scheduling opportunities, and less pressure on the chip's branch
1792 predictor. */
1794 static void
1795 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 basic_block &entry_bb, tree *counts,
1797 basic_block &zero_iter1_bb, int &first_zero_iter1,
1798 basic_block &zero_iter2_bb, int &first_zero_iter2,
1799 basic_block &l2_dom_bb)
1801 tree t, type = TREE_TYPE (fd->loop.v);
1802 edge e, ne;
1803 int i;
1805 /* Collapsed loops need work for expansion into SSA form. */
1806 gcc_assert (!gimple_in_ssa_p (cfun));
1808 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1811 gcc_assert (fd->ordered == 0);
1812 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813 isn't supposed to be handled, as the inner loop doesn't
1814 use it. */
1815 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816 OMP_CLAUSE__LOOPTEMP_);
1817 gcc_assert (innerc);
1818 for (i = 0; i < fd->collapse; i++)
1820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821 OMP_CLAUSE__LOOPTEMP_);
1822 gcc_assert (innerc);
1823 if (i)
1824 counts[i] = OMP_CLAUSE_DECL (innerc);
1825 else
1826 counts[0] = NULL_TREE;
1828 if (fd->non_rect
1829 && fd->last_nonrect == fd->first_nonrect + 1
1830 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1832 tree c[4];
1833 for (i = 0; i < 4; i++)
1835 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836 OMP_CLAUSE__LOOPTEMP_);
1837 gcc_assert (innerc);
1838 c[i] = OMP_CLAUSE_DECL (innerc);
1840 counts[0] = c[0];
1841 fd->first_inner_iterations = c[1];
1842 fd->factor = c[2];
1843 fd->adjn1 = c[3];
1845 return;
1848 for (i = fd->collapse; i < fd->ordered; i++)
1850 tree itype = TREE_TYPE (fd->loops[i].v);
1851 counts[i] = NULL_TREE;
1852 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853 fold_convert (itype, fd->loops[i].n1),
1854 fold_convert (itype, fd->loops[i].n2));
1855 if (t && integer_zerop (t))
1857 for (i = fd->collapse; i < fd->ordered; i++)
1858 counts[i] = build_int_cst (type, 0);
1859 break;
1862 bool rect_count_seen = false;
1863 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1865 tree itype = TREE_TYPE (fd->loops[i].v);
1867 if (i >= fd->collapse && counts[i])
1868 continue;
1869 if (fd->non_rect)
1871 /* Skip loops that use outer iterators in their expressions
1872 during this phase. */
1873 if (fd->loops[i].m1 || fd->loops[i].m2)
1875 counts[i] = build_zero_cst (type);
1876 continue;
1879 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881 fold_convert (itype, fd->loops[i].n1),
1882 fold_convert (itype, fd->loops[i].n2)))
1883 == NULL_TREE || !integer_onep (t)))
1885 gcond *cond_stmt;
1886 tree n1, n2;
1887 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889 true, GSI_SAME_STMT);
1890 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892 true, GSI_SAME_STMT);
1893 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894 n1, n2);
1895 e = split_block (entry_bb, cond_stmt);
1896 basic_block &zero_iter_bb
1897 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898 int &first_zero_iter
1899 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900 if (zero_iter_bb == NULL)
1902 gassign *assign_stmt;
1903 first_zero_iter = i;
1904 zero_iter_bb = create_empty_bb (entry_bb);
1905 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906 *gsi = gsi_after_labels (zero_iter_bb);
1907 if (i < fd->collapse)
1908 assign_stmt = gimple_build_assign (fd->loop.n2,
1909 build_zero_cst (type));
1910 else
1912 counts[i] = create_tmp_reg (type, ".count");
1913 assign_stmt
1914 = gimple_build_assign (counts[i], build_zero_cst (type));
1916 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918 entry_bb);
1920 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921 ne->probability = profile_probability::very_unlikely ();
1922 e->flags = EDGE_TRUE_VALUE;
1923 e->probability = ne->probability.invert ();
1924 if (l2_dom_bb == NULL)
1925 l2_dom_bb = entry_bb;
1926 entry_bb = e->dest;
1927 *gsi = gsi_last_nondebug_bb (entry_bb);
1930 if (POINTER_TYPE_P (itype))
1931 itype = signed_type_for (itype);
1932 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933 ? -1 : 1));
1934 t = fold_build2 (PLUS_EXPR, itype,
1935 fold_convert (itype, fd->loops[i].step), t);
1936 t = fold_build2 (PLUS_EXPR, itype, t,
1937 fold_convert (itype, fd->loops[i].n2));
1938 t = fold_build2 (MINUS_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].n1));
1940 /* ?? We could probably use CEIL_DIV_EXPR instead of
1941 TRUNC_DIV_EXPR and adjust by hand, unless that fails to
1942 generate the same code in the end because generically we
1943 don't know that the values involved must be negative for
1944 GT. ?? */
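/* For illustration only (hypothetical numbers, not taken from this file):
   suppose n1 = 10, n2 = 0, step = -3, the comparison is GT and ITYPE is
   unsigned.  adj is then 1, so the numerator step + adj + n2 - n1 wraps
   to -12 modulo 2^N, and dividing the wrapped unsigned values directly
   would be wrong.  Negating both numerator and denominator gives
   12 / 3 = 4, the correct iteration count (v = 10, 7, 4, 1).  */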
1945 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947 fold_build1 (NEGATE_EXPR, itype, t),
1948 fold_build1 (NEGATE_EXPR, itype,
1949 fold_convert (itype,
1950 fd->loops[i].step)));
1951 else
1952 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953 fold_convert (itype, fd->loops[i].step));
1954 t = fold_convert (type, t);
1955 if (TREE_CODE (t) == INTEGER_CST)
1956 counts[i] = t;
1957 else
1959 if (i < fd->collapse || i != first_zero_iter2)
1960 counts[i] = create_tmp_reg (type, ".count");
1961 expand_omp_build_assign (gsi, counts[i], t);
1963 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1965 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966 continue;
1967 if (!rect_count_seen)
1969 t = counts[i];
1970 rect_count_seen = true;
1972 else
1973 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974 expand_omp_build_assign (gsi, fd->loop.n2, t);
1977 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1979 gcc_assert (fd->last_nonrect != -1);
1981 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983 build_zero_cst (type));
1984 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985 if (fd->loops[i].m1
1986 || fd->loops[i].m2
1987 || fd->loops[i].non_rect_referenced)
1988 break;
1989 if (i == fd->last_nonrect
1990 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1994 int o = fd->first_nonrect;
1995 tree itype = TREE_TYPE (fd->loops[o].v);
1996 tree n1o = create_tmp_reg (itype, ".n1o");
1997 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998 expand_omp_build_assign (gsi, n1o, t);
1999 tree n2o = create_tmp_reg (itype, ".n2o");
2000 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001 expand_omp_build_assign (gsi, n2o, t);
2002 if (fd->loops[i].m1 && fd->loops[i].m2)
2003 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004 unshare_expr (fd->loops[i].m1));
2005 else if (fd->loops[i].m1)
2006 t = fold_unary (NEGATE_EXPR, itype,
2007 unshare_expr (fd->loops[i].m1));
2008 else
2009 t = unshare_expr (fd->loops[i].m2);
2010 tree m2minusm1
2011 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2014 gimple_stmt_iterator gsi2 = *gsi;
2015 gsi_prev (&gsi2);
2016 e = split_block (entry_bb, gsi_stmt (gsi2));
2017 e = split_block (e->dest, (gimple *) NULL);
2018 basic_block bb1 = e->src;
2019 entry_bb = e->dest;
2020 *gsi = gsi_after_labels (entry_bb);
2022 gsi2 = gsi_after_labels (bb1);
2023 tree ostep = fold_convert (itype, fd->loops[o].step);
2024 t = build_int_cst (itype, (fd->loops[o].cond_code
2025 == LT_EXPR ? -1 : 1));
2026 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029 if (TYPE_UNSIGNED (itype)
2030 && fd->loops[o].cond_code == GT_EXPR)
2031 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032 fold_build1 (NEGATE_EXPR, itype, t),
2033 fold_build1 (NEGATE_EXPR, itype, ostep));
2034 else
2035 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036 tree outer_niters
2037 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038 true, GSI_SAME_STMT);
2039 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040 build_one_cst (itype));
2041 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044 true, GSI_SAME_STMT);
2045 tree n1, n2, n1e, n2e;
2046 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047 if (fd->loops[i].m1)
2049 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2053 else
2054 n1 = t;
2055 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056 true, GSI_SAME_STMT);
2057 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058 if (fd->loops[i].m2)
2060 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2064 else
2065 n2 = t;
2066 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067 true, GSI_SAME_STMT);
2068 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069 if (fd->loops[i].m1)
2071 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2075 else
2076 n1e = t;
2077 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078 true, GSI_SAME_STMT);
2079 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080 if (fd->loops[i].m2)
2082 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2086 else
2087 n2e = t;
2088 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089 true, GSI_SAME_STMT);
2090 gcond *cond_stmt
2091 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092 n1, n2);
2093 e = split_block (bb1, cond_stmt);
2094 e->flags = EDGE_TRUE_VALUE;
2095 e->probability = profile_probability::likely ().guessed ();
2096 basic_block bb2 = e->dest;
2097 gsi2 = gsi_after_labels (bb2);
2099 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100 n1e, n2e);
2101 e = split_block (bb2, cond_stmt);
2102 e->flags = EDGE_TRUE_VALUE;
2103 e->probability = profile_probability::likely ().guessed ();
2104 gsi2 = gsi_after_labels (e->dest);
2106 tree step = fold_convert (itype, fd->loops[i].step);
2107 t = build_int_cst (itype, (fd->loops[i].cond_code
2108 == LT_EXPR ? -1 : 1));
2109 t = fold_build2 (PLUS_EXPR, itype, step, t);
2110 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112 if (TYPE_UNSIGNED (itype)
2113 && fd->loops[i].cond_code == GT_EXPR)
2114 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115 fold_build1 (NEGATE_EXPR, itype, t),
2116 fold_build1 (NEGATE_EXPR, itype, step));
2117 else
2118 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119 tree first_inner_iterations
2120 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121 true, GSI_SAME_STMT);
2122 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123 if (TYPE_UNSIGNED (itype)
2124 && fd->loops[i].cond_code == GT_EXPR)
2125 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126 fold_build1 (NEGATE_EXPR, itype, t),
2127 fold_build1 (NEGATE_EXPR, itype, step));
2128 else
2129 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130 tree factor
2131 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132 true, GSI_SAME_STMT);
2133 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134 build_one_cst (itype));
2135 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137 t = fold_build2 (MULT_EXPR, itype, factor, t);
2138 t = fold_build2 (PLUS_EXPR, itype,
2139 fold_build2 (MULT_EXPR, itype, outer_niters,
2140 first_inner_iterations), t);
2141 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142 fold_convert (type, t));
2144 basic_block bb3 = create_empty_bb (bb1);
2145 add_bb_to_loop (bb3, bb1->loop_father);
2147 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148 e->probability = profile_probability::unlikely ().guessed ();
2150 gsi2 = gsi_after_labels (bb3);
2151 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152 n1e, n2e);
2153 e = split_block (bb3, cond_stmt);
2154 e->flags = EDGE_TRUE_VALUE;
2155 e->probability = profile_probability::likely ().guessed ();
2156 basic_block bb4 = e->dest;
2158 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159 ne->probability = e->probability.invert ();
2161 basic_block bb5 = create_empty_bb (bb2);
2162 add_bb_to_loop (bb5, bb2->loop_father);
2164 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165 ne->probability = profile_probability::unlikely ().guessed ();
2167 for (int j = 0; j < 2; j++)
2169 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170 t = fold_build2 (MINUS_EXPR, itype,
2171 unshare_expr (fd->loops[i].n1),
2172 unshare_expr (fd->loops[i].n2));
2173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174 tree tem
2175 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176 true, GSI_SAME_STMT);
2177 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181 true, GSI_SAME_STMT);
2182 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183 if (fd->loops[i].m1)
2185 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2189 else
2190 n1 = t;
2191 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192 true, GSI_SAME_STMT);
2193 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194 if (fd->loops[i].m2)
2196 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2200 else
2201 n2 = t;
2202 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203 true, GSI_SAME_STMT);
2204 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2206 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207 n1, n2);
2208 e = split_block (gsi_bb (gsi2), cond_stmt);
2209 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210 e->probability = profile_probability::unlikely ().guessed ();
2211 ne = make_edge (e->src, bb1,
2212 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213 ne->probability = e->probability.invert ();
2214 gsi2 = gsi_after_labels (e->dest);
2216 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2219 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2222 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2226 if (fd->first_nonrect + 1 == fd->last_nonrect)
2228 fd->first_inner_iterations = first_inner_iterations;
2229 fd->factor = factor;
2230 fd->adjn1 = n1o;
2233 else
2235 /* Fallback implementation. Evaluate the loops with m1/m2
2236 non-NULL as well as their outer loops at runtime using temporaries
2237 instead of the original iteration variables, and in the
2238 body just bump the counter. */
2239 gimple_stmt_iterator gsi2 = *gsi;
2240 gsi_prev (&gsi2);
2241 e = split_block (entry_bb, gsi_stmt (gsi2));
2242 e = split_block (e->dest, (gimple *) NULL);
2243 basic_block cur_bb = e->src;
2244 basic_block next_bb = e->dest;
2245 entry_bb = e->dest;
2246 *gsi = gsi_after_labels (entry_bb);
2248 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2251 for (i = 0; i <= fd->last_nonrect; i++)
2253 if (fd->loops[i].m1 == NULL_TREE
2254 && fd->loops[i].m2 == NULL_TREE
2255 && !fd->loops[i].non_rect_referenced)
2256 continue;
2258 tree itype = TREE_TYPE (fd->loops[i].v);
2260 gsi2 = gsi_after_labels (cur_bb);
2261 tree n1, n2;
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263 if (fd->loops[i].m1 == NULL_TREE)
2264 n1 = t;
2265 else if (POINTER_TYPE_P (itype))
2267 gcc_assert (integer_onep (fd->loops[i].m1));
2268 t = fold_convert (sizetype,
2269 unshare_expr (fd->loops[i].n1));
2270 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2272 else
2274 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275 n1 = fold_build2 (MULT_EXPR, itype,
2276 vs[i - fd->loops[i].outer], n1);
2277 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2279 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280 true, GSI_SAME_STMT);
2281 if (i < fd->last_nonrect)
2283 vs[i] = create_tmp_reg (itype, ".it");
2284 expand_omp_build_assign (&gsi2, vs[i], n1);
2286 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287 if (fd->loops[i].m2 == NULL_TREE)
2288 n2 = t;
2289 else if (POINTER_TYPE_P (itype))
2291 gcc_assert (integer_onep (fd->loops[i].m2));
2292 t = fold_convert (sizetype,
2293 unshare_expr (fd->loops[i].n2));
2294 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2296 else
2298 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2299 n2 = fold_build2 (MULT_EXPR, itype,
2300 vs[i - fd->loops[i].outer], n2);
2301 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2303 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2304 true, GSI_SAME_STMT);
2305 if (POINTER_TYPE_P (itype))
2306 itype = signed_type_for (itype);
2307 if (i == fd->last_nonrect)
2309 gcond *cond_stmt
2310 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2311 n1, n2);
2312 e = split_block (cur_bb, cond_stmt);
2313 e->flags = EDGE_TRUE_VALUE;
2314 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2315 e->probability = profile_probability::likely ().guessed ();
2316 ne->probability = e->probability.invert ();
2317 gsi2 = gsi_after_labels (e->dest);
2319 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2320 ? -1 : 1));
2321 t = fold_build2 (PLUS_EXPR, itype,
2322 fold_convert (itype, fd->loops[i].step), t);
2323 t = fold_build2 (PLUS_EXPR, itype, t,
2324 fold_convert (itype, n2));
2325 t = fold_build2 (MINUS_EXPR, itype, t,
2326 fold_convert (itype, n1));
2327 tree step = fold_convert (itype, fd->loops[i].step);
2328 if (TYPE_UNSIGNED (itype)
2329 && fd->loops[i].cond_code == GT_EXPR)
2330 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2331 fold_build1 (NEGATE_EXPR, itype, t),
2332 fold_build1 (NEGATE_EXPR, itype, step));
2333 else
2334 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2335 t = fold_convert (type, t);
2336 t = fold_build2 (PLUS_EXPR, type,
2337 counts[fd->last_nonrect], t);
2338 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2339 true, GSI_SAME_STMT);
2340 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2341 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2342 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2343 break;
2345 e = split_block (cur_bb, last_stmt (cur_bb));
2347 basic_block new_cur_bb = create_empty_bb (cur_bb);
2348 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2350 gsi2 = gsi_after_labels (e->dest);
2351 tree step = fold_convert (itype,
2352 unshare_expr (fd->loops[i].step));
2353 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2354 t = fold_build_pointer_plus (vs[i],
2355 fold_convert (sizetype, step));
2356 else
2357 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2358 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2359 true, GSI_SAME_STMT);
2360 expand_omp_build_assign (&gsi2, vs[i], t);
2362 ne = split_block (e->dest, last_stmt (e->dest));
2363 gsi2 = gsi_after_labels (ne->dest);
2365 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2366 edge e3, e4;
2367 if (next_bb == entry_bb)
2369 e3 = find_edge (ne->dest, next_bb);
2370 e3->flags = EDGE_FALSE_VALUE;
2372 else
2373 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2374 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2375 e4->probability = profile_probability::likely ().guessed ();
2376 e3->probability = e4->probability.invert ();
2377 basic_block esrc = e->src;
2378 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2379 cur_bb = new_cur_bb;
2380 basic_block latch_bb = next_bb;
2381 next_bb = e->dest;
2382 remove_edge (e);
2383 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2384 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2385 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2388 t = NULL_TREE;
2389 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2390 if (!fd->loops[i].non_rect_referenced
2391 && fd->loops[i].m1 == NULL_TREE
2392 && fd->loops[i].m2 == NULL_TREE)
2394 if (t == NULL_TREE)
2395 t = counts[i];
2396 else
2397 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2399 if (t)
2401 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2402 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2404 if (!rect_count_seen)
2405 t = counts[fd->last_nonrect];
2406 else
2407 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2408 counts[fd->last_nonrect]);
2409 expand_omp_build_assign (gsi, fd->loop.n2, t);
2411 else if (fd->non_rect)
2413 tree t = fd->loop.n2;
2414 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2415 int non_rect_referenced = 0, non_rect = 0;
2416 for (i = 0; i < fd->collapse; i++)
2418 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2419 && !integer_zerop (counts[i]))
2420 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2421 if (fd->loops[i].non_rect_referenced)
2422 non_rect_referenced++;
2423 if (fd->loops[i].m1 || fd->loops[i].m2)
2424 non_rect++;
2426 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2427 counts[fd->last_nonrect] = t;
2431 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2432 T = V;
2433 V3 = N31 + (T % count3) * STEP3;
2434 T = T / count3;
2435 V2 = N21 + (T % count2) * STEP2;
2436 T = T / count2;
2437 V1 = N11 + T * STEP1;
2438 if this loop doesn't have an inner loop construct combined with it.
2439 If it does have an inner loop construct combined with it and the
2440 iteration count isn't known constant, store values from counts array
2441 into its _looptemp_ temporaries instead.
2442 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2443 inclusive), use the count of all those loops together, and either
2444 find quadratic etc. equation roots, or as a fallback, do:
2445 COUNT = 0;
2446 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2447 for (tmpj = M21 * tmpi + N21;
2448 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2450 int tmpk1 = M31 * tmpj + N31;
2451 int tmpk2 = M32 * tmpj + N32;
2452 if (tmpk1 COND3 tmpk2)
2454 if (COND3 is <)
2455 adj = STEP3 - 1;
2456 else
2457 adj = STEP3 + 1;
2458 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2459 if (COUNT + temp > T)
2461 V1 = tmpi;
2462 V2 = tmpj;
2463 V3 = tmpk1 + (T - COUNT) * STEP3;
2464 goto done;
2466 else
2467 COUNT += temp;
2470 done:;
2471 but for optional innermost or outermost rectangular loops that aren't
2472 referenced by other loop expressions, keep doing the division/modulo. */
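/* A small worked example of the division/modulo scheme above (the numbers
   are made up, purely for illustration): with count2 = 3, count3 = 5 and
   logical iteration T = 22,
     V3 = N31 + (22 % 5) * STEP3 = N31 + 2 * STEP3;  T = 22 / 5 = 4;
     V2 = N21 + (4 % 3) * STEP2  = N21 + 1 * STEP2;  T = 4 / 3 = 1;
     V1 = N11 + 1 * STEP1;
   i.e. T = 22 selects element (1, 1, 2) of the collapsed iteration space,
   counting from zero with the innermost index varying fastest.  */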
2474 static void
2475 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2476 tree *counts, tree *nonrect_bounds,
2477 gimple *inner_stmt, tree startvar)
2479 int i;
2480 if (gimple_omp_for_combined_p (fd->for_stmt))
2482 /* If fd->loop.n2 is constant, then no propagation of the counts
2483 is needed, they are constant. */
2484 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2485 return;
2487 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2488 ? gimple_omp_taskreg_clauses (inner_stmt)
2489 : gimple_omp_for_clauses (inner_stmt);
2490 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2491 isn't supposed to be handled, as the inner loop doesn't
2492 use it. */
2493 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2494 gcc_assert (innerc);
2495 int count = 0;
2496 if (fd->non_rect
2497 && fd->last_nonrect == fd->first_nonrect + 1
2498 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2499 count = 4;
2500 for (i = 0; i < fd->collapse + count; i++)
2502 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2503 OMP_CLAUSE__LOOPTEMP_);
2504 gcc_assert (innerc);
2505 if (i)
2507 tree tem = OMP_CLAUSE_DECL (innerc);
2508 tree t;
2509 if (i < fd->collapse)
2510 t = counts[i];
2511 else
2512 switch (i - fd->collapse)
2514 case 0: t = counts[0]; break;
2515 case 1: t = fd->first_inner_iterations; break;
2516 case 2: t = fd->factor; break;
2517 case 3: t = fd->adjn1; break;
2518 default: gcc_unreachable ();
2520 t = fold_convert (TREE_TYPE (tem), t);
2521 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2522 false, GSI_CONTINUE_LINKING);
2523 gassign *stmt = gimple_build_assign (tem, t);
2524 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2527 return;
2530 tree type = TREE_TYPE (fd->loop.v);
2531 tree tem = create_tmp_reg (type, ".tem");
2532 gassign *stmt = gimple_build_assign (tem, startvar);
2533 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2535 for (i = fd->collapse - 1; i >= 0; i--)
2537 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2538 itype = vtype;
2539 if (POINTER_TYPE_P (vtype))
2540 itype = signed_type_for (vtype);
2541 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2542 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2543 else
2544 t = tem;
2545 if (i == fd->last_nonrect)
2547 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2548 false, GSI_CONTINUE_LINKING);
2549 tree stopval = t;
2550 tree idx = create_tmp_reg (type, ".count");
2551 expand_omp_build_assign (gsi, idx,
2552 build_zero_cst (type), true);
2553 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2554 if (fd->first_nonrect + 1 == fd->last_nonrect
2555 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2556 || fd->first_inner_iterations)
2557 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2558 != CODE_FOR_nothing)
2559 && !integer_zerop (fd->loop.n2))
2561 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2562 tree itype = TREE_TYPE (fd->loops[i].v);
2563 tree first_inner_iterations = fd->first_inner_iterations;
2564 tree factor = fd->factor;
2565 gcond *cond_stmt
2566 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2567 build_zero_cst (TREE_TYPE (factor)));
2568 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2569 basic_block bb0 = e->src;
2570 e->flags = EDGE_TRUE_VALUE;
2571 e->probability = profile_probability::likely ();
2572 bb_triang_dom = bb0;
2573 *gsi = gsi_after_labels (e->dest);
2574 tree slltype = long_long_integer_type_node;
2575 tree ulltype = long_long_unsigned_type_node;
2576 tree stopvalull = fold_convert (ulltype, stopval);
2577 stopvalull
2578 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2579 false, GSI_CONTINUE_LINKING);
2580 first_inner_iterations
2581 = fold_convert (slltype, first_inner_iterations);
2582 first_inner_iterations
2583 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2584 NULL_TREE, false,
2585 GSI_CONTINUE_LINKING);
2586 factor = fold_convert (slltype, factor);
2587 factor
2588 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2589 false, GSI_CONTINUE_LINKING);
2590 tree first_inner_iterationsd
2591 = fold_build1 (FLOAT_EXPR, double_type_node,
2592 first_inner_iterations);
2593 first_inner_iterationsd
2594 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2595 NULL_TREE, false,
2596 GSI_CONTINUE_LINKING);
2597 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2598 factor);
2599 factord = force_gimple_operand_gsi (gsi, factord, true,
2600 NULL_TREE, false,
2601 GSI_CONTINUE_LINKING);
2602 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2603 stopvalull);
2604 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2605 NULL_TREE, false,
2606 GSI_CONTINUE_LINKING);
2607 /* Temporarily disable flag_rounding_math; the values will be
2608 decimal numbers divided by 2, and worst-case imprecision
2609 due to too large values ought to be caught later by the
2610 fallback checks. */
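/* Sketch of the algebra behind the computation below (written out here
   only for clarity; the names match the variables in the code): after C
   outer iterations the sub-nest has executed
     cum(C) = C * first_inner_iterations + factor * C * (C - 1) / 2
            = (factor / 2) * C^2 + t3 * C
   iterations, with t3 = first_inner_iterations - factor / 2.  Solving
   cum(C) = stopval by the quadratic formula gives
     C = (sqrt (t3 * t3 + 2 * factor * stopval) - t3) / factor,
   which is what is computed in double precision and truncated to c below;
   the later comparisons of stopval against d = cum(c) and against
   d + factor * c + first_inner_iterations catch rounding error by
   dropping to the fallback implementation when c is off.  */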
2611 int save_flag_rounding_math = flag_rounding_math;
2612 flag_rounding_math = 0;
2613 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2614 build_real (double_type_node, dconst2));
2615 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2616 first_inner_iterationsd, t);
2617 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2618 GSI_CONTINUE_LINKING);
2619 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2620 build_real (double_type_node, dconst2));
2621 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2622 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2623 fold_build2 (MULT_EXPR, double_type_node,
2624 t3, t3));
2625 flag_rounding_math = save_flag_rounding_math;
2626 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2627 GSI_CONTINUE_LINKING);
2628 if (flag_exceptions
2629 && cfun->can_throw_non_call_exceptions
2630 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2632 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2633 build_zero_cst (double_type_node));
2634 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2635 false, GSI_CONTINUE_LINKING);
2636 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2637 boolean_false_node,
2638 NULL_TREE, NULL_TREE);
2640 else
2641 cond_stmt
2642 = gimple_build_cond (LT_EXPR, t,
2643 build_zero_cst (double_type_node),
2644 NULL_TREE, NULL_TREE);
2645 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2646 e = split_block (gsi_bb (*gsi), cond_stmt);
2647 basic_block bb1 = e->src;
2648 e->flags = EDGE_FALSE_VALUE;
2649 e->probability = profile_probability::very_likely ();
2650 *gsi = gsi_after_labels (e->dest);
2651 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2652 tree sqrtr = create_tmp_var (double_type_node);
2653 gimple_call_set_lhs (call, sqrtr);
2654 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2655 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2656 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2657 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2658 tree c = create_tmp_var (ulltype);
2659 tree d = create_tmp_var (ulltype);
2660 expand_omp_build_assign (gsi, c, t, true);
2661 t = fold_build2 (MINUS_EXPR, ulltype, c,
2662 build_one_cst (ulltype));
2663 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2664 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2665 t = fold_build2 (MULT_EXPR, ulltype,
2666 fold_convert (ulltype, fd->factor), t);
2667 tree t2
2668 = fold_build2 (MULT_EXPR, ulltype, c,
2669 fold_convert (ulltype,
2670 fd->first_inner_iterations));
2671 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2672 expand_omp_build_assign (gsi, d, t, true);
2673 t = fold_build2 (MULT_EXPR, ulltype,
2674 fold_convert (ulltype, fd->factor), c);
2675 t = fold_build2 (PLUS_EXPR, ulltype,
2676 t, fold_convert (ulltype,
2677 fd->first_inner_iterations));
2678 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2679 GSI_CONTINUE_LINKING);
2680 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2681 NULL_TREE, NULL_TREE);
2682 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2683 e = split_block (gsi_bb (*gsi), cond_stmt);
2684 basic_block bb2 = e->src;
2685 e->flags = EDGE_TRUE_VALUE;
2686 e->probability = profile_probability::very_likely ();
2687 *gsi = gsi_after_labels (e->dest);
2688 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2689 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2690 GSI_CONTINUE_LINKING);
2691 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2692 NULL_TREE, NULL_TREE);
2693 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2694 e = split_block (gsi_bb (*gsi), cond_stmt);
2695 basic_block bb3 = e->src;
2696 e->flags = EDGE_FALSE_VALUE;
2697 e->probability = profile_probability::very_likely ();
2698 *gsi = gsi_after_labels (e->dest);
2699 t = fold_convert (itype, c);
2700 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2701 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2702 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2703 GSI_CONTINUE_LINKING);
2704 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2705 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2706 t2 = fold_convert (itype, t2);
2707 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2708 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2709 if (fd->loops[i].m1)
2711 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2712 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2714 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2715 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2716 bb_triang = e->src;
2717 *gsi = gsi_after_labels (e->dest);
2718 remove_edge (e);
2719 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2720 e->probability = profile_probability::very_unlikely ();
2721 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2722 e->probability = profile_probability::very_unlikely ();
2723 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2724 e->probability = profile_probability::very_unlikely ();
2726 basic_block bb4 = create_empty_bb (bb0);
2727 add_bb_to_loop (bb4, bb0->loop_father);
2728 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2729 e->probability = profile_probability::unlikely ();
2730 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2731 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2732 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2733 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2734 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2735 counts[i], counts[i - 1]);
2736 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2737 GSI_CONTINUE_LINKING);
2738 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2739 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2740 t = fold_convert (itype, t);
2741 t2 = fold_convert (itype, t2);
2742 t = fold_build2 (MULT_EXPR, itype, t,
2743 fold_convert (itype, fd->loops[i].step));
2744 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2745 t2 = fold_build2 (MULT_EXPR, itype, t2,
2746 fold_convert (itype, fd->loops[i - 1].step));
2747 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2748 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2749 false, GSI_CONTINUE_LINKING);
2750 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2751 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2752 if (fd->loops[i].m1)
2754 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2755 fd->loops[i - 1].v);
2756 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2758 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2759 false, GSI_CONTINUE_LINKING);
2760 stmt = gimple_build_assign (fd->loops[i].v, t);
2761 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2763 /* Fallback implementation. Evaluate the loops in between
2764 (inclusive) fd->first_nonrect and fd->last_nonrect at
2765 runtime using temporaries instead of the original iteration
2766 variables, in the body just bump the counter and compare
2767 with the desired value. */
2768 gimple_stmt_iterator gsi2 = *gsi;
2769 basic_block entry_bb = gsi_bb (gsi2);
2770 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2771 e = split_block (e->dest, (gimple *) NULL);
2772 basic_block dom_bb = NULL;
2773 basic_block cur_bb = e->src;
2774 basic_block next_bb = e->dest;
2775 entry_bb = e->dest;
2776 *gsi = gsi_after_labels (entry_bb);
2778 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2779 tree n1 = NULL_TREE, n2 = NULL_TREE;
2780 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2782 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2784 tree itype = TREE_TYPE (fd->loops[j].v);
2785 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2786 && fd->loops[j].m2 == NULL_TREE
2787 && !fd->loops[j].non_rect_referenced);
2788 gsi2 = gsi_after_labels (cur_bb);
2789 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2790 if (fd->loops[j].m1 == NULL_TREE)
2791 n1 = rect_p ? build_zero_cst (type) : t;
2792 else if (POINTER_TYPE_P (itype))
2794 gcc_assert (integer_onep (fd->loops[j].m1));
2795 t = fold_convert (sizetype,
2796 unshare_expr (fd->loops[j].n1));
2797 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2799 else
2801 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2802 n1 = fold_build2 (MULT_EXPR, itype,
2803 vs[j - fd->loops[j].outer], n1);
2804 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2806 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2807 true, GSI_SAME_STMT);
2808 if (j < fd->last_nonrect)
2810 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2811 expand_omp_build_assign (&gsi2, vs[j], n1);
2813 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2814 if (fd->loops[j].m2 == NULL_TREE)
2815 n2 = rect_p ? counts[j] : t;
2816 else if (POINTER_TYPE_P (itype))
2818 gcc_assert (integer_onep (fd->loops[j].m2));
2819 t = fold_convert (sizetype,
2820 unshare_expr (fd->loops[j].n2));
2821 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2823 else
2825 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2826 n2 = fold_build2 (MULT_EXPR, itype,
2827 vs[j - fd->loops[j].outer], n2);
2828 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2830 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2831 true, GSI_SAME_STMT);
2832 if (POINTER_TYPE_P (itype))
2833 itype = signed_type_for (itype);
2834 if (j == fd->last_nonrect)
2836 gcond *cond_stmt
2837 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2838 n1, n2);
2839 e = split_block (cur_bb, cond_stmt);
2840 e->flags = EDGE_TRUE_VALUE;
2841 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2842 e->probability = profile_probability::likely ().guessed ();
2843 ne->probability = e->probability.invert ();
2844 gsi2 = gsi_after_labels (e->dest);
2846 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2847 ? -1 : 1));
2848 t = fold_build2 (PLUS_EXPR, itype,
2849 fold_convert (itype, fd->loops[j].step), t);
2850 t = fold_build2 (PLUS_EXPR, itype, t,
2851 fold_convert (itype, n2));
2852 t = fold_build2 (MINUS_EXPR, itype, t,
2853 fold_convert (itype, n1));
2854 tree step = fold_convert (itype, fd->loops[j].step);
2855 if (TYPE_UNSIGNED (itype)
2856 && fd->loops[j].cond_code == GT_EXPR)
2857 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2858 fold_build1 (NEGATE_EXPR, itype, t),
2859 fold_build1 (NEGATE_EXPR, itype, step));
2860 else
2861 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2862 t = fold_convert (type, t);
2863 t = fold_build2 (PLUS_EXPR, type, idx, t);
2864 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2865 true, GSI_SAME_STMT);
2866 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2867 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2868 cond_stmt
2869 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2870 NULL_TREE);
2871 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2872 e = split_block (gsi_bb (gsi2), cond_stmt);
2873 e->flags = EDGE_TRUE_VALUE;
2874 e->probability = profile_probability::likely ().guessed ();
2875 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2876 ne->probability = e->probability.invert ();
2877 gsi2 = gsi_after_labels (e->dest);
2878 expand_omp_build_assign (&gsi2, idx, t);
2879 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2880 break;
2882 e = split_block (cur_bb, last_stmt (cur_bb));
2884 basic_block new_cur_bb = create_empty_bb (cur_bb);
2885 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2887 gsi2 = gsi_after_labels (e->dest);
2888 if (rect_p)
2889 t = fold_build2 (PLUS_EXPR, type, vs[j],
2890 build_one_cst (type));
2891 else
2893 tree step
2894 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2895 if (POINTER_TYPE_P (vtype))
2896 t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2897 step));
2898 else
2899 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2901 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2902 true, GSI_SAME_STMT);
2903 expand_omp_build_assign (&gsi2, vs[j], t);
2905 edge ne = split_block (e->dest, last_stmt (e->dest));
2906 gsi2 = gsi_after_labels (ne->dest);
2908 gcond *cond_stmt;
2909 if (next_bb == entry_bb)
2910 /* No need to actually check the outermost condition. */
2911 cond_stmt
2912 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2913 boolean_true_node,
2914 NULL_TREE, NULL_TREE);
2915 else
2916 cond_stmt
2917 = gimple_build_cond (rect_p ? LT_EXPR
2918 : fd->loops[j].cond_code,
2919 vs[j], n2, NULL_TREE, NULL_TREE);
2920 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2921 edge e3, e4;
2922 if (next_bb == entry_bb)
2924 e3 = find_edge (ne->dest, next_bb);
2925 e3->flags = EDGE_FALSE_VALUE;
2926 dom_bb = ne->dest;
2928 else
2929 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2930 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2931 e4->probability = profile_probability::likely ().guessed ();
2932 e3->probability = e4->probability.invert ();
2933 basic_block esrc = e->src;
2934 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2935 cur_bb = new_cur_bb;
2936 basic_block latch_bb = next_bb;
2937 next_bb = e->dest;
2938 remove_edge (e);
2939 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2940 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2941 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2943 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2945 tree vtype = TREE_TYPE (fd->loops[j].v);
2946 tree itype = vtype;
2947 if (POINTER_TYPE_P (itype))
2948 itype = signed_type_for (itype);
2949 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2950 && fd->loops[j].m2 == NULL_TREE
2951 && !fd->loops[j].non_rect_referenced);
2952 if (j == fd->last_nonrect)
2954 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2955 t = fold_convert (itype, t);
2956 tree t2
2957 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2958 t = fold_build2 (MULT_EXPR, itype, t, t2);
2959 if (POINTER_TYPE_P (vtype))
2960 t = fold_build_pointer_plus (n1,
2961 fold_convert (sizetype, t));
2962 else
2963 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2965 else if (rect_p)
2967 t = fold_convert (itype, vs[j]);
2968 t = fold_build2 (MULT_EXPR, itype, t,
2969 fold_convert (itype, fd->loops[j].step));
2970 if (POINTER_TYPE_P (vtype))
2971 t = fold_build_pointer_plus (fd->loops[j].n1,
2972 fold_convert (sizetype, t));
2973 else
2974 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2976 else
2977 t = vs[j];
2978 t = force_gimple_operand_gsi (gsi, t, false,
2979 NULL_TREE, true,
2980 GSI_SAME_STMT);
2981 stmt = gimple_build_assign (fd->loops[j].v, t);
2982 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2984 if (gsi_end_p (*gsi))
2985 *gsi = gsi_last_bb (gsi_bb (*gsi));
2986 else
2987 gsi_prev (gsi);
2988 if (bb_triang)
2990 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2991 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2992 *gsi = gsi_after_labels (e->dest);
2993 if (!gsi_end_p (*gsi))
2994 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2995 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2998 else
3000 t = fold_convert (itype, t);
3001 t = fold_build2 (MULT_EXPR, itype, t,
3002 fold_convert (itype, fd->loops[i].step));
3003 if (POINTER_TYPE_P (vtype))
3004 t = fold_build_pointer_plus (fd->loops[i].n1, t);
3005 else
3006 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3007 t = force_gimple_operand_gsi (gsi, t,
3008 DECL_P (fd->loops[i].v)
3009 && TREE_ADDRESSABLE (fd->loops[i].v),
3010 NULL_TREE, false,
3011 GSI_CONTINUE_LINKING);
3012 stmt = gimple_build_assign (fd->loops[i].v, t);
3013 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3015 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3017 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3018 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3019 false, GSI_CONTINUE_LINKING);
3020 stmt = gimple_build_assign (tem, t);
3021 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3023 if (i == fd->last_nonrect)
3024 i = fd->first_nonrect;
3026 if (fd->non_rect)
3027 for (i = 0; i <= fd->last_nonrect; i++)
3028 if (fd->loops[i].m2)
3030 tree itype = TREE_TYPE (fd->loops[i].v);
3032 tree t;
3033 if (POINTER_TYPE_P (itype))
3035 gcc_assert (integer_onep (fd->loops[i].m2));
3036 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3037 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3038 t);
3040 else
3042 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3043 t = fold_build2 (MULT_EXPR, itype,
3044 fd->loops[i - fd->loops[i].outer].v, t);
3045 t = fold_build2 (PLUS_EXPR, itype, t,
3046 fold_convert (itype,
3047 unshare_expr (fd->loops[i].n2)));
3049 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3050 t = force_gimple_operand_gsi (gsi, t, false,
3051 NULL_TREE, false,
3052 GSI_CONTINUE_LINKING);
3053 stmt = gimple_build_assign (nonrect_bounds[i], t);
3054 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3058 /* Helper function for expand_omp_for_*. Generate code like:
3059 L10:
3060 V3 += STEP3;
3061 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3062 L11:
3063 V3 = N31;
3064 V2 += STEP2;
3065 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3066 L12:
3067 V2 = N21;
3068 V1 += STEP1;
3069 goto BODY_BB;
3070 For non-rectangular loops, use temporaries stored in nonrect_bounds
3071 for the upper bounds if M?2 multiplier is present. Given e.g.
3072 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3073 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3074 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3075 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3077 L10:
3078 V4 += STEP4;
3079 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3080 L11:
3081 V4 = N41 + M41 * V2; // This can be left out if the loop
3082 // refers to the immediate parent loop
3083 V3 += STEP3;
3084 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3085 L12:
3086 V3 = N31;
3087 V2 += STEP2;
3088 if (V2 cond2 N22) goto L120; else goto L13;
3089 L120:
3090 V4 = N41 + M41 * V2;
3091 NONRECT_BOUND4 = N42 + M42 * V2;
3092 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3093 L13:
3094 V2 = N21;
3095 V1 += STEP1;
3096 goto L120; */
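/* For intuition only, a hypothetical standalone sketch (not code emitted
   by GCC) of what the rectangular part of the generated blocks amounts
   to: an odometer that advances the innermost counter and carries into
   the outer ones.

     // Returns true while the collapsed space still has iterations left;
     // assumes all conditions are < for simplicity.
     static bool
     advance (long v[3], const long n1[3], const long n2[3],
              const long step[3])
     {
       for (int i = 2; i >= 0; i--)
         {
           v[i] += step[i];
           if (v[i] < n2[i])
             return true;   // still in range, resume the body
           v[i] = n1[i];    // wrap this level, carry into level i - 1
         }
       return false;
     }

   except that extract_omp_for_update_vars emits the fully unrolled
   control flow (one block per level), does not test the outermost level
   (the loop's own exit test handles that), and for non-rectangular loops
   re-computes the dependent bounds from nonrect_bounds as shown above.  */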
3098 static basic_block
3099 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3100 basic_block cont_bb, basic_block body_bb)
3102 basic_block last_bb, bb, collapse_bb = NULL;
3103 int i;
3104 gimple_stmt_iterator gsi;
3105 edge e;
3106 tree t;
3107 gimple *stmt;
3109 last_bb = cont_bb;
3110 for (i = fd->collapse - 1; i >= 0; i--)
3112 tree vtype = TREE_TYPE (fd->loops[i].v);
3114 bb = create_empty_bb (last_bb);
3115 add_bb_to_loop (bb, last_bb->loop_father);
3116 gsi = gsi_start_bb (bb);
3118 if (i < fd->collapse - 1)
3120 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3121 e->probability
3122 = profile_probability::guessed_always ().apply_scale (1, 8);
3124 struct omp_for_data_loop *l = &fd->loops[i + 1];
3125 if (l->m1 == NULL_TREE || l->outer != 1)
3127 t = l->n1;
3128 if (l->m1)
3130 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3131 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3132 fold_convert (sizetype, t));
3133 else
3135 tree t2
3136 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3137 fd->loops[i + 1 - l->outer].v, l->m1);
3138 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3141 t = force_gimple_operand_gsi (&gsi, t,
3142 DECL_P (l->v)
3143 && TREE_ADDRESSABLE (l->v),
3144 NULL_TREE, false,
3145 GSI_CONTINUE_LINKING);
3146 stmt = gimple_build_assign (l->v, t);
3147 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3150 else
3151 collapse_bb = bb;
3153 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3155 if (POINTER_TYPE_P (vtype))
3156 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3157 else
3158 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3159 t = force_gimple_operand_gsi (&gsi, t,
3160 DECL_P (fd->loops[i].v)
3161 && TREE_ADDRESSABLE (fd->loops[i].v),
3162 NULL_TREE, false, GSI_CONTINUE_LINKING);
3163 stmt = gimple_build_assign (fd->loops[i].v, t);
3164 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3166 if (fd->loops[i].non_rect_referenced)
3168 basic_block update_bb = NULL, prev_bb = NULL;
3169 for (int j = i + 1; j <= fd->last_nonrect; j++)
3170 if (j - fd->loops[j].outer == i)
3172 tree n1, n2;
3173 struct omp_for_data_loop *l = &fd->loops[j];
3174 basic_block this_bb = create_empty_bb (last_bb);
3175 add_bb_to_loop (this_bb, last_bb->loop_father);
3176 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3177 if (prev_bb)
3179 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3180 e->probability
3181 = profile_probability::guessed_always ().apply_scale (7,
3182 8);
3183 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3185 if (l->m1)
3187 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3188 t = fold_build_pointer_plus (fd->loops[i].v,
3189 fold_convert (sizetype,
3190 l->n1));
3191 else
3193 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3194 fd->loops[i].v);
3195 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3196 t, l->n1);
3198 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3199 false,
3200 GSI_CONTINUE_LINKING);
3201 stmt = gimple_build_assign (l->v, n1);
3202 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3203 n1 = l->v;
3205 else
3206 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3207 NULL_TREE, false,
3208 GSI_CONTINUE_LINKING);
3209 if (l->m2)
3211 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3212 t = fold_build_pointer_plus (fd->loops[i].v,
3213 fold_convert (sizetype,
3214 l->n2));
3215 else
3217 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3218 fd->loops[i].v);
3219 t = fold_build2 (PLUS_EXPR,
3220 TREE_TYPE (nonrect_bounds[j]),
3221 t, unshare_expr (l->n2));
3223 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3224 false,
3225 GSI_CONTINUE_LINKING);
3226 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3227 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3228 n2 = nonrect_bounds[j];
3230 else
3231 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3232 true, NULL_TREE, false,
3233 GSI_CONTINUE_LINKING);
3234 gcond *cond_stmt
3235 = gimple_build_cond (l->cond_code, n1, n2,
3236 NULL_TREE, NULL_TREE);
3237 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3238 if (update_bb == NULL)
3239 update_bb = this_bb;
3240 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3241 e->probability
3242 = profile_probability::guessed_always ().apply_scale (1, 8);
3243 if (prev_bb == NULL)
3244 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3245 prev_bb = this_bb;
3247 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3248 e->probability
3249 = profile_probability::guessed_always ().apply_scale (7, 8);
3250 body_bb = update_bb;
3253 if (i > 0)
3255 if (fd->loops[i].m2)
3256 t = nonrect_bounds[i];
3257 else
3258 t = unshare_expr (fd->loops[i].n2);
3259 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3260 false, GSI_CONTINUE_LINKING);
3261 tree v = fd->loops[i].v;
3262 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3263 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3264 false, GSI_CONTINUE_LINKING);
3265 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3266 stmt = gimple_build_cond_empty (t);
3267 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3268 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3269 expand_omp_regimplify_p, NULL, NULL)
3270 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3271 expand_omp_regimplify_p, NULL, NULL))
3272 gimple_regimplify_operands (stmt, &gsi);
3273 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3274 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3276 else
3277 make_edge (bb, body_bb, EDGE_FALLTHRU);
3278 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3279 last_bb = bb;
3282 return collapse_bb;
3285 /* Expand #pragma omp ordered depend(source). */
3287 static void
3288 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3289 tree *counts, location_t loc)
3291 enum built_in_function source_ix
3292 = fd->iter_type == long_integer_type_node
3293 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3294 gimple *g
3295 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3296 build_fold_addr_expr (counts[fd->ordered]));
3297 gimple_set_location (g, loc);
3298 gsi_insert_before (gsi, g, GSI_SAME_STMT);
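/* For context, an illustrative OpenMP fragment (not taken from this file)
   of the kind of doacross loop these two helpers expand:

     #pragma omp for ordered(1)
     for (int i = 1; i < n; i++)
       {
         #pragma omp ordered depend(sink: i - 1)
         a[i] += a[i - 1];
         #pragma omp ordered depend(source)
       }

   depend(source) posts completion of the current iteration and
   depend(sink: i - 1) waits for the named earlier iteration; both expand
   into calls to the GOMP_doacross_post/GOMP_doacross_wait libgomp entry
   points (or their _ull_ variants), selected via source_ix above and
   sink_ix below.  */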
3301 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3303 static void
3304 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3305 tree *counts, tree c, location_t loc)
3307 auto_vec<tree, 10> args;
3308 enum built_in_function sink_ix
3309 = fd->iter_type == long_integer_type_node
3310 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3311 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3312 int i;
3313 gimple_stmt_iterator gsi2 = *gsi;
3314 bool warned_step = false;
3316 for (i = 0; i < fd->ordered; i++)
3318 tree step = NULL_TREE;
3319 off = TREE_PURPOSE (deps);
3320 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3322 step = TREE_OPERAND (off, 1);
3323 off = TREE_OPERAND (off, 0);
3325 if (!integer_zerop (off))
3327 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3328 || fd->loops[i].cond_code == GT_EXPR);
3329 bool forward = fd->loops[i].cond_code == LT_EXPR;
3330 if (step)
3332 /* Non-simple Fortran DO loops. If step is variable,
2333 we don't know even the direction at compile time, so we
2334 can't warn. */
3335 if (TREE_CODE (step) != INTEGER_CST)
3336 break;
3337 forward = tree_int_cst_sgn (step) != -1;
3339 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3340 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3341 "waiting for lexically later iteration");
3342 break;
3344 deps = TREE_CHAIN (deps);
3346 /* If all offsets corresponding to the collapsed loops are zero,
3347 this depend clause can be ignored. FIXME: but there is still a
3348 flush needed. We need to emit one __sync_synchronize () for it
3349 though (perhaps conditionally)? Solve this together with the
3350 conservative dependence folding optimization.
3351 if (i >= fd->collapse)
3352 return; */
3354 deps = OMP_CLAUSE_DECL (c);
3355 gsi_prev (&gsi2);
3356 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3357 edge e2 = split_block_after_labels (e1->dest);
3359 gsi2 = gsi_after_labels (e1->dest);
3360 *gsi = gsi_last_bb (e1->src);
3361 for (i = 0; i < fd->ordered; i++)
3363 tree itype = TREE_TYPE (fd->loops[i].v);
3364 tree step = NULL_TREE;
3365 tree orig_off = NULL_TREE;
3366 if (POINTER_TYPE_P (itype))
3367 itype = sizetype;
3368 if (i)
3369 deps = TREE_CHAIN (deps);
3370 off = TREE_PURPOSE (deps);
3371 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3373 step = TREE_OPERAND (off, 1);
3374 off = TREE_OPERAND (off, 0);
3375 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3376 && integer_onep (fd->loops[i].step)
3377 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3379 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3380 if (step)
3382 off = fold_convert_loc (loc, itype, off);
3383 orig_off = off;
3384 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3387 if (integer_zerop (off))
3388 t = boolean_true_node;
3389 else
3391 tree a;
3392 tree co = fold_convert_loc (loc, itype, off);
3393 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3395 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3396 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3397 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3398 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3399 co);
3401 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3402 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3403 fd->loops[i].v, co);
3404 else
3405 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3406 fd->loops[i].v, co);
3407 if (step)
3409 tree t1, t2;
3410 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3411 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3412 fd->loops[i].n1);
3413 else
3414 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3415 fd->loops[i].n2);
3416 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3417 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3418 fd->loops[i].n2);
3419 else
3420 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3421 fd->loops[i].n1);
3422 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3423 step, build_int_cst (TREE_TYPE (step), 0));
3424 if (TREE_CODE (step) != INTEGER_CST)
3426 t1 = unshare_expr (t1);
3427 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3428 false, GSI_CONTINUE_LINKING);
3429 t2 = unshare_expr (t2);
3430 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3431 false, GSI_CONTINUE_LINKING);
3433 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3434 t, t2, t1);
3436 else if (fd->loops[i].cond_code == LT_EXPR)
3438 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3439 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3440 fd->loops[i].n1);
3441 else
3442 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3443 fd->loops[i].n2);
3445 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3446 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3447 fd->loops[i].n2);
3448 else
3449 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3450 fd->loops[i].n1);
3452 if (cond)
3453 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3454 else
3455 cond = t;
3457 off = fold_convert_loc (loc, itype, off);
3459 if (step
3460 || (fd->loops[i].cond_code == LT_EXPR
3461 ? !integer_onep (fd->loops[i].step)
3462 : !integer_minus_onep (fd->loops[i].step)))
3464 if (step == NULL_TREE
3465 && TYPE_UNSIGNED (itype)
3466 && fd->loops[i].cond_code == GT_EXPR)
3467 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3468 fold_build1_loc (loc, NEGATE_EXPR, itype,
3469 s));
3470 else
3471 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3472 orig_off ? orig_off : off, s);
3473 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3474 build_int_cst (itype, 0));
3475 if (integer_zerop (t) && !warned_step)
3477 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3478 "refers to iteration never in the iteration "
3479 "space");
3480 warned_step = true;
3482 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3483 cond, t);
3486 if (i <= fd->collapse - 1 && fd->collapse > 1)
3487 t = fd->loop.v;
3488 else if (counts[i])
3489 t = counts[i];
3490 else
3492 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3493 fd->loops[i].v, fd->loops[i].n1);
3494 t = fold_convert_loc (loc, fd->iter_type, t);
3496 if (step)
3497 /* We have already divided off by step earlier. */;
3498 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3499 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3500 fold_build1_loc (loc, NEGATE_EXPR, itype,
3501 s));
3502 else
3503 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3504 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3505 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3506 off = fold_convert_loc (loc, fd->iter_type, off);
3507 if (i <= fd->collapse - 1 && fd->collapse > 1)
3509 if (i)
3510 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3511 off);
3512 if (i < fd->collapse - 1)
3514 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3515 counts[i]);
3516 continue;
3519 off = unshare_expr (off);
3520 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3521 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3522 true, GSI_SAME_STMT);
3523 args.safe_push (t);
3525 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3526 gimple_set_location (g, loc);
3527 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3529 cond = unshare_expr (cond);
3530 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3531 GSI_CONTINUE_LINKING);
3532 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3533 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3534 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3535 e1->probability = e3->probability.invert ();
3536 e1->flags = EDGE_TRUE_VALUE;
3537 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3539 *gsi = gsi_after_labels (e2->dest);
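/* For illustration only (the variable names n, m and a are hypothetical),
   the kind of user-level doacross nest whose depend clauses are expanded
   by the sink expansion above and the driver below:

	#pragma omp for ordered(2)
	for (int i = 1; i < n; i++)
	  for (int j = 1; j < m; j++)
	    {
	      #pragma omp ordered depend (sink: i - 1, j) depend (sink: i, j - 1)
	      a[i][j] += a[i - 1][j] + a[i][j - 1];
	      #pragma omp ordered depend (source)
	    }

   Each sink clause becomes a (possibly conditional) call to
   GOMP_doacross_wait or GOMP_doacross_ull_wait with one iteration number
   per ordered dimension; the source side posts the current iteration via
   the corresponding GOMP_doacross_post entry point.  */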
3542 /* Expand all #pragma omp ordered depend(source) and
3543 #pragma omp ordered depend(sink:...) constructs in the current
3544 #pragma omp for ordered(n) region. */
3546 static void
3547 expand_omp_ordered_source_sink (struct omp_region *region,
3548 struct omp_for_data *fd, tree *counts,
3549 basic_block cont_bb)
3551 struct omp_region *inner;
3552 int i;
3553 for (i = fd->collapse - 1; i < fd->ordered; i++)
3554 if (i == fd->collapse - 1 && fd->collapse > 1)
3555 counts[i] = NULL_TREE;
3556 else if (i >= fd->collapse && !cont_bb)
3557 counts[i] = build_zero_cst (fd->iter_type);
3558 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3559 && integer_onep (fd->loops[i].step))
3560 counts[i] = NULL_TREE;
3561 else
3562 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3563 tree atype
3564 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3565 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3566 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3568 for (inner = region->inner; inner; inner = inner->next)
3569 if (inner->type == GIMPLE_OMP_ORDERED)
3571 gomp_ordered *ord_stmt = inner->ord_stmt;
3572 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3573 location_t loc = gimple_location (ord_stmt);
3574 tree c;
3575 for (c = gimple_omp_ordered_clauses (ord_stmt);
3576 c; c = OMP_CLAUSE_CHAIN (c))
3577 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3578 break;
3579 if (c)
3580 expand_omp_ordered_source (&gsi, fd, counts, loc);
3581 for (c = gimple_omp_ordered_clauses (ord_stmt);
3582 c; c = OMP_CLAUSE_CHAIN (c))
3583 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3584 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3585 gsi_remove (&gsi, true);
3589 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3590 collapsed. */
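/* For instance (a rough sketch, not the literal generated GIMPLE), with
   fd->collapse == 1 and fd->ordered == 2 the body ends up wrapped as

	V2 = N21;
     L: if (!(V2 cond2 N22)) goto out;
	BODY;
	V2 += STEP2;
	goto L;
     out:

   with the running count of each such dimension also stored into the
   .orditera array consulted for depend(source)/depend(sink).  */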
3592 static basic_block
3593 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3594 basic_block cont_bb, basic_block body_bb,
3595 bool ordered_lastprivate)
3597 if (fd->ordered == fd->collapse)
3598 return cont_bb;
3600 if (!cont_bb)
3602 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3603 for (int i = fd->collapse; i < fd->ordered; i++)
3605 tree type = TREE_TYPE (fd->loops[i].v);
3606 tree n1 = fold_convert (type, fd->loops[i].n1);
3607 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3608 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3609 size_int (i - fd->collapse + 1),
3610 NULL_TREE, NULL_TREE);
3611 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3613 return NULL;
3616 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3618 tree t, type = TREE_TYPE (fd->loops[i].v);
3619 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3620 expand_omp_build_assign (&gsi, fd->loops[i].v,
3621 fold_convert (type, fd->loops[i].n1));
3622 if (counts[i])
3623 expand_omp_build_assign (&gsi, counts[i],
3624 build_zero_cst (fd->iter_type));
3625 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3626 size_int (i - fd->collapse + 1),
3627 NULL_TREE, NULL_TREE);
3628 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3629 if (!gsi_end_p (gsi))
3630 gsi_prev (&gsi);
3631 else
3632 gsi = gsi_last_bb (body_bb);
3633 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3634 basic_block new_body = e1->dest;
3635 if (body_bb == cont_bb)
3636 cont_bb = new_body;
3637 edge e2 = NULL;
3638 basic_block new_header;
3639 if (EDGE_COUNT (cont_bb->preds) > 0)
3641 gsi = gsi_last_bb (cont_bb);
3642 if (POINTER_TYPE_P (type))
3643 t = fold_build_pointer_plus (fd->loops[i].v,
3644 fold_convert (sizetype,
3645 fd->loops[i].step));
3646 else
3647 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3648 fold_convert (type, fd->loops[i].step));
3649 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3650 if (counts[i])
3652 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3653 build_int_cst (fd->iter_type, 1));
3654 expand_omp_build_assign (&gsi, counts[i], t);
3655 t = counts[i];
3657 else
3659 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3660 fd->loops[i].v, fd->loops[i].n1);
3661 t = fold_convert (fd->iter_type, t);
3662 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3663 true, GSI_SAME_STMT);
3665 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3666 size_int (i - fd->collapse + 1),
3667 NULL_TREE, NULL_TREE);
3668 expand_omp_build_assign (&gsi, aref, t);
3669 gsi_prev (&gsi);
3670 e2 = split_block (cont_bb, gsi_stmt (gsi));
3671 new_header = e2->dest;
3673 else
3674 new_header = cont_bb;
3675 gsi = gsi_after_labels (new_header);
3676 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3677 true, GSI_SAME_STMT);
3678 tree n2
3679 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3680 true, NULL_TREE, true, GSI_SAME_STMT);
3681 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3682 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3683 edge e3 = split_block (new_header, gsi_stmt (gsi));
3684 cont_bb = e3->dest;
3685 remove_edge (e1);
3686 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3687 e3->flags = EDGE_FALSE_VALUE;
3688 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3689 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3690 e1->probability = e3->probability.invert ();
3692 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3693 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3695 if (e2)
3697 class loop *loop = alloc_loop ();
3698 loop->header = new_header;
3699 loop->latch = e2->src;
3700 add_loop (loop, body_bb->loop_father);
3704 /* If there are any lastprivate clauses and it is possible some loops
3705 might have zero iterations, ensure all the decls are initialized,
3706 otherwise we could crash evaluating C++ class iterators with lastprivate
3707 clauses. */
3708 bool need_inits = false;
3709 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3710 if (need_inits)
3712 tree type = TREE_TYPE (fd->loops[i].v);
3713 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3714 expand_omp_build_assign (&gsi, fd->loops[i].v,
3715 fold_convert (type, fd->loops[i].n1));
3717 else
3719 tree type = TREE_TYPE (fd->loops[i].v);
3720 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3721 boolean_type_node,
3722 fold_convert (type, fd->loops[i].n1),
3723 fold_convert (type, fd->loops[i].n2));
3724 if (!integer_onep (this_cond))
3725 need_inits = true;
3728 return cont_bb;
3731 /* A subroutine of expand_omp_for. Generate code for a parallel
3732 loop with any schedule. Given parameters:
3734 for (V = N1; V cond N2; V += STEP) BODY;
3736 where COND is "<" or ">", we generate pseudocode
3738 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3739 if (more) goto L0; else goto L3;
3740 L0:
3741 V = istart0;
3742 iend = iend0;
3743 L1:
3744 BODY;
3745 V += STEP;
3746 if (V cond iend) goto L1; else goto L2;
3747 L2:
3748 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3749 L3:
3751 If this is a combined omp parallel loop, instead of the call to
3752 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3753 If this is gimple_omp_for_combined_p loop, then instead of assigning
3754 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3755 inner GIMPLE_OMP_FOR and V += STEP; and
3756 if (V cond iend) goto L1; else goto L2; are removed.
3758 For collapsed loops, given parameters:
3759 collapse(3)
3760 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3761 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3762 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3763 BODY;
3765 we generate pseudocode
3767 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3768 if (cond3 is <)
3769 adj = STEP3 - 1;
3770 else
3771 adj = STEP3 + 1;
3772 count3 = (adj + N32 - N31) / STEP3;
3773 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3774 if (cond2 is <)
3775 adj = STEP2 - 1;
3776 else
3777 adj = STEP2 + 1;
3778 count2 = (adj + N22 - N21) / STEP2;
3779 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3780 if (cond1 is <)
3781 adj = STEP1 - 1;
3782 else
3783 adj = STEP1 + 1;
3784 count1 = (adj + N12 - N11) / STEP1;
3785 count = count1 * count2 * count3;
3786 goto Z1;
3787 Z0:
3788 count = 0;
3789 Z1:
3790 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3791 if (more) goto L0; else goto L3;
3792 L0:
3793 V = istart0;
3794 T = V;
3795 V3 = N31 + (T % count3) * STEP3;
3796 T = T / count3;
3797 V2 = N21 + (T % count2) * STEP2;
3798 T = T / count2;
3799 V1 = N11 + T * STEP1;
3800 iend = iend0;
3801 L1:
3802 BODY;
3803 V += 1;
3804 if (V < iend) goto L10; else goto L2;
3805 L10:
3806 V3 += STEP3;
3807 if (V3 cond3 N32) goto L1; else goto L11;
3808 L11:
3809 V3 = N31;
3810 V2 += STEP2;
3811 if (V2 cond2 N22) goto L1; else goto L12;
3812 L12:
3813 V2 = N21;
3814 V1 += STEP1;
3815 goto L1;
3816 L2:
3817 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3818 L3:
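/* For reference, the start/next/end protocol above written as plain C,
   taking the dynamic schedule and a < loop as the example (illustration
   only; the exact prototypes live in libgomp):

	extern bool GOMP_loop_dynamic_start (long, long, long, long,
					     long *, long *);
	extern bool GOMP_loop_dynamic_next (long *, long *);
	extern void GOMP_loop_end (void);

	long istart, iend;
	if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart, &iend))
	  do
	    for (long V = istart; V < iend; V += STEP)
	      BODY;
	  while (GOMP_loop_dynamic_next (&istart, &iend));
	GOMP_loop_end ();  */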
3822 static void
3823 expand_omp_for_generic (struct omp_region *region,
3824 struct omp_for_data *fd,
3825 enum built_in_function start_fn,
3826 enum built_in_function next_fn,
3827 tree sched_arg,
3828 gimple *inner_stmt)
3830 tree type, istart0, iend0, iend;
3831 tree t, vmain, vback, bias = NULL_TREE;
3832 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3833 basic_block l2_bb = NULL, l3_bb = NULL;
3834 gimple_stmt_iterator gsi;
3835 gassign *assign_stmt;
3836 bool in_combined_parallel = is_combined_parallel (region);
3837 bool broken_loop = region->cont == NULL;
3838 edge e, ne;
3839 tree *counts = NULL;
3840 int i;
3841 bool ordered_lastprivate = false;
3843 gcc_assert (!broken_loop || !in_combined_parallel);
3844 gcc_assert (fd->iter_type == long_integer_type_node
3845 || !in_combined_parallel);
3847 entry_bb = region->entry;
3848 cont_bb = region->cont;
3849 collapse_bb = NULL;
3850 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3851 gcc_assert (broken_loop
3852 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3853 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3854 l1_bb = single_succ (l0_bb);
3855 if (!broken_loop)
3857 l2_bb = create_empty_bb (cont_bb);
3858 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3859 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3860 == l1_bb));
3861 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3863 else
3864 l2_bb = NULL;
3865 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3866 exit_bb = region->exit;
3868 gsi = gsi_last_nondebug_bb (entry_bb);
3870 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3871 if (fd->ordered
3872 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873 OMP_CLAUSE_LASTPRIVATE))
3874 ordered_lastprivate = true;
3875 tree reductions = NULL_TREE;
3876 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3877 tree memv = NULL_TREE;
3878 if (fd->lastprivate_conditional)
3880 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3881 OMP_CLAUSE__CONDTEMP_);
3882 if (fd->have_pointer_condtemp)
3883 condtemp = OMP_CLAUSE_DECL (c);
3884 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3885 cond_var = OMP_CLAUSE_DECL (c);
3887 if (sched_arg)
3889 if (fd->have_reductemp)
3891 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3892 OMP_CLAUSE__REDUCTEMP_);
3893 reductions = OMP_CLAUSE_DECL (c);
3894 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3895 gimple *g = SSA_NAME_DEF_STMT (reductions);
3896 reductions = gimple_assign_rhs1 (g);
3897 OMP_CLAUSE_DECL (c) = reductions;
3898 entry_bb = gimple_bb (g);
3899 edge e = split_block (entry_bb, g);
3900 if (region->entry == entry_bb)
3901 region->entry = e->dest;
3902 gsi = gsi_last_bb (entry_bb);
3904 else
3905 reductions = null_pointer_node;
3906 if (fd->have_pointer_condtemp)
3908 tree type = TREE_TYPE (condtemp);
3909 memv = create_tmp_var (type);
3910 TREE_ADDRESSABLE (memv) = 1;
3911 unsigned HOST_WIDE_INT sz
3912 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3913 sz *= fd->lastprivate_conditional;
3914 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3915 false);
3916 mem = build_fold_addr_expr (memv);
3918 else
3919 mem = null_pointer_node;
3921 if (fd->collapse > 1 || fd->ordered)
3923 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3924 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3926 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3927 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3928 zero_iter1_bb, first_zero_iter1,
3929 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3931 if (zero_iter1_bb)
3933 /* Some counts[i] vars might be uninitialized if
3934 some loop has zero iterations. But the body shouldn't
3935 be executed in that case, so just avoid uninit warnings. */
3936 for (i = first_zero_iter1;
3937 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3938 if (SSA_VAR_P (counts[i]))
3939 suppress_warning (counts[i], OPT_Wuninitialized);
3940 gsi_prev (&gsi);
3941 e = split_block (entry_bb, gsi_stmt (gsi));
3942 entry_bb = e->dest;
3943 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3944 gsi = gsi_last_nondebug_bb (entry_bb);
3945 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3946 get_immediate_dominator (CDI_DOMINATORS,
3947 zero_iter1_bb));
3949 if (zero_iter2_bb)
3951 /* Some counts[i] vars might be uninitialized if
3952 some loop has zero iterations. But the body shouldn't
3953 be executed in that case, so just avoid uninit warnings. */
3954 for (i = first_zero_iter2; i < fd->ordered; i++)
3955 if (SSA_VAR_P (counts[i]))
3956 suppress_warning (counts[i], OPT_Wuninitialized);
3957 if (zero_iter1_bb)
3958 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3959 else
3961 gsi_prev (&gsi);
3962 e = split_block (entry_bb, gsi_stmt (gsi));
3963 entry_bb = e->dest;
3964 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3965 gsi = gsi_last_nondebug_bb (entry_bb);
3966 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3967 get_immediate_dominator
3968 (CDI_DOMINATORS, zero_iter2_bb));
3971 if (fd->collapse == 1)
3973 counts[0] = fd->loop.n2;
3974 fd->loop = fd->loops[0];
3978 type = TREE_TYPE (fd->loop.v);
3979 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3980 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3981 TREE_ADDRESSABLE (istart0) = 1;
3982 TREE_ADDRESSABLE (iend0) = 1;
3984 /* See if we need to bias by LLONG_MIN. */
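/* (Roughly: when the runtime iterates in unsigned long long but the loop
   variable is signed and its range may cross zero, shifting both bounds
   by TYPE_MIN_VALUE keeps the mapping monotonic in the unsigned domain.
   The bias is added to the bounds before the GOMP_loop_*_start call and
   subtracted again when istart0/iend0 are copied back below.)  */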
3985 if (fd->iter_type == long_long_unsigned_type_node
3986 && TREE_CODE (type) == INTEGER_TYPE
3987 && !TYPE_UNSIGNED (type)
3988 && fd->ordered == 0)
3990 tree n1, n2;
3992 if (fd->loop.cond_code == LT_EXPR)
3994 n1 = fd->loop.n1;
3995 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3997 else
3999 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4000 n2 = fd->loop.n1;
4002 if (TREE_CODE (n1) != INTEGER_CST
4003 || TREE_CODE (n2) != INTEGER_CST
4004 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4005 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4008 gimple_stmt_iterator gsif = gsi;
4009 gsi_prev (&gsif);
4011 tree arr = NULL_TREE;
4012 if (in_combined_parallel)
4014 gcc_assert (fd->ordered == 0);
4015 /* In a combined parallel loop, emit a call to
4016 GOMP_loop_foo_next. */
4017 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4018 build_fold_addr_expr (istart0),
4019 build_fold_addr_expr (iend0));
4021 else
4023 tree t0, t1, t2, t3, t4;
4024 /* If this is not a combined parallel loop, emit a call to
4025 GOMP_loop_foo_start in ENTRY_BB. */
4026 t4 = build_fold_addr_expr (iend0);
4027 t3 = build_fold_addr_expr (istart0);
4028 if (fd->ordered)
4030 t0 = build_int_cst (unsigned_type_node,
4031 fd->ordered - fd->collapse + 1);
4032 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4033 fd->ordered
4034 - fd->collapse + 1),
4035 ".omp_counts");
4036 DECL_NAMELESS (arr) = 1;
4037 TREE_ADDRESSABLE (arr) = 1;
4038 TREE_STATIC (arr) = 1;
4039 vec<constructor_elt, va_gc> *v;
4040 vec_alloc (v, fd->ordered - fd->collapse + 1);
4041 int idx;
4043 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4045 tree c;
4046 if (idx == 0 && fd->collapse > 1)
4047 c = fd->loop.n2;
4048 else
4049 c = counts[idx + fd->collapse - 1];
4050 tree purpose = size_int (idx);
4051 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4052 if (TREE_CODE (c) != INTEGER_CST)
4053 TREE_STATIC (arr) = 0;
4056 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4057 if (!TREE_STATIC (arr))
4058 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4059 void_type_node, arr),
4060 true, NULL_TREE, true, GSI_SAME_STMT);
4061 t1 = build_fold_addr_expr (arr);
4062 t2 = NULL_TREE;
4064 else
4066 t2 = fold_convert (fd->iter_type, fd->loop.step);
4067 t1 = fd->loop.n2;
4068 t0 = fd->loop.n1;
4069 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071 tree innerc
4072 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4073 OMP_CLAUSE__LOOPTEMP_);
4074 gcc_assert (innerc);
4075 t0 = OMP_CLAUSE_DECL (innerc);
4076 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4077 OMP_CLAUSE__LOOPTEMP_);
4078 gcc_assert (innerc);
4079 t1 = OMP_CLAUSE_DECL (innerc);
4081 if (POINTER_TYPE_P (TREE_TYPE (t0))
4082 && TYPE_PRECISION (TREE_TYPE (t0))
4083 != TYPE_PRECISION (fd->iter_type))
4085 /* Avoid casting pointers to integer of a different size. */
4086 tree itype = signed_type_for (type);
4087 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4088 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4090 else
4092 t1 = fold_convert (fd->iter_type, t1);
4093 t0 = fold_convert (fd->iter_type, t0);
4095 if (bias)
4097 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4098 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4101 if (fd->iter_type == long_integer_type_node || fd->ordered)
4103 if (fd->chunk_size)
4105 t = fold_convert (fd->iter_type, fd->chunk_size);
4106 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4107 if (sched_arg)
4109 if (fd->ordered)
4110 t = build_call_expr (builtin_decl_explicit (start_fn),
4111 8, t0, t1, sched_arg, t, t3, t4,
4112 reductions, mem);
4113 else
4114 t = build_call_expr (builtin_decl_explicit (start_fn),
4115 9, t0, t1, t2, sched_arg, t, t3, t4,
4116 reductions, mem);
4118 else if (fd->ordered)
4119 t = build_call_expr (builtin_decl_explicit (start_fn),
4120 5, t0, t1, t, t3, t4);
4121 else
4122 t = build_call_expr (builtin_decl_explicit (start_fn),
4123 6, t0, t1, t2, t, t3, t4);
4125 else if (fd->ordered)
4126 t = build_call_expr (builtin_decl_explicit (start_fn),
4127 4, t0, t1, t3, t4);
4128 else
4129 t = build_call_expr (builtin_decl_explicit (start_fn),
4130 5, t0, t1, t2, t3, t4);
4132 else
4134 tree t5;
4135 tree c_bool_type;
4136 tree bfn_decl;
4138 /* The GOMP_loop_ull_*start functions have an additional boolean
4139 argument, true for < loops and false for > loops.
4140 In Fortran, the C bool type can be different from
4141 boolean_type_node. */
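/* (For illustration; the exact prototype lives in libgomp and may differ:
     bool GOMP_loop_ull_dynamic_start (bool up,
				       unsigned long long start,
				       unsigned long long end,
				       unsigned long long incr,
				       unsigned long long chunk_size,
				       unsigned long long *istart,
				       unsigned long long *iend);
   where UP is the extra argument built as T5 below.)  */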
4142 bfn_decl = builtin_decl_explicit (start_fn);
4143 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4144 t5 = build_int_cst (c_bool_type,
4145 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4146 if (fd->chunk_size)
4148 tree bfn_decl = builtin_decl_explicit (start_fn);
4149 t = fold_convert (fd->iter_type, fd->chunk_size);
4150 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4151 if (sched_arg)
4152 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4153 t, t3, t4, reductions, mem);
4154 else
4155 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4157 else
4158 t = build_call_expr (builtin_decl_explicit (start_fn),
4159 6, t5, t0, t1, t2, t3, t4);
4162 if (TREE_TYPE (t) != boolean_type_node)
4163 t = fold_build2 (NE_EXPR, boolean_type_node,
4164 t, build_int_cst (TREE_TYPE (t), 0));
4165 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4166 true, GSI_SAME_STMT);
4167 if (arr && !TREE_STATIC (arr))
4169 tree clobber = build_clobber (TREE_TYPE (arr));
4170 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4171 GSI_SAME_STMT);
4173 if (fd->have_pointer_condtemp)
4174 expand_omp_build_assign (&gsi, condtemp, memv, false);
4175 if (fd->have_reductemp)
4177 gimple *g = gsi_stmt (gsi);
4178 gsi_remove (&gsi, true);
4179 release_ssa_name (gimple_assign_lhs (g));
4181 entry_bb = region->entry;
4182 gsi = gsi_last_nondebug_bb (entry_bb);
4184 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4186 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4188 /* Remove the GIMPLE_OMP_FOR statement. */
4189 gsi_remove (&gsi, true);
4191 if (gsi_end_p (gsif))
4192 gsif = gsi_after_labels (gsi_bb (gsif));
4193 gsi_next (&gsif);
4195 /* Iteration setup for sequential loop goes in L0_BB. */
4196 tree startvar = fd->loop.v;
4197 tree endvar = NULL_TREE;
4199 if (gimple_omp_for_combined_p (fd->for_stmt))
4201 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4202 && gimple_omp_for_kind (inner_stmt)
4203 == GF_OMP_FOR_KIND_SIMD);
4204 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4205 OMP_CLAUSE__LOOPTEMP_);
4206 gcc_assert (innerc);
4207 startvar = OMP_CLAUSE_DECL (innerc);
4208 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4209 OMP_CLAUSE__LOOPTEMP_);
4210 gcc_assert (innerc);
4211 endvar = OMP_CLAUSE_DECL (innerc);
4214 gsi = gsi_start_bb (l0_bb);
4215 t = istart0;
4216 if (fd->ordered && fd->collapse == 1)
4217 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4218 fold_convert (fd->iter_type, fd->loop.step));
4219 else if (bias)
4220 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4221 if (fd->ordered && fd->collapse == 1)
4223 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4224 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4225 fd->loop.n1, fold_convert (sizetype, t));
4226 else
4228 t = fold_convert (TREE_TYPE (startvar), t);
4229 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4230 fd->loop.n1, t);
4233 else
4235 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4236 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4237 t = fold_convert (TREE_TYPE (startvar), t);
4239 t = force_gimple_operand_gsi (&gsi, t,
4240 DECL_P (startvar)
4241 && TREE_ADDRESSABLE (startvar),
4242 NULL_TREE, false, GSI_CONTINUE_LINKING);
4243 assign_stmt = gimple_build_assign (startvar, t);
4244 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4245 if (cond_var)
4247 tree itype = TREE_TYPE (cond_var);
4248 /* For lastprivate(conditional:) itervar, we need some iteration
4249 counter that starts at a non-zero unsigned value and increases.
4250 Prefer as few IVs as possible, so if we can use startvar
4251 itself, use that, or startvar + constant (those would be
4252 incremented with step), and as a last resort use s0 + 1,
4253 incremented by 1 each iteration. */
4254 if ((fd->ordered && fd->collapse == 1)
4255 || bias
4256 || POINTER_TYPE_P (type)
4257 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4258 || fd->loop.cond_code != LT_EXPR)
4259 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4260 build_int_cst (itype, 1));
4261 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4262 t = fold_convert (itype, t);
4263 else
4265 tree c = fold_convert (itype, fd->loop.n1);
4266 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4267 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4269 t = force_gimple_operand_gsi (&gsi, t, false,
4270 NULL_TREE, false, GSI_CONTINUE_LINKING);
4271 assign_stmt = gimple_build_assign (cond_var, t);
4272 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4275 t = iend0;
4276 if (fd->ordered && fd->collapse == 1)
4277 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4278 fold_convert (fd->iter_type, fd->loop.step));
4279 else if (bias)
4280 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4281 if (fd->ordered && fd->collapse == 1)
4283 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4284 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4285 fd->loop.n1, fold_convert (sizetype, t));
4286 else
4288 t = fold_convert (TREE_TYPE (startvar), t);
4289 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4290 fd->loop.n1, t);
4293 else
4295 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4296 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4297 t = fold_convert (TREE_TYPE (startvar), t);
4299 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4300 false, GSI_CONTINUE_LINKING);
4301 if (endvar)
4303 assign_stmt = gimple_build_assign (endvar, iend);
4304 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4305 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4306 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4307 else
4308 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4309 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4311 /* Handle linear clause adjustments. */
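/* (For illustration: with a clause such as linear(x:2) on this loop, each
   thread has to enter its chunk with x equal to its pre-loop value plus
   the number of iterations preceding the chunk times 2.  The code below
   derives that iteration count from STARTVAR and applies it; 'x' here is
   a hypothetical variable name.)  */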
4312 tree itercnt = NULL_TREE;
4313 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4314 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4315 c; c = OMP_CLAUSE_CHAIN (c))
4316 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4317 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4319 tree d = OMP_CLAUSE_DECL (c);
4320 tree t = d, a, dest;
4321 if (omp_privatize_by_reference (t))
4322 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4323 tree type = TREE_TYPE (t);
4324 if (POINTER_TYPE_P (type))
4325 type = sizetype;
4326 dest = unshare_expr (t);
4327 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4328 expand_omp_build_assign (&gsif, v, t);
4329 if (itercnt == NULL_TREE)
4331 itercnt = startvar;
4332 tree n1 = fd->loop.n1;
4333 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4335 itercnt
4336 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4337 itercnt);
4338 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4340 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4341 itercnt, n1);
4342 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4343 itercnt, fd->loop.step);
4344 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4345 NULL_TREE, false,
4346 GSI_CONTINUE_LINKING);
4348 a = fold_build2 (MULT_EXPR, type,
4349 fold_convert (type, itercnt),
4350 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4351 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4352 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4353 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4354 false, GSI_CONTINUE_LINKING);
4355 expand_omp_build_assign (&gsi, dest, t, true);
4357 if (fd->collapse > 1)
4358 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4360 if (fd->ordered)
4362 /* Until now, the counts array contained the number of iterations (or
4363 a variable holding it) for the i-th loop. From now on, we need
4364 those counts only for the collapsed loops, and only for the 2nd
4365 up to the last collapsed one. Move them one element earlier;
4366 we'll use counts[fd->collapse - 1] for the first source/sink
4367 iteration counter and so on, and counts[fd->ordered]
4368 as the array holding the current counter values for
4369 depend(source). */
4370 if (fd->collapse > 1)
4371 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4372 if (broken_loop)
4374 int i;
4375 for (i = fd->collapse; i < fd->ordered; i++)
4377 tree type = TREE_TYPE (fd->loops[i].v);
4378 tree this_cond
4379 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4380 fold_convert (type, fd->loops[i].n1),
4381 fold_convert (type, fd->loops[i].n2));
4382 if (!integer_onep (this_cond))
4383 break;
4385 if (i < fd->ordered)
4387 cont_bb
4388 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4389 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4390 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4391 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4392 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4393 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4394 make_edge (cont_bb, l1_bb, 0);
4395 l2_bb = create_empty_bb (cont_bb);
4396 broken_loop = false;
4399 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4400 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4401 ordered_lastprivate);
4402 if (counts[fd->collapse - 1])
4404 gcc_assert (fd->collapse == 1);
4405 gsi = gsi_last_bb (l0_bb);
4406 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4407 istart0, true);
4408 if (cont_bb)
4410 gsi = gsi_last_bb (cont_bb);
4411 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4412 counts[fd->collapse - 1],
4413 build_int_cst (fd->iter_type, 1));
4414 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4415 tree aref = build4 (ARRAY_REF, fd->iter_type,
4416 counts[fd->ordered], size_zero_node,
4417 NULL_TREE, NULL_TREE);
4418 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4420 t = counts[fd->collapse - 1];
4422 else if (fd->collapse > 1)
4423 t = fd->loop.v;
4424 else
4426 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4427 fd->loops[0].v, fd->loops[0].n1);
4428 t = fold_convert (fd->iter_type, t);
4430 gsi = gsi_last_bb (l0_bb);
4431 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4432 size_zero_node, NULL_TREE, NULL_TREE);
4433 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4434 false, GSI_CONTINUE_LINKING);
4435 expand_omp_build_assign (&gsi, aref, t, true);
4438 if (!broken_loop)
4440 /* Code to control the increment and predicate for the sequential
4441 loop goes in the CONT_BB. */
4442 gsi = gsi_last_nondebug_bb (cont_bb);
4443 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4444 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4445 vmain = gimple_omp_continue_control_use (cont_stmt);
4446 vback = gimple_omp_continue_control_def (cont_stmt);
4448 if (cond_var)
4450 tree itype = TREE_TYPE (cond_var);
4451 tree t2;
4452 if ((fd->ordered && fd->collapse == 1)
4453 || bias
4454 || POINTER_TYPE_P (type)
4455 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4456 || fd->loop.cond_code != LT_EXPR)
4457 t2 = build_int_cst (itype, 1);
4458 else
4459 t2 = fold_convert (itype, fd->loop.step);
4460 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4461 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4462 NULL_TREE, true, GSI_SAME_STMT);
4463 assign_stmt = gimple_build_assign (cond_var, t2);
4464 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4467 if (!gimple_omp_for_combined_p (fd->for_stmt))
4469 if (POINTER_TYPE_P (type))
4470 t = fold_build_pointer_plus (vmain, fd->loop.step);
4471 else
4472 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4473 t = force_gimple_operand_gsi (&gsi, t,
4474 DECL_P (vback)
4475 && TREE_ADDRESSABLE (vback),
4476 NULL_TREE, true, GSI_SAME_STMT);
4477 assign_stmt = gimple_build_assign (vback, t);
4478 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4480 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4482 tree tem;
4483 if (fd->collapse > 1)
4484 tem = fd->loop.v;
4485 else
4487 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4488 fd->loops[0].v, fd->loops[0].n1);
4489 tem = fold_convert (fd->iter_type, tem);
4491 tree aref = build4 (ARRAY_REF, fd->iter_type,
4492 counts[fd->ordered], size_zero_node,
4493 NULL_TREE, NULL_TREE);
4494 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4495 true, GSI_SAME_STMT);
4496 expand_omp_build_assign (&gsi, aref, tem);
4499 t = build2 (fd->loop.cond_code, boolean_type_node,
4500 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4501 iend);
4502 gcond *cond_stmt = gimple_build_cond_empty (t);
4503 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4506 /* Remove GIMPLE_OMP_CONTINUE. */
4507 gsi_remove (&gsi, true);
4509 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4510 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4512 /* Emit code to get the next parallel iteration in L2_BB. */
4513 gsi = gsi_start_bb (l2_bb);
4515 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4516 build_fold_addr_expr (istart0),
4517 build_fold_addr_expr (iend0));
4518 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4519 false, GSI_CONTINUE_LINKING);
4520 if (TREE_TYPE (t) != boolean_type_node)
4521 t = fold_build2 (NE_EXPR, boolean_type_node,
4522 t, build_int_cst (TREE_TYPE (t), 0));
4523 gcond *cond_stmt = gimple_build_cond_empty (t);
4524 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4527 /* Add the loop cleanup function. */
4528 gsi = gsi_last_nondebug_bb (exit_bb);
4529 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4530 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4531 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4532 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4533 else
4534 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4535 gcall *call_stmt = gimple_build_call (t, 0);
4536 if (fd->ordered)
4538 tree arr = counts[fd->ordered];
4539 tree clobber = build_clobber (TREE_TYPE (arr));
4540 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4541 GSI_SAME_STMT);
4543 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4545 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4546 if (fd->have_reductemp)
4548 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4549 gimple_call_lhs (call_stmt));
4550 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4553 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4554 gsi_remove (&gsi, true);
4556 /* Connect the new blocks. */
4557 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4558 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4560 if (!broken_loop)
4562 gimple_seq phis;
4564 e = find_edge (cont_bb, l3_bb);
4565 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4567 phis = phi_nodes (l3_bb);
4568 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4570 gimple *phi = gsi_stmt (gsi);
4571 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4572 PHI_ARG_DEF_FROM_EDGE (phi, e));
4574 remove_edge (e);
4576 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4577 e = find_edge (cont_bb, l1_bb);
4578 if (e == NULL)
4580 e = BRANCH_EDGE (cont_bb);
4581 gcc_assert (single_succ (e->dest) == l1_bb);
4583 if (gimple_omp_for_combined_p (fd->for_stmt))
4585 remove_edge (e);
4586 e = NULL;
4588 else if (fd->collapse > 1)
4590 remove_edge (e);
4591 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4593 else
4594 e->flags = EDGE_TRUE_VALUE;
4595 if (e)
4597 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4598 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4600 else
4602 e = find_edge (cont_bb, l2_bb);
4603 e->flags = EDGE_FALLTHRU;
4605 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4607 if (gimple_in_ssa_p (cfun))
4609 /* Add phis to the outer loop that connect to the phis in the inner,
4610 original loop, and move the loop entry value of the inner phi to
4611 the loop entry value of the outer phi. */
4612 gphi_iterator psi;
4613 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4615 location_t locus;
4616 gphi *nphi;
4617 gphi *exit_phi = psi.phi ();
4619 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4620 continue;
4622 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4623 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4625 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4626 edge latch_to_l1 = find_edge (latch, l1_bb);
4627 gphi *inner_phi
4628 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4630 tree t = gimple_phi_result (exit_phi);
4631 tree new_res = copy_ssa_name (t, NULL);
4632 nphi = create_phi_node (new_res, l0_bb);
4634 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4635 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4636 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4637 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4638 add_phi_arg (nphi, t, entry_to_l0, locus);
4640 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4641 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4643 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4647 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4648 recompute_dominator (CDI_DOMINATORS, l2_bb));
4649 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4650 recompute_dominator (CDI_DOMINATORS, l3_bb));
4651 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4652 recompute_dominator (CDI_DOMINATORS, l0_bb));
4653 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4654 recompute_dominator (CDI_DOMINATORS, l1_bb));
4656 /* We enter expand_omp_for_generic with a loop. This original loop may
4657 have its own loop struct, or it may be part of an outer loop struct
4658 (which may be the fake loop). */
4659 class loop *outer_loop = entry_bb->loop_father;
4660 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4662 add_bb_to_loop (l2_bb, outer_loop);
4664 /* We've added a new loop around the original loop. Allocate the
4665 corresponding loop struct. */
4666 class loop *new_loop = alloc_loop ();
4667 new_loop->header = l0_bb;
4668 new_loop->latch = l2_bb;
4669 add_loop (new_loop, outer_loop);
4671 /* Allocate a loop structure for the original loop unless we already
4672 had one. */
4673 if (!orig_loop_has_loop_struct
4674 && !gimple_omp_for_combined_p (fd->for_stmt))
4676 class loop *orig_loop = alloc_loop ();
4677 orig_loop->header = l1_bb;
4678 /* The loop may have multiple latches. */
4679 add_loop (orig_loop, new_loop);
4684 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4685 compute the needed allocation size; this covers the team allocations
4686 if !ALLOC and the per-thread allocation if ALLOC. SZ is the initial
4687 size needed for other purposes, ALLOC_ALIGN the guaranteed alignment
4688 of the allocation in bytes, and CNT the number of elements of each
4689 array: for !ALLOC this is omp_get_num_threads (), for ALLOC the number
4690 of iterations handled by the current thread. If PTR is non-NULL, it is
4691 the start of the allocation and this routine assigns to OMP_CLAUSE_DECL (c)
4692 of those _scantemp_ clauses pointers to the corresponding arrays. */
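/* A small worked example (illustration only): with two non-control
   _scantemp_ clauses whose pointee types have sizes 8 and 4, a first call
   with PTR == NULL returns roughly SZ + CNT * 8 + CNT * 4 plus whatever
   padding keeps each array suitably aligned; a second call with PTR
   pointing at the resulting allocation walks the same clauses again and
   assigns PTR, PTR + CNT * 8, ... (again suitably aligned) to their
   OMP_CLAUSE_DECLs.  */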
4694 static tree
4695 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4696 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4697 gimple_stmt_iterator *gsi, bool alloc)
4699 tree eltsz = NULL_TREE;
4700 unsigned HOST_WIDE_INT preval = 0;
4701 if (ptr && sz)
4702 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4703 ptr, size_int (sz));
4704 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4705 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4706 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4707 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4709 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4710 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4711 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4713 unsigned HOST_WIDE_INT szl
4714 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4715 szl = least_bit_hwi (szl);
4716 if (szl)
4717 al = MIN (al, szl);
4719 if (ptr == NULL_TREE)
4721 if (eltsz == NULL_TREE)
4722 eltsz = TYPE_SIZE_UNIT (pointee_type);
4723 else
4724 eltsz = size_binop (PLUS_EXPR, eltsz,
4725 TYPE_SIZE_UNIT (pointee_type));
4727 if (preval == 0 && al <= alloc_align)
4729 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4730 sz += diff;
4731 if (diff && ptr)
4732 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4733 ptr, size_int (diff));
4735 else if (al > preval)
4737 if (ptr)
4739 ptr = fold_convert (pointer_sized_int_node, ptr);
4740 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4741 build_int_cst (pointer_sized_int_node,
4742 al - 1));
4743 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4744 build_int_cst (pointer_sized_int_node,
4745 -(HOST_WIDE_INT) al));
4746 ptr = fold_convert (ptr_type_node, ptr);
4748 else
4749 sz += al - 1;
4751 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4752 preval = al;
4753 else
4754 preval = 1;
4755 if (ptr)
4757 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4758 ptr = OMP_CLAUSE_DECL (c);
4759 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4760 size_binop (MULT_EXPR, cnt,
4761 TYPE_SIZE_UNIT (pointee_type)));
4765 if (ptr == NULL_TREE)
4767 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4768 if (sz)
4769 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4770 return eltsz;
4772 else
4773 return ptr;
4776 /* Return the last _looptemp_ clause if one has been created for
4777 lastprivate on distribute parallel for{, simd} or taskloop.
4778 FD is the loop data and INNERC should be the second _looptemp_
4779 clause (the one holding the end of the range).
4780 This is followed by collapse - 1 _looptemp_ clauses for
4781 counts[1] and up, and for triangular loops by 4
4782 further _looptemp_ clauses (one for counts[0], one for
4783 first_inner_iterations, one for factor and one for adjn1). After
4784 this there is optionally one _looptemp_ clause that this function returns. */
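/* Spelled out for collapse(3) on a triangular loop nest (illustration
   only), the clause chain visited here is roughly:
     <start>, <end> (INNERC), counts[1], counts[2],
     counts[0], first_inner_iterations, factor, adjn1,
     [optional lastprivate _looptemp_ returned by this function].  */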
4786 static tree
4787 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4789 gcc_assert (innerc);
4790 int count = fd->collapse - 1;
4791 if (fd->non_rect
4792 && fd->last_nonrect == fd->first_nonrect + 1
4793 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4794 count += 4;
4795 for (int i = 0; i < count; i++)
4797 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4798 OMP_CLAUSE__LOOPTEMP_);
4799 gcc_assert (innerc);
4801 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4802 OMP_CLAUSE__LOOPTEMP_);
4805 /* A subroutine of expand_omp_for. Generate code for a parallel
4806 loop with static schedule and no specified chunk size. Given
4807 parameters:
4809 for (V = N1; V cond N2; V += STEP) BODY;
4811 where COND is "<" or ">", we generate pseudocode
4813 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4814 if (cond is <)
4815 adj = STEP - 1;
4816 else
4817 adj = STEP + 1;
4818 if ((__typeof (V)) -1 > 0 && cond is >)
4819 n = -(adj + N2 - N1) / -STEP;
4820 else
4821 n = (adj + N2 - N1) / STEP;
4822 q = n / nthreads;
4823 tt = n % nthreads;
4824 if (threadid < tt) goto L3; else goto L4;
4825 L3:
4826 tt = 0;
4827 q = q + 1;
4828 L4:
4829 s0 = q * threadid + tt;
4830 e0 = s0 + q;
4831 V = s0 * STEP + N1;
4832 if (s0 >= e0) goto L2; else goto L0;
4833 L0:
4834 e = e0 * STEP + N1;
4835 L1:
4836 BODY;
4837 V += STEP;
4838 if (V cond e) goto L1;
4839 L2:
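/* A quick numeric check of the partitioning above (illustration only):
   n = 10 iterations on nthreads = 4 gives q = 2, tt = 2, so threads 0
   and 1 take q + 1 = 3 iterations each and threads 2 and 3 take 2:
     thread 0: s0 = 0, e0 = 3    thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 6, e0 = 8    thread 3: s0 = 8, e0 = 10  */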
4842 static void
4843 expand_omp_for_static_nochunk (struct omp_region *region,
4844 struct omp_for_data *fd,
4845 gimple *inner_stmt)
4847 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4848 tree type, itype, vmain, vback;
4849 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4850 basic_block body_bb, cont_bb, collapse_bb = NULL;
4851 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4852 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4853 gimple_stmt_iterator gsi, gsip;
4854 edge ep;
4855 bool broken_loop = region->cont == NULL;
4856 tree *counts = NULL;
4857 tree n1, n2, step;
4858 tree reductions = NULL_TREE;
4859 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4861 itype = type = TREE_TYPE (fd->loop.v);
4862 if (POINTER_TYPE_P (type))
4863 itype = signed_type_for (type);
4865 entry_bb = region->entry;
4866 cont_bb = region->cont;
4867 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4868 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4869 gcc_assert (broken_loop
4870 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4871 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4872 body_bb = single_succ (seq_start_bb);
4873 if (!broken_loop)
4875 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4876 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4877 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4879 exit_bb = region->exit;
4881 /* Iteration space partitioning goes in ENTRY_BB. */
4882 gsi = gsi_last_nondebug_bb (entry_bb);
4883 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4884 gsip = gsi;
4885 gsi_prev (&gsip);
4887 if (fd->collapse > 1)
4889 int first_zero_iter = -1, dummy = -1;
4890 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4892 counts = XALLOCAVEC (tree, fd->collapse);
4893 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4894 fin_bb, first_zero_iter,
4895 dummy_bb, dummy, l2_dom_bb);
4896 t = NULL_TREE;
4898 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4899 t = integer_one_node;
4900 else
4901 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4902 fold_convert (type, fd->loop.n1),
4903 fold_convert (type, fd->loop.n2));
4904 if (fd->collapse == 1
4905 && TYPE_UNSIGNED (type)
4906 && (t == NULL_TREE || !integer_onep (t)))
4908 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4909 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4910 true, GSI_SAME_STMT);
4911 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4912 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4913 true, GSI_SAME_STMT);
4914 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4915 n1, n2);
4916 ep = split_block (entry_bb, cond_stmt);
4917 ep->flags = EDGE_TRUE_VALUE;
4918 entry_bb = ep->dest;
4919 ep->probability = profile_probability::very_likely ();
4920 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4921 ep->probability = profile_probability::very_unlikely ();
4922 if (gimple_in_ssa_p (cfun))
4924 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4925 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4926 !gsi_end_p (gpi); gsi_next (&gpi))
4928 gphi *phi = gpi.phi ();
4929 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4930 ep, UNKNOWN_LOCATION);
4933 gsi = gsi_last_bb (entry_bb);
4936 if (fd->lastprivate_conditional)
4938 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4939 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4940 if (fd->have_pointer_condtemp)
4941 condtemp = OMP_CLAUSE_DECL (c);
4942 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4943 cond_var = OMP_CLAUSE_DECL (c);
4945 if (fd->have_reductemp
4946 /* For scan, we don't want to reinitialize condtemp before the
4947 second loop. */
4948 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4949 || fd->have_nonctrl_scantemp)
4951 tree t1 = build_int_cst (long_integer_type_node, 0);
4952 tree t2 = build_int_cst (long_integer_type_node, 1);
4953 tree t3 = build_int_cstu (long_integer_type_node,
4954 (HOST_WIDE_INT_1U << 31) + 1);
4955 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4956 gimple_stmt_iterator gsi2 = gsi_none ();
4957 gimple *g = NULL;
4958 tree mem = null_pointer_node, memv = NULL_TREE;
4959 unsigned HOST_WIDE_INT condtemp_sz = 0;
4960 unsigned HOST_WIDE_INT alloc_align = 0;
4961 if (fd->have_reductemp)
4963 gcc_assert (!fd->have_nonctrl_scantemp);
4964 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4965 reductions = OMP_CLAUSE_DECL (c);
4966 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4967 g = SSA_NAME_DEF_STMT (reductions);
4968 reductions = gimple_assign_rhs1 (g);
4969 OMP_CLAUSE_DECL (c) = reductions;
4970 gsi2 = gsi_for_stmt (g);
4972 else
4974 if (gsi_end_p (gsip))
4975 gsi2 = gsi_after_labels (region->entry);
4976 else
4977 gsi2 = gsip;
4978 reductions = null_pointer_node;
4980 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4982 tree type;
4983 if (fd->have_pointer_condtemp)
4984 type = TREE_TYPE (condtemp);
4985 else
4986 type = ptr_type_node;
4987 memv = create_tmp_var (type);
4988 TREE_ADDRESSABLE (memv) = 1;
4989 unsigned HOST_WIDE_INT sz = 0;
4990 tree size = NULL_TREE;
4991 if (fd->have_pointer_condtemp)
4993 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4994 sz *= fd->lastprivate_conditional;
4995 condtemp_sz = sz;
4997 if (fd->have_nonctrl_scantemp)
4999 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5000 gimple *g = gimple_build_call (nthreads, 0);
5001 nthreads = create_tmp_var (integer_type_node);
5002 gimple_call_set_lhs (g, nthreads);
5003 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5004 nthreads = fold_convert (sizetype, nthreads);
5005 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5006 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5007 alloc_align, nthreads, NULL,
5008 false);
5009 size = fold_convert (type, size);
5011 else
5012 size = build_int_cst (type, sz);
5013 expand_omp_build_assign (&gsi2, memv, size, false);
5014 mem = build_fold_addr_expr (memv);
5016 tree t
5017 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5018 9, t1, t2, t2, t3, t1, null_pointer_node,
5019 null_pointer_node, reductions, mem);
5020 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5021 true, GSI_SAME_STMT);
5022 if (fd->have_pointer_condtemp)
5023 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5024 if (fd->have_nonctrl_scantemp)
5026 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5027 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5028 alloc_align, nthreads, &gsi2, false);
5030 if (fd->have_reductemp)
5032 gsi_remove (&gsi2, true);
5033 release_ssa_name (gimple_assign_lhs (g));
5036 switch (gimple_omp_for_kind (fd->for_stmt))
5038 case GF_OMP_FOR_KIND_FOR:
5039 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5040 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5041 break;
5042 case GF_OMP_FOR_KIND_DISTRIBUTE:
5043 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5044 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5045 break;
5046 default:
5047 gcc_unreachable ();
5049 nthreads = build_call_expr (nthreads, 0);
5050 nthreads = fold_convert (itype, nthreads);
5051 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5052 true, GSI_SAME_STMT);
5053 threadid = build_call_expr (threadid, 0);
5054 threadid = fold_convert (itype, threadid);
5055 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5056 true, GSI_SAME_STMT);
5058 n1 = fd->loop.n1;
5059 n2 = fd->loop.n2;
5060 step = fd->loop.step;
5061 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5063 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5064 OMP_CLAUSE__LOOPTEMP_);
5065 gcc_assert (innerc);
5066 n1 = OMP_CLAUSE_DECL (innerc);
5067 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5068 OMP_CLAUSE__LOOPTEMP_);
5069 gcc_assert (innerc);
5070 n2 = OMP_CLAUSE_DECL (innerc);
5072 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5073 true, NULL_TREE, true, GSI_SAME_STMT);
5074 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5075 true, NULL_TREE, true, GSI_SAME_STMT);
5076 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5077 true, NULL_TREE, true, GSI_SAME_STMT);
5079 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5080 t = fold_build2 (PLUS_EXPR, itype, step, t);
5081 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5082 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5083 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5084 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5085 fold_build1 (NEGATE_EXPR, itype, t),
5086 fold_build1 (NEGATE_EXPR, itype, step));
5087 else
5088 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5089 t = fold_convert (itype, t);
5090 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5092 q = create_tmp_reg (itype, "q");
5093 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5094 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5095 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5097 tt = create_tmp_reg (itype, "tt");
5098 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5099 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5100 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5102 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5103 gcond *cond_stmt = gimple_build_cond_empty (t);
5104 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5106 second_bb = split_block (entry_bb, cond_stmt)->dest;
5107 gsi = gsi_last_nondebug_bb (second_bb);
5108 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5110 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5111 GSI_SAME_STMT);
5112 gassign *assign_stmt
5113 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5114 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5116 third_bb = split_block (second_bb, assign_stmt)->dest;
5117 gsi = gsi_last_nondebug_bb (third_bb);
5118 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5120 if (fd->have_nonctrl_scantemp)
5122 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5123 tree controlp = NULL_TREE, controlb = NULL_TREE;
5124 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5125 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5126 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5128 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5129 controlb = OMP_CLAUSE_DECL (c);
5130 else
5131 controlp = OMP_CLAUSE_DECL (c);
5132 if (controlb && controlp)
5133 break;
5135 gcc_assert (controlp && controlb);
5136 tree cnt = create_tmp_var (sizetype);
5137 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5138 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5139 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5140 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5141 alloc_align, cnt, NULL, true);
5142 tree size = create_tmp_var (sizetype);
5143 expand_omp_build_assign (&gsi, size, sz, false);
5144 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5145 size, size_int (16384));
5146 expand_omp_build_assign (&gsi, controlb, cmp);
5147 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5148 NULL_TREE, NULL_TREE);
5149 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5150 fourth_bb = split_block (third_bb, g)->dest;
5151 gsi = gsi_last_nondebug_bb (fourth_bb);
5152 /* FIXME: Once we have allocators, this should use the allocator. */
5153 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5154 gimple_call_set_lhs (g, controlp);
5155 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5156 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5157 &gsi, true);
5158 gsi_prev (&gsi);
5159 g = gsi_stmt (gsi);
5160 fifth_bb = split_block (fourth_bb, g)->dest;
5161 gsi = gsi_last_nondebug_bb (fifth_bb);
5163 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5164 gimple_call_set_lhs (g, controlp);
5165 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5166 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5167 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5168 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5169 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5171 tree tmp = create_tmp_var (sizetype);
5172 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5173 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5174 TYPE_SIZE_UNIT (pointee_type));
5175 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5176 g = gimple_build_call (alloca_decl, 2, tmp,
5177 size_int (TYPE_ALIGN (pointee_type)));
5178 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5179 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5182 sixth_bb = split_block (fifth_bb, g)->dest;
5183 gsi = gsi_last_nondebug_bb (sixth_bb);
5186 t = build2 (MULT_EXPR, itype, q, threadid);
5187 t = build2 (PLUS_EXPR, itype, t, tt);
5188 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5190 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5191 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
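/* Threads whose range is empty (s0 >= e0) skip the sequential loop
   and branch straight to FIN_BB.  */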
5193 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5194 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5196 /* Remove the GIMPLE_OMP_FOR statement. */
5197 gsi_remove (&gsi, true);
5199 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5200 gsi = gsi_start_bb (seq_start_bb);
5202 tree startvar = fd->loop.v;
5203 tree endvar = NULL_TREE;
5205 if (gimple_omp_for_combined_p (fd->for_stmt))
5207 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5208 ? gimple_omp_parallel_clauses (inner_stmt)
5209 : gimple_omp_for_clauses (inner_stmt);
5210 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5211 gcc_assert (innerc);
5212 startvar = OMP_CLAUSE_DECL (innerc);
5213 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5214 OMP_CLAUSE__LOOPTEMP_);
5215 gcc_assert (innerc);
5216 endvar = OMP_CLAUSE_DECL (innerc);
5217 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5218 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5220 innerc = find_lastprivate_looptemp (fd, innerc);
5221 if (innerc)
5223 /* If needed (distribute parallel for with lastprivate),
5224 propagate down the total number of iterations. */
5225 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5226 fd->loop.n2);
5227 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5228 GSI_CONTINUE_LINKING);
5229 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5230 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5234 t = fold_convert (itype, s0);
5235 t = fold_build2 (MULT_EXPR, itype, t, step);
5236 if (POINTER_TYPE_P (type))
5238 t = fold_build_pointer_plus (n1, t);
5239 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5240 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5241 t = fold_convert (signed_type_for (type), t);
5243 else
5244 t = fold_build2 (PLUS_EXPR, type, t, n1);
5245 t = fold_convert (TREE_TYPE (startvar), t);
5246 t = force_gimple_operand_gsi (&gsi, t,
5247 DECL_P (startvar)
5248 && TREE_ADDRESSABLE (startvar),
5249 NULL_TREE, false, GSI_CONTINUE_LINKING);
5250 assign_stmt = gimple_build_assign (startvar, t);
5251 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5252 if (cond_var)
5254 tree itype = TREE_TYPE (cond_var);
5255 /* For lastprivate(conditional:) itervar, we need some iteration
5256 counter that starts at a non-zero unsigned value and increases.
5257 Prefer as few IVs as possible, so if we can use startvar
5258 itself, use that, or startvar + constant (those would be
5259 incremented with step), and as a last resort use s0 + 1,
5260 incremented by 1. */
5261 if (POINTER_TYPE_P (type)
5262 || TREE_CODE (n1) != INTEGER_CST
5263 || fd->loop.cond_code != LT_EXPR)
5264 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5265 build_int_cst (itype, 1));
5266 else if (tree_int_cst_sgn (n1) == 1)
5267 t = fold_convert (itype, t);
5268 else
5270 tree c = fold_convert (itype, n1);
5271 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5272 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5274 t = force_gimple_operand_gsi (&gsi, t, false,
5275 NULL_TREE, false, GSI_CONTINUE_LINKING);
5276 assign_stmt = gimple_build_assign (cond_var, t);
5277 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5280 t = fold_convert (itype, e0);
5281 t = fold_build2 (MULT_EXPR, itype, t, step);
5282 if (POINTER_TYPE_P (type))
5284 t = fold_build_pointer_plus (n1, t);
5285 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5286 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5287 t = fold_convert (signed_type_for (type), t);
5289 else
5290 t = fold_build2 (PLUS_EXPR, type, t, n1);
5291 t = fold_convert (TREE_TYPE (startvar), t);
5292 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5293 false, GSI_CONTINUE_LINKING);
5294 if (endvar)
5296 assign_stmt = gimple_build_assign (endvar, e);
5297 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5298 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5299 assign_stmt = gimple_build_assign (fd->loop.v, e);
5300 else
5301 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5302 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5304 /* Handle linear clause adjustments. */
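/* For a clause such as linear (x:2), the privatized x must start at
   the incoming value of x plus 2 times the number of logical
   iterations that precede this thread's first iteration.  itercnt
   below is that iteration count (s0, rebased for combined constructs),
   and the copied-in value is adjusted by
   itercnt * OMP_CLAUSE_LINEAR_STEP.  */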
5305 tree itercnt = NULL_TREE;
5306 tree *nonrect_bounds = NULL;
5307 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5308 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5309 c; c = OMP_CLAUSE_CHAIN (c))
5310 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5311 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5313 tree d = OMP_CLAUSE_DECL (c);
5314 tree t = d, a, dest;
5315 if (omp_privatize_by_reference (t))
5316 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5317 if (itercnt == NULL_TREE)
5319 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5321 itercnt = fold_build2 (MINUS_EXPR, itype,
5322 fold_convert (itype, n1),
5323 fold_convert (itype, fd->loop.n1));
5324 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5325 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5326 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5327 NULL_TREE, false,
5328 GSI_CONTINUE_LINKING);
5330 else
5331 itercnt = s0;
5333 tree type = TREE_TYPE (t);
5334 if (POINTER_TYPE_P (type))
5335 type = sizetype;
5336 a = fold_build2 (MULT_EXPR, type,
5337 fold_convert (type, itercnt),
5338 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5339 dest = unshare_expr (t);
5340 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5341 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5342 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5343 false, GSI_CONTINUE_LINKING);
5344 expand_omp_build_assign (&gsi, dest, t, true);
5346 if (fd->collapse > 1)
5348 if (fd->non_rect)
5350 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5351 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5353 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5354 startvar);
5357 if (!broken_loop)
5359 /* The code controlling the sequential loop replaces the
5360 GIMPLE_OMP_CONTINUE. */
5361 gsi = gsi_last_nondebug_bb (cont_bb);
5362 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5363 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5364 vmain = gimple_omp_continue_control_use (cont_stmt);
5365 vback = gimple_omp_continue_control_def (cont_stmt);
5367 if (cond_var)
5369 tree itype = TREE_TYPE (cond_var);
5370 tree t2;
5371 if (POINTER_TYPE_P (type)
5372 || TREE_CODE (n1) != INTEGER_CST
5373 || fd->loop.cond_code != LT_EXPR)
5374 t2 = build_int_cst (itype, 1);
5375 else
5376 t2 = fold_convert (itype, step);
5377 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5378 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5379 NULL_TREE, true, GSI_SAME_STMT);
5380 assign_stmt = gimple_build_assign (cond_var, t2);
5381 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5384 if (!gimple_omp_for_combined_p (fd->for_stmt))
5386 if (POINTER_TYPE_P (type))
5387 t = fold_build_pointer_plus (vmain, step);
5388 else
5389 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5390 t = force_gimple_operand_gsi (&gsi, t,
5391 DECL_P (vback)
5392 && TREE_ADDRESSABLE (vback),
5393 NULL_TREE, true, GSI_SAME_STMT);
5394 assign_stmt = gimple_build_assign (vback, t);
5395 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5397 t = build2 (fd->loop.cond_code, boolean_type_node,
5398 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5399 ? t : vback, e);
5400 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5403 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5404 gsi_remove (&gsi, true);
5406 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5407 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5408 cont_bb, body_bb);
5411 /* Replace the GIMPLE_OMP_RETURN with a barrier, a GOMP_loop_end* call, or nothing. */
5412 gsi = gsi_last_nondebug_bb (exit_bb);
5413 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5415 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5416 if (fd->have_reductemp
5417 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5418 && !fd->have_nonctrl_scantemp))
5420 tree fn;
5421 if (t)
5422 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5423 else
5424 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5425 gcall *g = gimple_build_call (fn, 0);
5426 if (t)
5428 gimple_call_set_lhs (g, t);
5429 if (fd->have_reductemp)
5430 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5431 NOP_EXPR, t),
5432 GSI_SAME_STMT);
5434 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5436 else
5437 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5439 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5440 && !fd->have_nonctrl_scantemp)
5442 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5443 gcall *g = gimple_build_call (fn, 0);
5444 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5446 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5448 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5449 tree controlp = NULL_TREE, controlb = NULL_TREE;
5450 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5451 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5452 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5454 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5455 controlb = OMP_CLAUSE_DECL (c);
5456 else
5457 controlp = OMP_CLAUSE_DECL (c);
5458 if (controlb && controlp)
5459 break;
5461 gcc_assert (controlp && controlb);
5462 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5463 NULL_TREE, NULL_TREE);
5464 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5465 exit1_bb = split_block (exit_bb, g)->dest;
5466 gsi = gsi_after_labels (exit1_bb);
5467 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5468 controlp);
5469 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5470 exit2_bb = split_block (exit1_bb, g)->dest;
5471 gsi = gsi_after_labels (exit2_bb);
5472 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5473 controlp);
5474 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5475 exit3_bb = split_block (exit2_bb, g)->dest;
5476 gsi = gsi_after_labels (exit3_bb);
5478 gsi_remove (&gsi, true);
5480 /* Connect all the blocks. */
5481 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5482 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5483 ep = find_edge (entry_bb, second_bb);
5484 ep->flags = EDGE_TRUE_VALUE;
5485 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5486 if (fourth_bb)
5488 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5489 ep->probability
5490 = profile_probability::guessed_always ().apply_scale (1, 2);
5491 ep = find_edge (third_bb, fourth_bb);
5492 ep->flags = EDGE_TRUE_VALUE;
5493 ep->probability
5494 = profile_probability::guessed_always ().apply_scale (1, 2);
5495 ep = find_edge (fourth_bb, fifth_bb);
5496 redirect_edge_and_branch (ep, sixth_bb);
5498 else
5499 sixth_bb = third_bb;
5500 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5501 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5502 if (exit1_bb)
5504 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5505 ep->probability
5506 = profile_probability::guessed_always ().apply_scale (1, 2);
5507 ep = find_edge (exit_bb, exit1_bb);
5508 ep->flags = EDGE_TRUE_VALUE;
5509 ep->probability
5510 = profile_probability::guessed_always ().apply_scale (1, 2);
5511 ep = find_edge (exit1_bb, exit2_bb);
5512 redirect_edge_and_branch (ep, exit3_bb);
5515 if (!broken_loop)
5517 ep = find_edge (cont_bb, body_bb);
5518 if (ep == NULL)
5520 ep = BRANCH_EDGE (cont_bb);
5521 gcc_assert (single_succ (ep->dest) == body_bb);
5523 if (gimple_omp_for_combined_p (fd->for_stmt))
5525 remove_edge (ep);
5526 ep = NULL;
5528 else if (fd->collapse > 1)
5530 remove_edge (ep);
5531 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5533 else
5534 ep->flags = EDGE_TRUE_VALUE;
5535 find_edge (cont_bb, fin_bb)->flags
5536 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5539 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5540 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5541 if (fourth_bb)
5543 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5544 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5546 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5548 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5549 recompute_dominator (CDI_DOMINATORS, body_bb));
5550 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5551 recompute_dominator (CDI_DOMINATORS, fin_bb));
5552 if (exit1_bb)
5554 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5555 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5558 class loop *loop = body_bb->loop_father;
5559 if (loop != entry_bb->loop_father)
5561 gcc_assert (broken_loop || loop->header == body_bb);
5562 gcc_assert (broken_loop
5563 || loop->latch == region->cont
5564 || single_pred (loop->latch) == region->cont);
5565 return;
5568 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5570 loop = alloc_loop ();
5571 loop->header = body_bb;
5572 if (collapse_bb == NULL)
5573 loop->latch = cont_bb;
5574 add_loop (loop, body_bb->loop_father);
5578 /* Return phi in E->DEST with ARG on edge E. */
5580 static gphi *
5581 find_phi_with_arg_on_edge (tree arg, edge e)
5583 basic_block bb = e->dest;
5585 for (gphi_iterator gpi = gsi_start_phis (bb);
5586 !gsi_end_p (gpi);
5587 gsi_next (&gpi))
5589 gphi *phi = gpi.phi ();
5590 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5591 return phi;
5594 return NULL;
5597 /* A subroutine of expand_omp_for. Generate code for a parallel
5598 loop with static schedule and a specified chunk size. Given
5599 parameters:
5601 for (V = N1; V cond N2; V += STEP) BODY;
5603 where COND is "<" or ">", we generate pseudocode
5605 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5606 if (cond is <)
5607 adj = STEP - 1;
5608 else
5609 adj = STEP + 1;
5610 if ((__typeof (V)) -1 > 0 && cond is >)
5611 n = -(adj + N2 - N1) / -STEP;
5612 else
5613 n = (adj + N2 - N1) / STEP;
5614 trip = 0;
5615 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5616 here so that V is defined
5617 if the loop is not entered
5618     L0:
5619 s0 = (trip * nthreads + threadid) * CHUNK;
5620 e0 = min (s0 + CHUNK, n);
5621 if (s0 < n) goto L1; else goto L4;
5622     L1:
5623 V = s0 * STEP + N1;
5624 e = e0 * STEP + N1;
5625     L2:
5626 BODY;
5627 V += STEP;
5628 if (V cond e) goto L2; else goto L3;
5629     L3:
5630 trip += 1;
5631 goto L0;
5632     L4:
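/* As an illustrative example (not taken from any particular testcase),
   a worksharing loop such as

       #pragma omp for schedule (static, 4)
       for (i = 0; i < n; i++)
         body;

   is expanded along these lines: chunks of 4 logical iterations are
   handed out round-robin, thread THREADID executing chunks number
   THREADID, THREADID + NTHREADS, THREADID + 2 * NTHREADS, ...  */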
5635 static void
5636 expand_omp_for_static_chunk (struct omp_region *region,
5637 struct omp_for_data *fd, gimple *inner_stmt)
5639 tree n, s0, e0, e, t;
5640 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5641 tree type, itype, vmain, vback, vextra;
5642 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5643 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5644 gimple_stmt_iterator gsi, gsip;
5645 edge se;
5646 bool broken_loop = region->cont == NULL;
5647 tree *counts = NULL;
5648 tree n1, n2, step;
5649 tree reductions = NULL_TREE;
5650 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5652 itype = type = TREE_TYPE (fd->loop.v);
5653 if (POINTER_TYPE_P (type))
5654 itype = signed_type_for (type);
5656 entry_bb = region->entry;
5657 se = split_block (entry_bb, last_stmt (entry_bb));
5658 entry_bb = se->src;
5659 iter_part_bb = se->dest;
5660 cont_bb = region->cont;
5661 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5662 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5663 gcc_assert (broken_loop
5664 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5665 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5666 body_bb = single_succ (seq_start_bb);
5667 if (!broken_loop)
5669 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5670 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5671 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5672 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5674 exit_bb = region->exit;
5676 /* Trip and adjustment setup goes in ENTRY_BB. */
5677 gsi = gsi_last_nondebug_bb (entry_bb);
5678 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5679 gsip = gsi;
5680 gsi_prev (&gsip);
5682 if (fd->collapse > 1)
5684 int first_zero_iter = -1, dummy = -1;
5685 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5687 counts = XALLOCAVEC (tree, fd->collapse);
5688 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5689 fin_bb, first_zero_iter,
5690 dummy_bb, dummy, l2_dom_bb);
5691 t = NULL_TREE;
5693 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5694 t = integer_one_node;
5695 else
5696 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5697 fold_convert (type, fd->loop.n1),
5698 fold_convert (type, fd->loop.n2));
5699 if (fd->collapse == 1
5700 && TYPE_UNSIGNED (type)
5701 && (t == NULL_TREE || !integer_onep (t)))
5703 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5704 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5705 true, GSI_SAME_STMT);
5706 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5707 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5708 true, GSI_SAME_STMT);
5709 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5710 n1, n2);
5711 se = split_block (entry_bb, cond_stmt);
5712 se->flags = EDGE_TRUE_VALUE;
5713 entry_bb = se->dest;
5714 se->probability = profile_probability::very_likely ();
5715 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5716 se->probability = profile_probability::very_unlikely ();
5717 if (gimple_in_ssa_p (cfun))
5719 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5720 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5721 !gsi_end_p (gpi); gsi_next (&gpi))
5723 gphi *phi = gpi.phi ();
5724 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5725 se, UNKNOWN_LOCATION);
5728 gsi = gsi_last_bb (entry_bb);
5731 if (fd->lastprivate_conditional)
5733 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5734 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5735 if (fd->have_pointer_condtemp)
5736 condtemp = OMP_CLAUSE_DECL (c);
5737 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5738 cond_var = OMP_CLAUSE_DECL (c);
5740 if (fd->have_reductemp || fd->have_pointer_condtemp)
5742 tree t1 = build_int_cst (long_integer_type_node, 0);
5743 tree t2 = build_int_cst (long_integer_type_node, 1);
5744 tree t3 = build_int_cstu (long_integer_type_node,
5745 (HOST_WIDE_INT_1U << 31) + 1);
5746 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5747 gimple_stmt_iterator gsi2 = gsi_none ();
5748 gimple *g = NULL;
5749 tree mem = null_pointer_node, memv = NULL_TREE;
5750 if (fd->have_reductemp)
5752 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5753 reductions = OMP_CLAUSE_DECL (c);
5754 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5755 g = SSA_NAME_DEF_STMT (reductions);
5756 reductions = gimple_assign_rhs1 (g);
5757 OMP_CLAUSE_DECL (c) = reductions;
5758 gsi2 = gsi_for_stmt (g);
5760 else
5762 if (gsi_end_p (gsip))
5763 gsi2 = gsi_after_labels (region->entry);
5764 else
5765 gsi2 = gsip;
5766 reductions = null_pointer_node;
5768 if (fd->have_pointer_condtemp)
5770 tree type = TREE_TYPE (condtemp);
5771 memv = create_tmp_var (type);
5772 TREE_ADDRESSABLE (memv) = 1;
5773 unsigned HOST_WIDE_INT sz
5774 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5775 sz *= fd->lastprivate_conditional;
5776 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5777 false);
5778 mem = build_fold_addr_expr (memv);
5780 tree t
5781 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5782 9, t1, t2, t2, t3, t1, null_pointer_node,
5783 null_pointer_node, reductions, mem);
5784 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5785 true, GSI_SAME_STMT);
5786 if (fd->have_pointer_condtemp)
5787 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5788 if (fd->have_reductemp)
5790 gsi_remove (&gsi2, true);
5791 release_ssa_name (gimple_assign_lhs (g));
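/* A distribute construct partitions iterations across teams rather
   than threads, so the number of teams and the team number take the
   place of the thread count and thread id below.  */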
5794 switch (gimple_omp_for_kind (fd->for_stmt))
5796 case GF_OMP_FOR_KIND_FOR:
5797 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5798 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5799 break;
5800 case GF_OMP_FOR_KIND_DISTRIBUTE:
5801 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5802 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5803 break;
5804 default:
5805 gcc_unreachable ();
5807 nthreads = build_call_expr (nthreads, 0);
5808 nthreads = fold_convert (itype, nthreads);
5809 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5810 true, GSI_SAME_STMT);
5811 threadid = build_call_expr (threadid, 0);
5812 threadid = fold_convert (itype, threadid);
5813 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5814 true, GSI_SAME_STMT);
5816 n1 = fd->loop.n1;
5817 n2 = fd->loop.n2;
5818 step = fd->loop.step;
5819 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5821 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5822 OMP_CLAUSE__LOOPTEMP_);
5823 gcc_assert (innerc);
5824 n1 = OMP_CLAUSE_DECL (innerc);
5825 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5826 OMP_CLAUSE__LOOPTEMP_);
5827 gcc_assert (innerc);
5828 n2 = OMP_CLAUSE_DECL (innerc);
5830 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5831 true, NULL_TREE, true, GSI_SAME_STMT);
5832 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5833 true, NULL_TREE, true, GSI_SAME_STMT);
5834 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5835 true, NULL_TREE, true, GSI_SAME_STMT);
5836 tree chunk_size = fold_convert (itype, fd->chunk_size);
5837 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5838 chunk_size
5839 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5840 GSI_SAME_STMT);
5842 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5843 t = fold_build2 (PLUS_EXPR, itype, step, t);
5844 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5845 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5846 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5847 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5848 fold_build1 (NEGATE_EXPR, itype, t),
5849 fold_build1 (NEGATE_EXPR, itype, step));
5850 else
5851 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5852 t = fold_convert (itype, t);
5853 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5854 true, GSI_SAME_STMT);
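/* trip counts how many chunks this thread has already executed; chunk
   number trip * nthreads + threadid is the next one assigned to it,
   which yields the round-robin distribution from the pseudocode
   above.  */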
5856 trip_var = create_tmp_reg (itype, ".trip");
5857 if (gimple_in_ssa_p (cfun))
5859 trip_init = make_ssa_name (trip_var);
5860 trip_main = make_ssa_name (trip_var);
5861 trip_back = make_ssa_name (trip_var);
5863 else
5865 trip_init = trip_var;
5866 trip_main = trip_var;
5867 trip_back = trip_var;
5870 gassign *assign_stmt
5871 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5872 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5874 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5875 t = fold_build2 (MULT_EXPR, itype, t, step);
5876 if (POINTER_TYPE_P (type))
5877 t = fold_build_pointer_plus (n1, t);
5878 else
5879 t = fold_build2 (PLUS_EXPR, type, t, n1);
5880 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5881 true, GSI_SAME_STMT);
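/* vextra is the value the iteration variable has at the start of this
   thread's first chunk; it is used later to seed the PHI node in
   ITER_PART_BB, where fd->loop.v has not been computed yet.  */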
5883 /* Remove the GIMPLE_OMP_FOR. */
5884 gsi_remove (&gsi, true);
5886 gimple_stmt_iterator gsif = gsi;
5888 /* Iteration space partitioning goes in ITER_PART_BB. */
5889 gsi = gsi_last_bb (iter_part_bb);
5891 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5892 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5893 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5894 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5895 false, GSI_CONTINUE_LINKING);
5897 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5898 t = fold_build2 (MIN_EXPR, itype, t, n);
5899 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5900 false, GSI_CONTINUE_LINKING);
5902 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5903 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5905 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5906 gsi = gsi_start_bb (seq_start_bb);
5908 tree startvar = fd->loop.v;
5909 tree endvar = NULL_TREE;
5911 if (gimple_omp_for_combined_p (fd->for_stmt))
5913 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5914 ? gimple_omp_parallel_clauses (inner_stmt)
5915 : gimple_omp_for_clauses (inner_stmt);
5916 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5917 gcc_assert (innerc);
5918 startvar = OMP_CLAUSE_DECL (innerc);
5919 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5920 OMP_CLAUSE__LOOPTEMP_);
5921 gcc_assert (innerc);
5922 endvar = OMP_CLAUSE_DECL (innerc);
5923 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5924 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5926 innerc = find_lastprivate_looptemp (fd, innerc);
5927 if (innerc)
5929 /* If needed (distribute parallel for with lastprivate),
5930 propagate down the total number of iterations. */
5931 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5932 fd->loop.n2);
5933 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5934 GSI_CONTINUE_LINKING);
5935 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5936 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5941 t = fold_convert (itype, s0);
5942 t = fold_build2 (MULT_EXPR, itype, t, step);
5943 if (POINTER_TYPE_P (type))
5945 t = fold_build_pointer_plus (n1, t);
5946 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5947 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5948 t = fold_convert (signed_type_for (type), t);
5950 else
5951 t = fold_build2 (PLUS_EXPR, type, t, n1);
5952 t = fold_convert (TREE_TYPE (startvar), t);
5953 t = force_gimple_operand_gsi (&gsi, t,
5954 DECL_P (startvar)
5955 && TREE_ADDRESSABLE (startvar),
5956 NULL_TREE, false, GSI_CONTINUE_LINKING);
5957 assign_stmt = gimple_build_assign (startvar, t);
5958 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5959 if (cond_var)
5961 tree itype = TREE_TYPE (cond_var);
5962 /* For lastprivate(conditional:) itervar, we need some iteration
5963 counter that starts at a non-zero unsigned value and increases.
5964 Prefer as few IVs as possible, so if we can use startvar
5965 itself, use that, or startvar + constant (those would be
5966 incremented with step), and as a last resort use s0 + 1,
5967 incremented by 1. */
5968 if (POINTER_TYPE_P (type)
5969 || TREE_CODE (n1) != INTEGER_CST
5970 || fd->loop.cond_code != LT_EXPR)
5971 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5972 build_int_cst (itype, 1));
5973 else if (tree_int_cst_sgn (n1) == 1)
5974 t = fold_convert (itype, t);
5975 else
5977 tree c = fold_convert (itype, n1);
5978 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5979 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5981 t = force_gimple_operand_gsi (&gsi, t, false,
5982 NULL_TREE, false, GSI_CONTINUE_LINKING);
5983 assign_stmt = gimple_build_assign (cond_var, t);
5984 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5987 t = fold_convert (itype, e0);
5988 t = fold_build2 (MULT_EXPR, itype, t, step);
5989 if (POINTER_TYPE_P (type))
5991 t = fold_build_pointer_plus (n1, t);
5992 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5993 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5994 t = fold_convert (signed_type_for (type), t);
5996 else
5997 t = fold_build2 (PLUS_EXPR, type, t, n1);
5998 t = fold_convert (TREE_TYPE (startvar), t);
5999 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6000 false, GSI_CONTINUE_LINKING);
6001 if (endvar)
6003 assign_stmt = gimple_build_assign (endvar, e);
6004 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6005 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6006 assign_stmt = gimple_build_assign (fd->loop.v, e);
6007 else
6008 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6009 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6011 /* Handle linear clause adjustments. */
6012 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6013 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6014 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6015 c; c = OMP_CLAUSE_CHAIN (c))
6016 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6017 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6019 tree d = OMP_CLAUSE_DECL (c);
6020 tree t = d, a, dest;
6021 if (omp_privatize_by_reference (t))
6022 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6023 tree type = TREE_TYPE (t);
6024 if (POINTER_TYPE_P (type))
6025 type = sizetype;
6026 dest = unshare_expr (t);
6027 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6028 expand_omp_build_assign (&gsif, v, t);
6029 if (itercnt == NULL_TREE)
6031 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6033 itercntbias
6034 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6035 fold_convert (itype, fd->loop.n1));
6036 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6037 itercntbias, step);
6038 itercntbias
6039 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6040 NULL_TREE, true,
6041 GSI_SAME_STMT);
6042 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6043 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6044 NULL_TREE, false,
6045 GSI_CONTINUE_LINKING);
6047 else
6048 itercnt = s0;
6050 a = fold_build2 (MULT_EXPR, type,
6051 fold_convert (type, itercnt),
6052 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6053 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6054 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6055 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6056 false, GSI_CONTINUE_LINKING);
6057 expand_omp_build_assign (&gsi, dest, t, true);
6059 if (fd->collapse > 1)
6060 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6062 if (!broken_loop)
6064 /* The code controlling the sequential loop goes in CONT_BB,
6065 replacing the GIMPLE_OMP_CONTINUE. */
6066 gsi = gsi_last_nondebug_bb (cont_bb);
6067 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6068 vmain = gimple_omp_continue_control_use (cont_stmt);
6069 vback = gimple_omp_continue_control_def (cont_stmt);
6071 if (cond_var)
6073 tree itype = TREE_TYPE (cond_var);
6074 tree t2;
6075 if (POINTER_TYPE_P (type)
6076 || TREE_CODE (n1) != INTEGER_CST
6077 || fd->loop.cond_code != LT_EXPR)
6078 t2 = build_int_cst (itype, 1);
6079 else
6080 t2 = fold_convert (itype, step);
6081 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6082 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6083 NULL_TREE, true, GSI_SAME_STMT);
6084 assign_stmt = gimple_build_assign (cond_var, t2);
6085 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6088 if (!gimple_omp_for_combined_p (fd->for_stmt))
6090 if (POINTER_TYPE_P (type))
6091 t = fold_build_pointer_plus (vmain, step);
6092 else
6093 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6094 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6095 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6096 true, GSI_SAME_STMT);
6097 assign_stmt = gimple_build_assign (vback, t);
6098 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6100 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6101 t = build2 (EQ_EXPR, boolean_type_node,
6102 build_int_cst (itype, 0),
6103 build_int_cst (itype, 1));
6104 else
6105 t = build2 (fd->loop.cond_code, boolean_type_node,
6106 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6107 ? t : vback, e);
6108 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6111 /* Remove GIMPLE_OMP_CONTINUE. */
6112 gsi_remove (&gsi, true);
6114 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6115 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6117 /* Trip update code goes into TRIP_UPDATE_BB. */
6118 gsi = gsi_start_bb (trip_update_bb);
6120 t = build_int_cst (itype, 1);
6121 t = build2 (PLUS_EXPR, itype, trip_main, t);
6122 assign_stmt = gimple_build_assign (trip_back, t);
6123 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6126 /* Replace the GIMPLE_OMP_RETURN with a barrier, a GOMP_loop_end* call, or nothing. */
6127 gsi = gsi_last_nondebug_bb (exit_bb);
6128 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6130 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6131 if (fd->have_reductemp || fd->have_pointer_condtemp)
6133 tree fn;
6134 if (t)
6135 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6136 else
6137 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6138 gcall *g = gimple_build_call (fn, 0);
6139 if (t)
6141 gimple_call_set_lhs (g, t);
6142 if (fd->have_reductemp)
6143 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6144 NOP_EXPR, t),
6145 GSI_SAME_STMT);
6147 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6149 else
6150 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6152 else if (fd->have_pointer_condtemp)
6154 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6155 gcall *g = gimple_build_call (fn, 0);
6156 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6158 gsi_remove (&gsi, true);
6160 /* Connect the new blocks. */
6161 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6162 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6164 if (!broken_loop)
6166 se = find_edge (cont_bb, body_bb);
6167 if (se == NULL)
6169 se = BRANCH_EDGE (cont_bb);
6170 gcc_assert (single_succ (se->dest) == body_bb);
6172 if (gimple_omp_for_combined_p (fd->for_stmt))
6174 remove_edge (se);
6175 se = NULL;
6177 else if (fd->collapse > 1)
6179 remove_edge (se);
6180 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6182 else
6183 se->flags = EDGE_TRUE_VALUE;
6184 find_edge (cont_bb, trip_update_bb)->flags
6185 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6187 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6188 iter_part_bb);
6191 if (gimple_in_ssa_p (cfun))
6193 gphi_iterator psi;
6194 gphi *phi;
6195 edge re, ene;
6196 edge_var_map *vm;
6197 size_t i;
6199 gcc_assert (fd->collapse == 1 && !broken_loop);
6201 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6202 remove arguments of the phi nodes in fin_bb. We need to create
6203 appropriate phi nodes in iter_part_bb instead. */
6204 se = find_edge (iter_part_bb, fin_bb);
6205 re = single_succ_edge (trip_update_bb);
6206 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6207 ene = single_succ_edge (entry_bb);
6209 psi = gsi_start_phis (fin_bb);
6210 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6211 gsi_next (&psi), ++i)
6213 gphi *nphi;
6214 location_t locus;
6216 phi = psi.phi ();
6217 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6218 redirect_edge_var_map_def (vm), 0))
6219 continue;
6221 t = gimple_phi_result (phi);
6222 gcc_assert (t == redirect_edge_var_map_result (vm));
6224 if (!single_pred_p (fin_bb))
6225 t = copy_ssa_name (t, phi);
6227 nphi = create_phi_node (t, iter_part_bb);
6229 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6230 locus = gimple_phi_arg_location_from_edge (phi, se);
6232 /* A special case -- fd->loop.v is not yet computed in
6233 iter_part_bb, so we need to use vextra instead. */
6234 if (t == fd->loop.v)
6235 t = vextra;
6236 add_phi_arg (nphi, t, ene, locus);
6237 locus = redirect_edge_var_map_location (vm);
6238 tree back_arg = redirect_edge_var_map_def (vm);
6239 add_phi_arg (nphi, back_arg, re, locus);
6240 edge ce = find_edge (cont_bb, body_bb);
6241 if (ce == NULL)
6243 ce = BRANCH_EDGE (cont_bb);
6244 gcc_assert (single_succ (ce->dest) == body_bb);
6245 ce = single_succ_edge (ce->dest);
6247 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6248 gcc_assert (inner_loop_phi != NULL);
6249 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6250 find_edge (seq_start_bb, body_bb), locus);
6252 if (!single_pred_p (fin_bb))
6253 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6255 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6256 redirect_edge_var_map_clear (re);
6257 if (single_pred_p (fin_bb))
6258 while (1)
6260 psi = gsi_start_phis (fin_bb);
6261 if (gsi_end_p (psi))
6262 break;
6263 remove_phi_node (&psi, false);
6266 /* Make phi node for trip. */
6267 phi = create_phi_node (trip_main, iter_part_bb);
6268 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6269 UNKNOWN_LOCATION);
6270 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6271 UNKNOWN_LOCATION);
6274 if (!broken_loop)
6275 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6276 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6277 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6278 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6279 recompute_dominator (CDI_DOMINATORS, fin_bb));
6280 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6281 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6282 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6283 recompute_dominator (CDI_DOMINATORS, body_bb));
6285 if (!broken_loop)
6287 class loop *loop = body_bb->loop_father;
6288 class loop *trip_loop = alloc_loop ();
6289 trip_loop->header = iter_part_bb;
6290 trip_loop->latch = trip_update_bb;
6291 add_loop (trip_loop, iter_part_bb->loop_father);
6293 if (loop != entry_bb->loop_father)
6295 gcc_assert (loop->header == body_bb);
6296 gcc_assert (loop->latch == region->cont
6297 || single_pred (loop->latch) == region->cont);
6298 trip_loop->inner = loop;
6299 return;
6302 if (!gimple_omp_for_combined_p (fd->for_stmt))
6304 loop = alloc_loop ();
6305 loop->header = body_bb;
6306 if (collapse_bb == NULL)
6307 loop->latch = cont_bb;
6308 add_loop (loop, trip_loop);
6313 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6314 loop. Given parameters:
6316 for (V = N1; V cond N2; V += STEP) BODY;
6318 where COND is "<" or ">", we generate pseudocode
6320 V = N1;
6321 goto L1;
6322     L0:
6323 BODY;
6324 V += STEP;
6325     L1:
6326 if (V cond N2) goto L0; else goto L2;
6327     L2:
6329 For collapsed loops, emit the outer loops as scalar
6330 and only try to vectorize the innermost loop. */
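/* As an illustrative example (not taken from any particular testcase),
   a loop such as

       #pragma omp simd safelen (8)
       for (i = 0; i < n; i++)
         body;

   is expanded into the scalar form above; the actual vectorization is
   left to the autovectorizer, guided by the safelen and simduid
   information recorded for the loop.  */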
6332 static void
6333 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6335 tree type, t;
6336 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6337 gimple_stmt_iterator gsi;
6338 gimple *stmt;
6339 gcond *cond_stmt;
6340 bool broken_loop = region->cont == NULL;
6341 edge e, ne;
6342 tree *counts = NULL;
6343 int i;
6344 int safelen_int = INT_MAX;
6345 bool dont_vectorize = false;
6346 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6347 OMP_CLAUSE_SAFELEN);
6348 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6349 OMP_CLAUSE__SIMDUID_);
6350 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6351 OMP_CLAUSE_IF);
6352 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6353 OMP_CLAUSE_SIMDLEN);
6354 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6355 OMP_CLAUSE__CONDTEMP_);
6356 tree n1, n2;
6357 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6359 if (safelen)
6361 poly_uint64 val;
6362 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6363 if (!poly_int_tree_p (safelen, &val))
6364 safelen_int = 0;
6365 else
6366 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6367 if (safelen_int == 1)
6368 safelen_int = 0;
6370 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6371 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6373 safelen_int = 0;
6374 dont_vectorize = true;
6376 type = TREE_TYPE (fd->loop.v);
6377 entry_bb = region->entry;
6378 cont_bb = region->cont;
6379 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6380 gcc_assert (broken_loop
6381 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6382 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6383 if (!broken_loop)
6385 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6386 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6387 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6388 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6390 else
6392 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6393 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6394 l2_bb = single_succ (l1_bb);
6396 exit_bb = region->exit;
6397 l2_dom_bb = NULL;
6399 gsi = gsi_last_nondebug_bb (entry_bb);
6401 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6402 /* Not needed in SSA form right now. */
6403 gcc_assert (!gimple_in_ssa_p (cfun));
6404 if (fd->collapse > 1
6405 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6406 || broken_loop))
6408 int first_zero_iter = -1, dummy = -1;
6409 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6411 counts = XALLOCAVEC (tree, fd->collapse);
6412 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6413 zero_iter_bb, first_zero_iter,
6414 dummy_bb, dummy, l2_dom_bb);
6416 if (l2_dom_bb == NULL)
6417 l2_dom_bb = l1_bb;
6419 n1 = fd->loop.n1;
6420 n2 = fd->loop.n2;
6421 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6423 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6424 OMP_CLAUSE__LOOPTEMP_);
6425 gcc_assert (innerc);
6426 n1 = OMP_CLAUSE_DECL (innerc);
6427 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6428 OMP_CLAUSE__LOOPTEMP_);
6429 gcc_assert (innerc);
6430 n2 = OMP_CLAUSE_DECL (innerc);
6432 tree step = fd->loop.step;
6433 tree orig_step = step; /* May be different from step if is_simt. */
6435 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6436 OMP_CLAUSE__SIMT_);
6437 if (is_simt)
6439 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6440 is_simt = safelen_int > 1;
6442 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
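/* Under SIMT each lane starts at N1 + STEP * SIMT_LANE and advances by
   STEP * SIMT_VF, so the lanes cover interleaved subsets of the
   iteration space; a compensating 'V -= STEP * (SIMT_VF - 1)' is
   emitted after the loop (see below).  */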
6443 if (is_simt)
6445 simt_lane = create_tmp_var (unsigned_type_node);
6446 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6447 gimple_call_set_lhs (g, simt_lane);
6448 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6449 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6450 fold_convert (TREE_TYPE (step), simt_lane));
6451 n1 = fold_convert (type, n1);
6452 if (POINTER_TYPE_P (type))
6453 n1 = fold_build_pointer_plus (n1, offset);
6454 else
6455 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6457 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6458 if (fd->collapse > 1)
6459 simt_maxlane = build_one_cst (unsigned_type_node);
6460 else if (safelen_int < omp_max_simt_vf ())
6461 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6462 tree vf
6463 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6464 unsigned_type_node, 0);
6465 if (simt_maxlane)
6466 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6467 vf = fold_convert (TREE_TYPE (step), vf);
6468 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6471 tree n2var = NULL_TREE;
6472 tree n2v = NULL_TREE;
6473 tree *nonrect_bounds = NULL;
6474 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6475 if (fd->collapse > 1)
6477 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6479 if (fd->non_rect)
6481 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6482 memset (nonrect_bounds, 0,
6483 sizeof (tree) * (fd->last_nonrect + 1));
6485 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6486 gcc_assert (entry_bb == gsi_bb (gsi));
6487 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6488 gsi_prev (&gsi);
6489 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6490 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6491 NULL, n1);
6492 gsi = gsi_for_stmt (fd->for_stmt);
6494 if (broken_loop)
6496 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6498 /* Compute in n2var the limit for the first innermost loop,
6499 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6500 where cnt is how many iterations the loop would have if
6501 all further iterations were assigned to the current task. */
6502 n2var = create_tmp_var (type);
6503 i = fd->collapse - 1;
6504 tree itype = TREE_TYPE (fd->loops[i].v);
6505 if (POINTER_TYPE_P (itype))
6506 itype = signed_type_for (itype);
6507 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6508 ? -1 : 1));
6509 t = fold_build2 (PLUS_EXPR, itype,
6510 fold_convert (itype, fd->loops[i].step), t);
6511 t = fold_build2 (PLUS_EXPR, itype, t,
6512 fold_convert (itype, fd->loops[i].n2));
6513 if (fd->loops[i].m2)
6515 tree t2 = fold_convert (itype,
6516 fd->loops[i - fd->loops[i].outer].v);
6517 tree t3 = fold_convert (itype, fd->loops[i].m2);
6518 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6519 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6521 t = fold_build2 (MINUS_EXPR, itype, t,
6522 fold_convert (itype, fd->loops[i].v));
6523 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6524 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6525 fold_build1 (NEGATE_EXPR, itype, t),
6526 fold_build1 (NEGATE_EXPR, itype,
6527 fold_convert (itype,
6528 fd->loops[i].step)));
6529 else
6530 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6531 fold_convert (itype, fd->loops[i].step));
6532 t = fold_convert (type, t);
6533 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6534 min_arg1 = create_tmp_var (type);
6535 expand_omp_build_assign (&gsi, min_arg1, t2);
6536 min_arg2 = create_tmp_var (type);
6537 expand_omp_build_assign (&gsi, min_arg2, t);
6539 else
6541 if (TREE_CODE (n2) == INTEGER_CST)
6543 /* Indicate for lastprivate handling that at least one iteration
6544 has been performed, without wasting runtime. */
6545 if (integer_nonzerop (n2))
6546 expand_omp_build_assign (&gsi, fd->loop.v,
6547 fold_convert (type, n2));
6548 else
6549 /* Indicate that no iteration has been performed. */
6550 expand_omp_build_assign (&gsi, fd->loop.v,
6551 build_one_cst (type));
6553 else
6555 expand_omp_build_assign (&gsi, fd->loop.v,
6556 build_zero_cst (type));
6557 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6559 for (i = 0; i < fd->collapse; i++)
6561 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6562 if (fd->loops[i].m1)
6564 tree t2
6565 = fold_convert (TREE_TYPE (t),
6566 fd->loops[i - fd->loops[i].outer].v);
6567 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6568 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6569 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6571 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6572 /* For normal non-combined collapsed loops just initialize
6573 the outermost iterator in the entry_bb. */
6574 if (!broken_loop)
6575 break;
6579 else
6580 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6581 tree altv = NULL_TREE, altn2 = NULL_TREE;
6582 if (fd->collapse == 1
6583 && !broken_loop
6584 && TREE_CODE (orig_step) != INTEGER_CST)
6586 /* The vectorizer currently punts on loops with non-constant steps
6587 for the main IV (can't compute number of iterations and gives up
6588 because of that). Since for OpenMP loops it is always possible to
6589 compute the number of iterations upfront, use an alternate IV
6590 as the loop iterator:
6591 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6592 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6593 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6594 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6595 tree itype = TREE_TYPE (fd->loop.v);
6596 if (POINTER_TYPE_P (itype))
6597 itype = signed_type_for (itype);
6598 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6599 t = fold_build2 (PLUS_EXPR, itype,
6600 fold_convert (itype, step), t);
6601 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6602 t = fold_build2 (MINUS_EXPR, itype, t,
6603 fold_convert (itype, fd->loop.v));
6604 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6605 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6606 fold_build1 (NEGATE_EXPR, itype, t),
6607 fold_build1 (NEGATE_EXPR, itype,
6608 fold_convert (itype, step)));
6609 else
6610 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6611 fold_convert (itype, step));
6612 t = fold_convert (TREE_TYPE (altv), t);
6613 altn2 = create_tmp_var (TREE_TYPE (altv));
6614 expand_omp_build_assign (&gsi, altn2, t);
6615 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6616 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6617 true, GSI_SAME_STMT);
6618 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6619 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6620 build_zero_cst (TREE_TYPE (altv)));
6621 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6623 else if (fd->collapse > 1
6624 && !broken_loop
6625 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6626 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6628 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6629 altn2 = create_tmp_var (TREE_TYPE (altv));
6631 if (cond_var)
6633 if (POINTER_TYPE_P (type)
6634 || TREE_CODE (n1) != INTEGER_CST
6635 || fd->loop.cond_code != LT_EXPR
6636 || tree_int_cst_sgn (n1) != 1)
6637 expand_omp_build_assign (&gsi, cond_var,
6638 build_one_cst (TREE_TYPE (cond_var)));
6639 else
6640 expand_omp_build_assign (&gsi, cond_var,
6641 fold_convert (TREE_TYPE (cond_var), n1));
6644 /* Remove the GIMPLE_OMP_FOR statement. */
6645 gsi_remove (&gsi, true);
6647 if (!broken_loop)
6649 /* Code to control the increment goes in the CONT_BB. */
6650 gsi = gsi_last_nondebug_bb (cont_bb);
6651 stmt = gsi_stmt (gsi);
6652 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6654 if (fd->collapse == 1
6655 || gimple_omp_for_combined_into_p (fd->for_stmt))
6657 if (POINTER_TYPE_P (type))
6658 t = fold_build_pointer_plus (fd->loop.v, step);
6659 else
6660 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6661 expand_omp_build_assign (&gsi, fd->loop.v, t);
6663 else if (TREE_CODE (n2) != INTEGER_CST)
6664 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6665 if (altv)
6667 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6668 build_one_cst (TREE_TYPE (altv)));
6669 expand_omp_build_assign (&gsi, altv, t);
6672 if (fd->collapse > 1)
6674 i = fd->collapse - 1;
6675 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6677 t = fold_convert (sizetype, fd->loops[i].step);
6678 t = fold_build_pointer_plus (fd->loops[i].v, t);
6680 else
6682 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6683 fd->loops[i].step);
6684 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6685 fd->loops[i].v, t);
6687 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6689 if (cond_var)
6691 if (POINTER_TYPE_P (type)
6692 || TREE_CODE (n1) != INTEGER_CST
6693 || fd->loop.cond_code != LT_EXPR
6694 || tree_int_cst_sgn (n1) != 1)
6695 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6696 build_one_cst (TREE_TYPE (cond_var)));
6697 else
6698 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6699 fold_convert (TREE_TYPE (cond_var), step));
6700 expand_omp_build_assign (&gsi, cond_var, t);
6703 /* Remove GIMPLE_OMP_CONTINUE. */
6704 gsi_remove (&gsi, true);
6707 /* Emit the condition in L1_BB. */
6708 gsi = gsi_start_bb (l1_bb);
6710 if (altv)
6711 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6712 else if (fd->collapse > 1
6713 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6714 && !broken_loop)
6716 i = fd->collapse - 1;
6717 tree itype = TREE_TYPE (fd->loops[i].v);
6718 if (fd->loops[i].m2)
6719 t = n2v = create_tmp_var (itype);
6720 else
6721 t = fold_convert (itype, fd->loops[i].n2);
6722 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6723 false, GSI_CONTINUE_LINKING);
6724 tree v = fd->loops[i].v;
6725 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6726 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6727 false, GSI_CONTINUE_LINKING);
6728 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6730 else
6732 if (fd->collapse > 1 && !broken_loop)
6733 t = n2var;
6734 else
6735 t = fold_convert (type, n2);
6736 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6737 false, GSI_CONTINUE_LINKING);
6738 tree v = fd->loop.v;
6739 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6740 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6741 false, GSI_CONTINUE_LINKING);
6742 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6744 cond_stmt = gimple_build_cond_empty (t);
6745 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6746 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6747 NULL, NULL)
6748 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6749 NULL, NULL))
6751 gsi = gsi_for_stmt (cond_stmt);
6752 gimple_regimplify_operands (cond_stmt, &gsi);
6755 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6756 if (is_simt)
6758 gsi = gsi_start_bb (l2_bb);
6759 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6760 if (POINTER_TYPE_P (type))
6761 t = fold_build_pointer_plus (fd->loop.v, step);
6762 else
6763 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6764 expand_omp_build_assign (&gsi, fd->loop.v, t);
6767 /* Remove GIMPLE_OMP_RETURN. */
6768 gsi = gsi_last_nondebug_bb (exit_bb);
6769 gsi_remove (&gsi, true);
6771 /* Connect the new blocks. */
6772 remove_edge (FALLTHRU_EDGE (entry_bb));
6774 if (!broken_loop)
6776 remove_edge (BRANCH_EDGE (entry_bb));
6777 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6779 e = BRANCH_EDGE (l1_bb);
6780 ne = FALLTHRU_EDGE (l1_bb);
6781 e->flags = EDGE_TRUE_VALUE;
6783 else
6785 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6787 ne = single_succ_edge (l1_bb);
6788 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6791 ne->flags = EDGE_FALSE_VALUE;
6792 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6793 ne->probability = e->probability.invert ();
6795 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6796 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6798 if (simt_maxlane)
6800 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6801 NULL_TREE, NULL_TREE);
6802 gsi = gsi_last_bb (entry_bb);
6803 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6804 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6805 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6806 FALLTHRU_EDGE (entry_bb)->probability
6807 = profile_probability::guessed_always ().apply_scale (7, 8);
6808 BRANCH_EDGE (entry_bb)->probability
6809 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6810 l2_dom_bb = entry_bb;
6812 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6814 if (!broken_loop && fd->collapse > 1)
6816 basic_block last_bb = l1_bb;
6817 basic_block init_bb = NULL;
6818 for (i = fd->collapse - 2; i >= 0; i--)
6820 tree nextn2v = NULL_TREE;
6821 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6822 e = EDGE_SUCC (last_bb, 0);
6823 else
6824 e = EDGE_SUCC (last_bb, 1);
6825 basic_block bb = split_edge (e);
6826 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6828 t = fold_convert (sizetype, fd->loops[i].step);
6829 t = fold_build_pointer_plus (fd->loops[i].v, t);
6831 else
6833 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6834 fd->loops[i].step);
6835 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6836 fd->loops[i].v, t);
6838 gsi = gsi_after_labels (bb);
6839 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6841 bb = split_block (bb, last_stmt (bb))->dest;
6842 gsi = gsi_start_bb (bb);
6843 tree itype = TREE_TYPE (fd->loops[i].v);
6844 if (fd->loops[i].m2)
6845 t = nextn2v = create_tmp_var (itype);
6846 else
6847 t = fold_convert (itype, fd->loops[i].n2);
6848 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6849 false, GSI_CONTINUE_LINKING);
6850 tree v = fd->loops[i].v;
6851 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6852 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6853 false, GSI_CONTINUE_LINKING);
6854 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6855 cond_stmt = gimple_build_cond_empty (t);
6856 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6857 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6858 expand_omp_regimplify_p, NULL, NULL)
6859 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6860 expand_omp_regimplify_p, NULL, NULL))
6862 gsi = gsi_for_stmt (cond_stmt);
6863 gimple_regimplify_operands (cond_stmt, &gsi);
6865 ne = single_succ_edge (bb);
6866 ne->flags = EDGE_FALSE_VALUE;
6868 init_bb = create_empty_bb (bb);
6869 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6870 add_bb_to_loop (init_bb, bb->loop_father);
6871 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6872 e->probability
6873 = profile_probability::guessed_always ().apply_scale (7, 8);
6874 ne->probability = e->probability.invert ();
6876 gsi = gsi_after_labels (init_bb);
6877 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6878 fd->loops[i + 1].n1);
6879 if (fd->loops[i + 1].m1)
6881 tree t2 = fold_convert (TREE_TYPE (t),
6882 fd->loops[i + 1
6883 - fd->loops[i + 1].outer].v);
6884 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6885 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6886 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6888 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6889 if (fd->loops[i + 1].m2)
6891 if (i + 2 == fd->collapse && (n2var || altv))
6893 gcc_assert (n2v == NULL_TREE);
6894 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6896 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6897 fd->loops[i + 1].n2);
6898 tree t2 = fold_convert (TREE_TYPE (t),
6899 fd->loops[i + 1
6900 - fd->loops[i + 1].outer].v);
6901 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6902 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6903 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6904 expand_omp_build_assign (&gsi, n2v, t);
6906 if (i + 2 == fd->collapse && n2var)
6908 /* For composite simd, n2 is the first iteration the current
6909 task shouldn't already handle, so we effectively want to use
6910 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6911 as the vectorized loop. Except the vectorizer will not
6912 vectorize that, so instead compute N2VAR as
6913 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6914 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6915 as the loop to vectorize. */
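/* As a rough worked example (illustrative values only), assume N2 = 100,
   V = 96 and COUNTS3 = 10: N2VAR = 96 + MIN (100 - 96, 10) = 100, so the
   vectorized inner loop executes exactly the 4 logical iterations left in
   this task's range before the collapsed iterators are advanced.  */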
6916 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6917 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6919 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6920 == LT_EXPR ? -1 : 1));
6921 t = fold_build2 (PLUS_EXPR, itype,
6922 fold_convert (itype,
6923 fd->loops[i + 1].step), t);
6924 if (fd->loops[i + 1].m2)
6925 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6926 else
6927 t = fold_build2 (PLUS_EXPR, itype, t,
6928 fold_convert (itype,
6929 fd->loops[i + 1].n2));
6930 t = fold_build2 (MINUS_EXPR, itype, t,
6931 fold_convert (itype, fd->loops[i + 1].v));
6932 tree step = fold_convert (itype, fd->loops[i + 1].step);
6933 if (TYPE_UNSIGNED (itype)
6934 && fd->loops[i + 1].cond_code == GT_EXPR)
6935 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6936 fold_build1 (NEGATE_EXPR, itype, t),
6937 fold_build1 (NEGATE_EXPR, itype, step));
6938 else
6939 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6940 t = fold_convert (type, t);
6942 else
6943 t = counts[i + 1];
6944 expand_omp_build_assign (&gsi, min_arg1, t2);
6945 expand_omp_build_assign (&gsi, min_arg2, t);
6946 e = split_block (init_bb, last_stmt (init_bb));
6947 gsi = gsi_after_labels (e->dest);
6948 init_bb = e->dest;
6949 remove_edge (FALLTHRU_EDGE (entry_bb));
6950 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6951 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6952 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6953 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6954 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6955 expand_omp_build_assign (&gsi, n2var, t);
6957 if (i + 2 == fd->collapse && altv)
6959 /* The vectorizer currently punts on loops with non-constant
6960 steps for the main IV (can't compute number of iterations
6961 and gives up because of that). As the number of iterations of
6962 an OpenMP loop can always be computed upfront, use an alternate
6963 IV as the loop iterator.  */
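/* Roughly, the innermost loop then runs as
     for (ALTV = 0; ALTV < ALTN2; ALTV++)
       { ...body...; V3 += STEP3; }
   where ALTN2 is the iteration count computed below, so the vectorizer
   sees a simple counted IV even when STEP3 is not a compile-time
   constant.  */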
6964 expand_omp_build_assign (&gsi, altv,
6965 build_zero_cst (TREE_TYPE (altv)));
6966 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6967 if (POINTER_TYPE_P (itype))
6968 itype = signed_type_for (itype);
6969 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6970 ? -1 : 1));
6971 t = fold_build2 (PLUS_EXPR, itype,
6972 fold_convert (itype, fd->loops[i + 1].step), t);
6973 t = fold_build2 (PLUS_EXPR, itype, t,
6974 fold_convert (itype,
6975 fd->loops[i + 1].m2
6976 ? n2v : fd->loops[i + 1].n2));
6977 t = fold_build2 (MINUS_EXPR, itype, t,
6978 fold_convert (itype, fd->loops[i + 1].v));
6979 tree step = fold_convert (itype, fd->loops[i + 1].step);
6980 if (TYPE_UNSIGNED (itype)
6981 && fd->loops[i + 1].cond_code == GT_EXPR)
6982 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6983 fold_build1 (NEGATE_EXPR, itype, t),
6984 fold_build1 (NEGATE_EXPR, itype, step));
6985 else
6986 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6987 t = fold_convert (TREE_TYPE (altv), t);
6988 expand_omp_build_assign (&gsi, altn2, t);
6989 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6990 fd->loops[i + 1].m2
6991 ? n2v : fd->loops[i + 1].n2);
6992 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6993 true, GSI_SAME_STMT);
6994 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6995 fd->loops[i + 1].v, t2);
6996 gassign *g
6997 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6998 build_zero_cst (TREE_TYPE (altv)));
6999 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7001 n2v = nextn2v;
7003 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7004 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7006 e = find_edge (entry_bb, last_bb);
7007 redirect_edge_succ (e, bb);
7008 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7009 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7012 last_bb = bb;
7015 if (!broken_loop)
7017 class loop *loop = alloc_loop ();
7018 loop->header = l1_bb;
7019 loop->latch = cont_bb;
7020 add_loop (loop, l1_bb->loop_father);
7021 loop->safelen = safelen_int;
7022 if (simduid)
7024 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7025 cfun->has_simduid_loops = true;
7027 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7028 the loop. */
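/* For instance (illustrative values only), a simd loop annotated with
   'safelen(16) simdlen(8)' reaches this point with loop->safelen == 16,
   so unless tree loop optimizations are disabled it is marked
   force_vectorize and, below, gets loop->simdlen == 8 since 8 <= safelen.  */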
7029 if ((flag_tree_loop_vectorize
7030 || !OPTION_SET_P (flag_tree_loop_vectorize))
7031 && flag_tree_loop_optimize
7032 && loop->safelen > 1)
7034 loop->force_vectorize = true;
7035 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7037 unsigned HOST_WIDE_INT v
7038 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7039 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7040 loop->simdlen = v;
7042 cfun->has_force_vectorize_loops = true;
7044 else if (dont_vectorize)
7045 loop->dont_vectorize = true;
7047 else if (simduid)
7048 cfun->has_simduid_loops = true;
7051 /* A taskloop construct is represented after gimplification with
7052 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7053 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7054 which should just compute all the needed loop temporaries
7055 for GIMPLE_OMP_TASK. */
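/* For example (source form only):
     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);
   becomes, roughly, an outer GIMPLE_OMP_FOR that only stores the overall
   start/end into _looptemp_ variables, a GIMPLE_OMP_TASK issued through
   GOMP_taskloop{,_ull} that consumes them, and an inner GIMPLE_OMP_FOR
   run by each task over its assigned subrange.  */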
7057 static void
7058 expand_omp_taskloop_for_outer (struct omp_region *region,
7059 struct omp_for_data *fd,
7060 gimple *inner_stmt)
7062 tree type, bias = NULL_TREE;
7063 basic_block entry_bb, cont_bb, exit_bb;
7064 gimple_stmt_iterator gsi;
7065 gassign *assign_stmt;
7066 tree *counts = NULL;
7067 int i;
7069 gcc_assert (inner_stmt);
7070 gcc_assert (region->cont);
7071 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7072 && gimple_omp_task_taskloop_p (inner_stmt));
7073 type = TREE_TYPE (fd->loop.v);
7075 /* See if we need to bias by LLONG_MIN. */
7076 if (fd->iter_type == long_long_unsigned_type_node
7077 && TREE_CODE (type) == INTEGER_TYPE
7078 && !TYPE_UNSIGNED (type))
7080 tree n1, n2;
7082 if (fd->loop.cond_code == LT_EXPR)
7084 n1 = fd->loop.n1;
7085 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7087 else
7089 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7090 n2 = fd->loop.n1;
7092 if (TREE_CODE (n1) != INTEGER_CST
7093 || TREE_CODE (n2) != INTEGER_CST
7094 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7095 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
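/* Roughly, adding LLONG_MIN flips the sign bit, so e.g. a signed range
   [-10, 10) becomes the unsigned range [2^63 - 10, 2^63 + 10) and the
   unsigned long long comparisons done by the runtime still order the
   iterations correctly.  */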
7098 entry_bb = region->entry;
7099 cont_bb = region->cont;
7100 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7101 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7102 exit_bb = region->exit;
7104 gsi = gsi_last_nondebug_bb (entry_bb);
7105 gimple *for_stmt = gsi_stmt (gsi);
7106 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7107 if (fd->collapse > 1)
7109 int first_zero_iter = -1, dummy = -1;
7110 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7112 counts = XALLOCAVEC (tree, fd->collapse);
7113 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7114 zero_iter_bb, first_zero_iter,
7115 dummy_bb, dummy, l2_dom_bb);
7117 if (zero_iter_bb)
7119 /* Some counts[i] vars might be uninitialized if
7120 some loop has zero iterations. But the body shouldn't
7121 be executed in that case, so just avoid uninit warnings. */
7122 for (i = first_zero_iter; i < fd->collapse; i++)
7123 if (SSA_VAR_P (counts[i]))
7124 suppress_warning (counts[i], OPT_Wuninitialized);
7125 gsi_prev (&gsi);
7126 edge e = split_block (entry_bb, gsi_stmt (gsi));
7127 entry_bb = e->dest;
7128 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7129 gsi = gsi_last_bb (entry_bb);
7130 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7131 get_immediate_dominator (CDI_DOMINATORS,
7132 zero_iter_bb));
7136 tree t0, t1;
7137 t1 = fd->loop.n2;
7138 t0 = fd->loop.n1;
7139 if (POINTER_TYPE_P (TREE_TYPE (t0))
7140 && TYPE_PRECISION (TREE_TYPE (t0))
7141 != TYPE_PRECISION (fd->iter_type))
7143 /* Avoid casting pointers to integer of a different size. */
7144 tree itype = signed_type_for (type);
7145 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7146 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7148 else
7150 t1 = fold_convert (fd->iter_type, t1);
7151 t0 = fold_convert (fd->iter_type, t0);
7153 if (bias)
7155 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7156 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7159 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7160 OMP_CLAUSE__LOOPTEMP_);
7161 gcc_assert (innerc);
7162 tree startvar = OMP_CLAUSE_DECL (innerc);
7163 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7164 gcc_assert (innerc);
7165 tree endvar = OMP_CLAUSE_DECL (innerc);
7166 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7168 innerc = find_lastprivate_looptemp (fd, innerc);
7169 if (innerc)
7171 /* If needed (inner taskloop has lastprivate clause), propagate
7172 down the total number of iterations. */
7173 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7174 NULL_TREE, false,
7175 GSI_CONTINUE_LINKING);
7176 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7177 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7181 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7182 GSI_CONTINUE_LINKING);
7183 assign_stmt = gimple_build_assign (startvar, t0);
7184 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7186 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7187 GSI_CONTINUE_LINKING);
7188 assign_stmt = gimple_build_assign (endvar, t1);
7189 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7190 if (fd->collapse > 1)
7191 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7193 /* Remove the GIMPLE_OMP_FOR statement. */
7194 gsi = gsi_for_stmt (for_stmt);
7195 gsi_remove (&gsi, true);
7197 gsi = gsi_last_nondebug_bb (cont_bb);
7198 gsi_remove (&gsi, true);
7200 gsi = gsi_last_nondebug_bb (exit_bb);
7201 gsi_remove (&gsi, true);
7203 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7204 remove_edge (BRANCH_EDGE (entry_bb));
7205 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7206 remove_edge (BRANCH_EDGE (cont_bb));
7207 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7208 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7209 recompute_dominator (CDI_DOMINATORS, region->entry));
7212 /* A taskloop construct is represented after gimplification with
7213 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7214 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7215 GOMP_taskloop{,_ull} function arranges for each task to be given just
7216 a single range of iterations. */
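/* In other words, each task ends up executing roughly
     for (V = start; V cond end; V += STEP)
       body;
   where start and end are the _looptemp_ values that
   GOMP_taskloop{,_ull} arranged for this particular task.  */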
7218 static void
7219 expand_omp_taskloop_for_inner (struct omp_region *region,
7220 struct omp_for_data *fd,
7221 gimple *inner_stmt)
7223 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7224 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7225 basic_block fin_bb;
7226 gimple_stmt_iterator gsi;
7227 edge ep;
7228 bool broken_loop = region->cont == NULL;
7229 tree *counts = NULL;
7230 tree n1, n2, step;
7232 itype = type = TREE_TYPE (fd->loop.v);
7233 if (POINTER_TYPE_P (type))
7234 itype = signed_type_for (type);
7236 /* See if we need to bias by LLONG_MIN. */
7237 if (fd->iter_type == long_long_unsigned_type_node
7238 && TREE_CODE (type) == INTEGER_TYPE
7239 && !TYPE_UNSIGNED (type))
7241 tree n1, n2;
7243 if (fd->loop.cond_code == LT_EXPR)
7245 n1 = fd->loop.n1;
7246 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7248 else
7250 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7251 n2 = fd->loop.n1;
7253 if (TREE_CODE (n1) != INTEGER_CST
7254 || TREE_CODE (n2) != INTEGER_CST
7255 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7256 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7259 entry_bb = region->entry;
7260 cont_bb = region->cont;
7261 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7262 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7263 gcc_assert (broken_loop
7264 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7265 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7266 if (!broken_loop)
7268 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7269 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7271 exit_bb = region->exit;
7273 /* Iteration space partitioning goes in ENTRY_BB. */
7274 gsi = gsi_last_nondebug_bb (entry_bb);
7275 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7277 if (fd->collapse > 1)
7279 int first_zero_iter = -1, dummy = -1;
7280 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7282 counts = XALLOCAVEC (tree, fd->collapse);
7283 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7284 fin_bb, first_zero_iter,
7285 dummy_bb, dummy, l2_dom_bb);
7286 t = NULL_TREE;
7288 else
7289 t = integer_one_node;
7291 step = fd->loop.step;
7292 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7293 OMP_CLAUSE__LOOPTEMP_);
7294 gcc_assert (innerc);
7295 n1 = OMP_CLAUSE_DECL (innerc);
7296 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7297 gcc_assert (innerc);
7298 n2 = OMP_CLAUSE_DECL (innerc);
7299 if (bias)
7301 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7302 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7304 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7305 true, NULL_TREE, true, GSI_SAME_STMT);
7306 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7307 true, NULL_TREE, true, GSI_SAME_STMT);
7308 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7309 true, NULL_TREE, true, GSI_SAME_STMT);
7311 tree startvar = fd->loop.v;
7312 tree endvar = NULL_TREE;
7314 if (gimple_omp_for_combined_p (fd->for_stmt))
7316 tree clauses = gimple_omp_for_clauses (inner_stmt);
7317 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7318 gcc_assert (innerc);
7319 startvar = OMP_CLAUSE_DECL (innerc);
7320 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7321 OMP_CLAUSE__LOOPTEMP_);
7322 gcc_assert (innerc);
7323 endvar = OMP_CLAUSE_DECL (innerc);
7325 t = fold_convert (TREE_TYPE (startvar), n1);
7326 t = force_gimple_operand_gsi (&gsi, t,
7327 DECL_P (startvar)
7328 && TREE_ADDRESSABLE (startvar),
7329 NULL_TREE, false, GSI_CONTINUE_LINKING);
7330 gimple *assign_stmt = gimple_build_assign (startvar, t);
7331 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7333 t = fold_convert (TREE_TYPE (startvar), n2);
7334 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7335 false, GSI_CONTINUE_LINKING);
7336 if (endvar)
7338 assign_stmt = gimple_build_assign (endvar, e);
7339 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7340 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7341 assign_stmt = gimple_build_assign (fd->loop.v, e);
7342 else
7343 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7344 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7347 tree *nonrect_bounds = NULL;
7348 if (fd->collapse > 1)
7350 if (fd->non_rect)
7352 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7353 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7355 gcc_assert (gsi_bb (gsi) == entry_bb);
7356 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7357 startvar);
7358 entry_bb = gsi_bb (gsi);
7361 if (!broken_loop)
7363 /* The code controlling the sequential loop replaces the
7364 GIMPLE_OMP_CONTINUE. */
7365 gsi = gsi_last_nondebug_bb (cont_bb);
7366 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7367 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7368 vmain = gimple_omp_continue_control_use (cont_stmt);
7369 vback = gimple_omp_continue_control_def (cont_stmt);
7371 if (!gimple_omp_for_combined_p (fd->for_stmt))
7373 if (POINTER_TYPE_P (type))
7374 t = fold_build_pointer_plus (vmain, step);
7375 else
7376 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7377 t = force_gimple_operand_gsi (&gsi, t,
7378 DECL_P (vback)
7379 && TREE_ADDRESSABLE (vback),
7380 NULL_TREE, true, GSI_SAME_STMT);
7381 assign_stmt = gimple_build_assign (vback, t);
7382 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7384 t = build2 (fd->loop.cond_code, boolean_type_node,
7385 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7386 ? t : vback, e);
7387 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7390 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7391 gsi_remove (&gsi, true);
7393 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7394 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7395 cont_bb, body_bb);
7398 /* Remove the GIMPLE_OMP_FOR statement. */
7399 gsi = gsi_for_stmt (fd->for_stmt);
7400 gsi_remove (&gsi, true);
7402 /* Remove the GIMPLE_OMP_RETURN statement. */
7403 gsi = gsi_last_nondebug_bb (exit_bb);
7404 gsi_remove (&gsi, true);
7406 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7407 if (!broken_loop)
7408 remove_edge (BRANCH_EDGE (entry_bb));
7409 else
7411 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7412 region->outer->cont = NULL;
7415 /* Connect all the blocks. */
7416 if (!broken_loop)
7418 ep = find_edge (cont_bb, body_bb);
7419 if (gimple_omp_for_combined_p (fd->for_stmt))
7421 remove_edge (ep);
7422 ep = NULL;
7424 else if (fd->collapse > 1)
7426 remove_edge (ep);
7427 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7429 else
7430 ep->flags = EDGE_TRUE_VALUE;
7431 find_edge (cont_bb, fin_bb)->flags
7432 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7435 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7436 recompute_dominator (CDI_DOMINATORS, body_bb));
7437 if (!broken_loop)
7438 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7439 recompute_dominator (CDI_DOMINATORS, fin_bb));
7441 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7443 class loop *loop = alloc_loop ();
7444 loop->header = body_bb;
7445 if (collapse_bb == NULL)
7446 loop->latch = cont_bb;
7447 add_loop (loop, body_bb->loop_father);
7451 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7452 partitioned loop. The lowering here is abstracted, in that the
7453 loop parameters are passed through internal functions, which are
7454 further lowered by oacc_device_lower, once we get to the target
7455 compiler. The loop is of the form:
7457 for (V = B; V LTGT E; V += S) {BODY}
7459 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7460 (constant 0 for no chunking) and we will have a GWV partitioning
7461 mask, specifying dimensions over which the loop is to be
7462 partitioned (see note below). We generate code that looks like
7463 (this ignores tiling):
7465 <entry_bb> [incoming FALL->body, BRANCH->exit]
7466 typedef signedintify (typeof (V)) T; // underlying signed integral type
7467 T range = E - B;
7468 T chunk_no = 0;
7469 T DIR = LTGT == '<' ? +1 : -1;
7470 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7471 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7473 <head_bb> [created by splitting end of entry_bb]
7474 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7475 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7476 if (!(offset LTGT bound)) goto bottom_bb;
7478 <body_bb> [incoming]
7479 V = B + offset;
7480 {BODY}
7482 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7483 offset += step;
7484 if (offset LTGT bound) goto body_bb; [*]
7486 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7487 chunk_no++;
6488 if (chunk_no < chunk_max) goto head_bb;
7490 <exit_bb> [incoming]
7491 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7493 [*] Needed if V live at end of loop. */
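/* For example (source form only), a simple
     #pragma acc parallel loop
     for (i = 0; i < n; i++)
       body (i);
   reaches this point with cond_code LT_EXPR; the GOACC_LOOP_* internal
   calls emitted below just carry dir/range/step/chunking/GWV, and only
   oacc_device_lower turns them into concrete per-gang/worker/vector
   offsets and bounds for the selected target.  */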
7495 static void
7496 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7498 bool is_oacc_kernels_parallelized
7499 = (lookup_attribute ("oacc kernels parallelized",
7500 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7502 bool is_oacc_kernels
7503 = (lookup_attribute ("oacc kernels",
7504 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7505 if (is_oacc_kernels_parallelized)
7506 gcc_checking_assert (is_oacc_kernels);
7508 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7509 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7510 for SSA specifics, and some are for 'parloops' OpenACC
7511 'kernels'-parallelized specifics. */
7513 tree v = fd->loop.v;
7514 enum tree_code cond_code = fd->loop.cond_code;
7515 enum tree_code plus_code = PLUS_EXPR;
7517 tree chunk_size = integer_minus_one_node;
7518 tree gwv = integer_zero_node;
7519 tree iter_type = TREE_TYPE (v);
7520 tree diff_type = iter_type;
7521 tree plus_type = iter_type;
7522 struct oacc_collapse *counts = NULL;
7524 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7525 == GF_OMP_FOR_KIND_OACC_LOOP);
7526 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7527 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7529 if (POINTER_TYPE_P (iter_type))
7531 plus_code = POINTER_PLUS_EXPR;
7532 plus_type = sizetype;
7534 for (int ix = fd->collapse; ix--;)
7536 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7537 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7538 diff_type = diff_type2;
7540 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7541 diff_type = signed_type_for (diff_type);
7542 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7543 diff_type = integer_type_node;
7545 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7546 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7547 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7548 basic_block bottom_bb = NULL;
7550 /* entry_bb has two successors; the branch edge is to the exit
7551 block, fallthrough edge to body. */
7552 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7553 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7555 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7556 body_bb, or to a block whose only successor is the body_bb. Its
7557 fallthrough successor is the final block (same as the branch
7558 successor of the entry_bb). */
7559 if (cont_bb)
7561 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7562 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7564 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7565 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7567 else
7568 gcc_assert (!gimple_in_ssa_p (cfun));
7570 /* The exit block only has entry_bb and cont_bb as predecessors. */
7571 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7573 tree chunk_no;
7574 tree chunk_max = NULL_TREE;
7575 tree bound, offset;
7576 tree step = create_tmp_var (diff_type, ".step");
7577 bool up = cond_code == LT_EXPR;
7578 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7579 bool chunking = !gimple_in_ssa_p (cfun);
7580 bool negating;
7582 /* Tiling vars. */
7583 tree tile_size = NULL_TREE;
7584 tree element_s = NULL_TREE;
7585 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7586 basic_block elem_body_bb = NULL;
7587 basic_block elem_cont_bb = NULL;
7589 /* SSA instances. */
7590 tree offset_incr = NULL_TREE;
7591 tree offset_init = NULL_TREE;
7593 gimple_stmt_iterator gsi;
7594 gassign *ass;
7595 gcall *call;
7596 gimple *stmt;
7597 tree expr;
7598 location_t loc;
7599 edge split, be, fte;
7601 /* Split the end of entry_bb to create head_bb. */
7602 split = split_block (entry_bb, last_stmt (entry_bb));
7603 basic_block head_bb = split->dest;
7604 entry_bb = split->src;
7606 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7607 gsi = gsi_last_nondebug_bb (entry_bb);
7608 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7609 loc = gimple_location (for_stmt);
7611 if (gimple_in_ssa_p (cfun))
7613 offset_init = gimple_omp_for_index (for_stmt, 0);
7614 gcc_assert (integer_zerop (fd->loop.n1));
7615 /* The SSA parallelizer does gang parallelism. */
7616 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7619 if (fd->collapse > 1 || fd->tiling)
7621 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7622 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7623 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7624 TREE_TYPE (fd->loop.n2), loc);
7626 if (SSA_VAR_P (fd->loop.n2))
7628 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7629 true, GSI_SAME_STMT);
7630 ass = gimple_build_assign (fd->loop.n2, total);
7631 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7635 tree b = fd->loop.n1;
7636 tree e = fd->loop.n2;
7637 tree s = fd->loop.step;
7639 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7640 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7642 /* Convert the step, avoiding possible unsigned->signed overflow. */
7643 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7644 if (negating)
7645 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7646 s = fold_convert (diff_type, s);
7647 if (negating)
7648 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7649 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7651 if (!chunking)
7652 chunk_size = integer_zero_node;
7653 expr = fold_convert (diff_type, chunk_size);
7654 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7655 NULL_TREE, true, GSI_SAME_STMT);
7657 if (fd->tiling)
7659 /* Determine the tile size and element step,
7660 modify the outer loop step size. */
7661 tile_size = create_tmp_var (diff_type, ".tile_size");
7662 expr = build_int_cst (diff_type, 1);
7663 for (int ix = 0; ix < fd->collapse; ix++)
7664 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7665 expr = force_gimple_operand_gsi (&gsi, expr, true,
7666 NULL_TREE, true, GSI_SAME_STMT);
7667 ass = gimple_build_assign (tile_size, expr);
7668 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7670 element_s = create_tmp_var (diff_type, ".element_s");
7671 ass = gimple_build_assign (element_s, s);
7672 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7674 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7675 s = force_gimple_operand_gsi (&gsi, expr, true,
7676 NULL_TREE, true, GSI_SAME_STMT);
7679 /* Determine the range, avoiding possible unsigned->signed overflow. */
7680 negating = !up && TYPE_UNSIGNED (iter_type);
7681 expr = fold_build2 (MINUS_EXPR, plus_type,
7682 fold_convert (plus_type, negating ? b : e),
7683 fold_convert (plus_type, negating ? e : b));
7684 expr = fold_convert (diff_type, expr);
7685 if (negating)
7686 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7687 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7688 NULL_TREE, true, GSI_SAME_STMT);
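/* E.g. (illustrative), for an unsigned IV counting down from 10 to 0
   (cond_code GT_EXPR) we compute b - e = 10 in the unsigned type and
   negate it in the signed diff_type, yielding range = -10 to pair with
   dir = -1, instead of converting the huge unsigned value e - b.  */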
7690 chunk_no = build_int_cst (diff_type, 0);
7691 if (chunking)
7693 gcc_assert (!gimple_in_ssa_p (cfun));
7695 expr = chunk_no;
7696 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7697 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7699 ass = gimple_build_assign (chunk_no, expr);
7700 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7702 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7703 build_int_cst (integer_type_node,
7704 IFN_GOACC_LOOP_CHUNKS),
7705 dir, range, s, chunk_size, gwv);
7706 gimple_call_set_lhs (call, chunk_max);
7707 gimple_set_location (call, loc);
7708 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7710 else
7711 chunk_size = chunk_no;
7713 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7714 build_int_cst (integer_type_node,
7715 IFN_GOACC_LOOP_STEP),
7716 dir, range, s, chunk_size, gwv);
7717 gimple_call_set_lhs (call, step);
7718 gimple_set_location (call, loc);
7719 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7721 /* Remove the GIMPLE_OMP_FOR. */
7722 gsi_remove (&gsi, true);
7724 /* Fixup edges from head_bb. */
7725 be = BRANCH_EDGE (head_bb);
7726 fte = FALLTHRU_EDGE (head_bb);
7727 be->flags |= EDGE_FALSE_VALUE;
7728 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7730 basic_block body_bb = fte->dest;
7732 if (gimple_in_ssa_p (cfun))
7734 gsi = gsi_last_nondebug_bb (cont_bb);
7735 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7737 offset = gimple_omp_continue_control_use (cont_stmt);
7738 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7740 else
7742 offset = create_tmp_var (diff_type, ".offset");
7743 offset_init = offset_incr = offset;
7745 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7747 /* Loop offset & bound go into head_bb. */
7748 gsi = gsi_start_bb (head_bb);
7750 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7751 build_int_cst (integer_type_node,
7752 IFN_GOACC_LOOP_OFFSET),
7753 dir, range, s,
7754 chunk_size, gwv, chunk_no);
7755 gimple_call_set_lhs (call, offset_init);
7756 gimple_set_location (call, loc);
7757 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7759 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7760 build_int_cst (integer_type_node,
7761 IFN_GOACC_LOOP_BOUND),
7762 dir, range, s,
7763 chunk_size, gwv, offset_init);
7764 gimple_call_set_lhs (call, bound);
7765 gimple_set_location (call, loc);
7766 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7768 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7769 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7770 GSI_CONTINUE_LINKING);
7772 /* V assignment goes into body_bb. */
7773 if (!gimple_in_ssa_p (cfun))
7775 gsi = gsi_start_bb (body_bb);
7777 expr = build2 (plus_code, iter_type, b,
7778 fold_convert (plus_type, offset));
7779 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7780 true, GSI_SAME_STMT);
7781 ass = gimple_build_assign (v, expr);
7782 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7784 if (fd->collapse > 1 || fd->tiling)
7785 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7787 if (fd->tiling)
7789 /* Determine the range of the element loop -- usually simply
7790 the tile_size, but could be smaller if the final
7791 iteration of the outer loop is a partial tile. */
7792 tree e_range = create_tmp_var (diff_type, ".e_range");
7794 expr = build2 (MIN_EXPR, diff_type,
7795 build2 (MINUS_EXPR, diff_type, bound, offset),
7796 build2 (MULT_EXPR, diff_type, tile_size,
7797 element_s));
7798 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7799 true, GSI_SAME_STMT);
7800 ass = gimple_build_assign (e_range, expr);
7801 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7803 /* Determine bound, offset & step of inner loop. */
7804 e_bound = create_tmp_var (diff_type, ".e_bound");
7805 e_offset = create_tmp_var (diff_type, ".e_offset");
7806 e_step = create_tmp_var (diff_type, ".e_step");
7808 /* Mark these as element loops. */
7809 tree t, e_gwv = integer_minus_one_node;
7810 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7812 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7813 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7814 element_s, chunk, e_gwv, chunk);
7815 gimple_call_set_lhs (call, e_offset);
7816 gimple_set_location (call, loc);
7817 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7819 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7820 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7821 element_s, chunk, e_gwv, e_offset);
7822 gimple_call_set_lhs (call, e_bound);
7823 gimple_set_location (call, loc);
7824 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7826 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7827 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7828 element_s, chunk, e_gwv);
7829 gimple_call_set_lhs (call, e_step);
7830 gimple_set_location (call, loc);
7831 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7833 /* Add test and split block. */
7834 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7835 stmt = gimple_build_cond_empty (expr);
7836 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7837 split = split_block (body_bb, stmt);
7838 elem_body_bb = split->dest;
7839 if (cont_bb == body_bb)
7840 cont_bb = elem_body_bb;
7841 body_bb = split->src;
7843 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7845 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7846 if (cont_bb == NULL)
7848 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7849 e->probability = profile_probability::even ();
7850 split->probability = profile_probability::even ();
7853 /* Initialize the user's loop vars. */
7854 gsi = gsi_start_bb (elem_body_bb);
7855 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7856 diff_type);
7860 /* Loop increment goes into cont_bb. If this is not a loop, we
7861 will have spawned threads as if it was, and each one will
7862 execute one iteration. The specification is not explicit about
7863 whether such constructs are ill-formed or not, and they can
7864 occur, especially when noreturn routines are involved. */
7865 if (cont_bb)
7867 gsi = gsi_last_nondebug_bb (cont_bb);
7868 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7869 loc = gimple_location (cont_stmt);
7871 if (fd->tiling)
7873 /* Insert element loop increment and test. */
7874 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7875 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7876 true, GSI_SAME_STMT);
7877 ass = gimple_build_assign (e_offset, expr);
7878 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7879 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7881 stmt = gimple_build_cond_empty (expr);
7882 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7883 split = split_block (cont_bb, stmt);
7884 elem_cont_bb = split->src;
7885 cont_bb = split->dest;
7887 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7888 split->probability = profile_probability::unlikely ().guessed ();
7889 edge latch_edge
7890 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7891 latch_edge->probability = profile_probability::likely ().guessed ();
7893 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7894 skip_edge->probability = profile_probability::unlikely ().guessed ();
7895 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7896 loop_entry_edge->probability
7897 = profile_probability::likely ().guessed ();
7899 gsi = gsi_for_stmt (cont_stmt);
7902 /* Increment offset. */
7903 if (gimple_in_ssa_p (cfun))
7904 expr = build2 (plus_code, iter_type, offset,
7905 fold_convert (plus_type, step));
7906 else
7907 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7908 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7909 true, GSI_SAME_STMT);
7910 ass = gimple_build_assign (offset_incr, expr);
7911 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7912 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7913 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7915 /* Remove the GIMPLE_OMP_CONTINUE. */
7916 gsi_remove (&gsi, true);
7918 /* Fixup edges from cont_bb. */
7919 be = BRANCH_EDGE (cont_bb);
7920 fte = FALLTHRU_EDGE (cont_bb);
7921 be->flags |= EDGE_TRUE_VALUE;
7922 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7924 if (chunking)
7926 /* Split the beginning of exit_bb to make bottom_bb. We
7927 need to insert a nop at the start, because splitting is
7928 after a stmt, not before. */
7929 gsi = gsi_start_bb (exit_bb);
7930 stmt = gimple_build_nop ();
7931 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7932 split = split_block (exit_bb, stmt);
7933 bottom_bb = split->src;
7934 exit_bb = split->dest;
7935 gsi = gsi_last_bb (bottom_bb);
7937 /* Chunk increment and test go into bottom_bb. */
7938 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7939 build_int_cst (diff_type, 1));
7940 ass = gimple_build_assign (chunk_no, expr);
7941 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7943 /* Chunk test at end of bottom_bb. */
7944 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7945 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7946 GSI_CONTINUE_LINKING);
7948 /* Fixup edges from bottom_bb. */
7949 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7950 split->probability = profile_probability::unlikely ().guessed ();
7951 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7952 latch_edge->probability = profile_probability::likely ().guessed ();
7956 gsi = gsi_last_nondebug_bb (exit_bb);
7957 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7958 loc = gimple_location (gsi_stmt (gsi));
7960 if (!gimple_in_ssa_p (cfun))
7962 /* Insert the final value of V, in case it is live. This is the
7963 value for the only thread that survives past the join. */
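/* E.g. (illustrative), for B = 0, E = 10, S = 3 the body sees
   V = 0, 3, 6, 9 and the value stored here is
   0 + ((10 - 1) / 3 + 1) * 3 = 12, the same V a sequential loop
   would leave behind.  */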
7964 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7965 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7966 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7967 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7968 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7969 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7970 true, GSI_SAME_STMT);
7971 ass = gimple_build_assign (v, expr);
7972 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7975 /* Remove the OMP_RETURN. */
7976 gsi_remove (&gsi, true);
7978 if (cont_bb)
7980 /* We now have one, two or three nested loops. Update the loop
7981 structures. */
7982 class loop *parent = entry_bb->loop_father;
7983 class loop *body = body_bb->loop_father;
7985 if (chunking)
7987 class loop *chunk_loop = alloc_loop ();
7988 chunk_loop->header = head_bb;
7989 chunk_loop->latch = bottom_bb;
7990 add_loop (chunk_loop, parent);
7991 parent = chunk_loop;
7993 else if (parent != body)
7995 gcc_assert (body->header == body_bb);
7996 gcc_assert (body->latch == cont_bb
7997 || single_pred (body->latch) == cont_bb);
7998 parent = NULL;
8001 if (parent)
8003 class loop *body_loop = alloc_loop ();
8004 body_loop->header = body_bb;
8005 body_loop->latch = cont_bb;
8006 add_loop (body_loop, parent);
8008 if (fd->tiling)
8010 /* Insert tiling's element loop. */
8011 class loop *inner_loop = alloc_loop ();
8012 inner_loop->header = elem_body_bb;
8013 inner_loop->latch = elem_cont_bb;
8014 add_loop (inner_loop, body_loop);
8020 /* Expand the OMP loop defined by REGION. */
8022 static void
8023 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8025 struct omp_for_data fd;
8026 struct omp_for_data_loop *loops;
8028 loops = XALLOCAVEC (struct omp_for_data_loop,
8029 gimple_omp_for_collapse (last_stmt (region->entry)));
8030 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8031 &fd, loops);
8032 region->sched_kind = fd.sched_kind;
8033 region->sched_modifiers = fd.sched_modifiers;
8034 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8035 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8037 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8038 if ((loops[i].m1 || loops[i].m2)
8039 && (loops[i].m1 == NULL_TREE
8040 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8041 && (loops[i].m2 == NULL_TREE
8042 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8043 && TREE_CODE (loops[i].step) == INTEGER_CST
8044 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8046 tree t;
8047 tree itype = TREE_TYPE (loops[i].v);
8048 if (loops[i].m1 && loops[i].m2)
8049 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8050 else if (loops[i].m1)
8051 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8052 else
8053 t = loops[i].m2;
8054 t = fold_build2 (MULT_EXPR, itype, t,
8055 fold_convert (itype,
8056 loops[i - loops[i].outer].step));
8057 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8058 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8059 fold_build1 (NEGATE_EXPR, itype, t),
8060 fold_build1 (NEGATE_EXPR, itype,
8061 fold_convert (itype,
8062 loops[i].step)));
8063 else
8064 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8065 fold_convert (itype, loops[i].step));
8066 if (integer_nonzerop (t))
8067 error_at (gimple_location (fd.for_stmt),
8068 "invalid OpenMP non-rectangular loop step; "
8069 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8070 "step %qE",
8071 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8072 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8073 loops[i - loops[i].outer].step, i + 1,
8074 loops[i].step);
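/* E.g. (illustrative), for
     for (i = 0; i < n; i++)
       for (j = 2 * i; j < 3 * i; j += 4)
   we have m1 = 2, m2 = 3, outer step 1 and inner step 4, so
   (3 - 2) * 1 is not a multiple of 4 and the loop nest is rejected
   here.  */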
8078 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8079 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8080 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8081 if (region->cont)
8083 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8084 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8085 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8087 else
8088 /* If there isn't a continue then this is a degenerate case where
8089 the introduction of abnormal edges during lowering will prevent
8090 original loops from being detected. Fix that up. */
8091 loops_state_set (LOOPS_NEED_FIXUP);
8093 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8094 expand_omp_simd (region, &fd);
8095 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8097 gcc_assert (!inner_stmt && !fd.non_rect);
8098 expand_oacc_for (region, &fd);
8100 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8102 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8103 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8104 else
8105 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8107 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8108 && !fd.have_ordered)
8110 if (fd.chunk_size == NULL)
8111 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8112 else
8113 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8115 else
8117 int fn_index, start_ix, next_ix;
8118 unsigned HOST_WIDE_INT sched = 0;
8119 tree sched_arg = NULL_TREE;
8121 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8122 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8123 if (fd.chunk_size == NULL
8124 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8125 fd.chunk_size = integer_zero_node;
8126 switch (fd.sched_kind)
8128 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8129 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8130 && fd.lastprivate_conditional == 0)
8132 gcc_assert (!fd.have_ordered);
8133 fn_index = 6;
8134 sched = 4;
8136 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8137 && !fd.have_ordered
8138 && fd.lastprivate_conditional == 0)
8139 fn_index = 7;
8140 else
8142 fn_index = 3;
8143 sched = (HOST_WIDE_INT_1U << 31);
8145 break;
8146 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8147 case OMP_CLAUSE_SCHEDULE_GUIDED:
8148 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8149 && !fd.have_ordered
8150 && fd.lastprivate_conditional == 0)
8152 fn_index = 3 + fd.sched_kind;
8153 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8154 break;
8156 fn_index = fd.sched_kind;
8157 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8158 sched += (HOST_WIDE_INT_1U << 31);
8159 break;
8160 case OMP_CLAUSE_SCHEDULE_STATIC:
8161 gcc_assert (fd.have_ordered);
8162 fn_index = 0;
8163 sched = (HOST_WIDE_INT_1U << 31) + 1;
8164 break;
8165 default:
8166 gcc_unreachable ();
8168 if (!fd.ordered)
8169 fn_index += fd.have_ordered * 8;
8170 if (fd.ordered)
8171 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8172 else
8173 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8174 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8175 if (fd.have_reductemp || fd.have_pointer_condtemp)
8177 if (fd.ordered)
8178 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8179 else if (fd.have_ordered)
8180 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8181 else
8182 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8183 sched_arg = build_int_cstu (long_integer_type_node, sched);
8184 if (!fd.chunk_size)
8185 fd.chunk_size = integer_zero_node;
8187 if (fd.iter_type == long_long_unsigned_type_node)
8189 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8190 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8191 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8192 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8194 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8195 (enum built_in_function) next_ix, sched_arg,
8196 inner_stmt);
8199 if (gimple_in_ssa_p (cfun))
8200 update_ssa (TODO_update_ssa_only_virtuals);
8203 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8205 v = GOMP_sections_start (n);
8207 switch (v)
8209 case 0:
8210 goto L2;
8211 case 1:
8212 section 1;
8213 goto L1;
8214 case 2:
8216 case n:
8218 default:
8219 abort ();
8222 v = GOMP_sections_next ();
8223 goto L0;
8225 reduction;
8227 If this is a combined parallel sections, replace the call to
8228 GOMP_sections_start with a call to GOMP_sections_next. */
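/* For instance (source form only), a directive with two sections
     #pragma omp sections
     {
       #pragma omp section
         a ();
       #pragma omp section
         b ();
     }
   yields, roughly, v = GOMP_sections_start (2) and a switch whose
   cases 1 and 2 run a () and b (), whose case 0 jumps to L2, and
   whose default aborts.  */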
8230 static void
8231 expand_omp_sections (struct omp_region *region)
8233 tree t, u, vin = NULL, vmain, vnext, l2;
8234 unsigned len;
8235 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8236 gimple_stmt_iterator si, switch_si;
8237 gomp_sections *sections_stmt;
8238 gimple *stmt;
8239 gomp_continue *cont;
8240 edge_iterator ei;
8241 edge e;
8242 struct omp_region *inner;
8243 unsigned i, casei;
8244 bool exit_reachable = region->cont != NULL;
8246 gcc_assert (region->exit != NULL);
8247 entry_bb = region->entry;
8248 l0_bb = single_succ (entry_bb);
8249 l1_bb = region->cont;
8250 l2_bb = region->exit;
8251 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8252 l2 = gimple_block_label (l2_bb);
8253 else
8255 /* This can happen if there are reductions. */
8256 len = EDGE_COUNT (l0_bb->succs);
8257 gcc_assert (len > 0);
8258 e = EDGE_SUCC (l0_bb, len - 1);
8259 si = gsi_last_nondebug_bb (e->dest);
8260 l2 = NULL_TREE;
8261 if (gsi_end_p (si)
8262 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8263 l2 = gimple_block_label (e->dest);
8264 else
8265 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8267 si = gsi_last_nondebug_bb (e->dest);
8268 if (gsi_end_p (si)
8269 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8271 l2 = gimple_block_label (e->dest);
8272 break;
8276 if (exit_reachable)
8277 default_bb = create_empty_bb (l1_bb->prev_bb);
8278 else
8279 default_bb = create_empty_bb (l0_bb);
8281 /* We will build a switch() with enough cases for all the
8282 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8283 and a default case to abort if something goes wrong. */
8284 len = EDGE_COUNT (l0_bb->succs);
8286 /* Use vec::quick_push on label_vec throughout, since we know the size
8287 in advance. */
8288 auto_vec<tree> label_vec (len);
8290 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8291 GIMPLE_OMP_SECTIONS statement. */
8292 si = gsi_last_nondebug_bb (entry_bb);
8293 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8294 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8295 vin = gimple_omp_sections_control (sections_stmt);
8296 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8297 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8298 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8299 tree cond_var = NULL_TREE;
8300 if (reductmp || condtmp)
8302 tree reductions = null_pointer_node, mem = null_pointer_node;
8303 tree memv = NULL_TREE, condtemp = NULL_TREE;
8304 gimple_stmt_iterator gsi = gsi_none ();
8305 gimple *g = NULL;
8306 if (reductmp)
8308 reductions = OMP_CLAUSE_DECL (reductmp);
8309 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8310 g = SSA_NAME_DEF_STMT (reductions);
8311 reductions = gimple_assign_rhs1 (g);
8312 OMP_CLAUSE_DECL (reductmp) = reductions;
8313 gsi = gsi_for_stmt (g);
8315 else
8316 gsi = si;
8317 if (condtmp)
8319 condtemp = OMP_CLAUSE_DECL (condtmp);
8320 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8321 OMP_CLAUSE__CONDTEMP_);
8322 cond_var = OMP_CLAUSE_DECL (c);
8323 tree type = TREE_TYPE (condtemp);
8324 memv = create_tmp_var (type);
8325 TREE_ADDRESSABLE (memv) = 1;
8326 unsigned cnt = 0;
8327 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8328 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8329 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8330 ++cnt;
8331 unsigned HOST_WIDE_INT sz
8332 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8333 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8334 false);
8335 mem = build_fold_addr_expr (memv);
8337 t = build_int_cst (unsigned_type_node, len - 1);
8338 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8339 stmt = gimple_build_call (u, 3, t, reductions, mem);
8340 gimple_call_set_lhs (stmt, vin);
8341 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8342 if (condtmp)
8344 expand_omp_build_assign (&gsi, condtemp, memv, false);
8345 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8346 vin, build_one_cst (TREE_TYPE (cond_var)));
8347 expand_omp_build_assign (&gsi, cond_var, t, false);
8349 if (reductmp)
8351 gsi_remove (&gsi, true);
8352 release_ssa_name (gimple_assign_lhs (g));
8355 else if (!is_combined_parallel (region))
8357 /* If we are not inside a combined parallel+sections region,
8358 call GOMP_sections_start. */
8359 t = build_int_cst (unsigned_type_node, len - 1);
8360 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8361 stmt = gimple_build_call (u, 1, t);
8363 else
8365 /* Otherwise, call GOMP_sections_next. */
8366 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8367 stmt = gimple_build_call (u, 0);
8369 if (!reductmp && !condtmp)
8371 gimple_call_set_lhs (stmt, vin);
8372 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8374 gsi_remove (&si, true);
8376 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8377 L0_BB. */
8378 switch_si = gsi_last_nondebug_bb (l0_bb);
8379 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8380 if (exit_reachable)
8382 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8383 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8384 vmain = gimple_omp_continue_control_use (cont);
8385 vnext = gimple_omp_continue_control_def (cont);
8387 else
8389 vmain = vin;
8390 vnext = NULL_TREE;
8393 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8394 label_vec.quick_push (t);
8395 i = 1;
8397 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8398 for (inner = region->inner, casei = 1;
8399 inner;
8400 inner = inner->next, i++, casei++)
8402 basic_block s_entry_bb, s_exit_bb;
8404 /* Skip optional reduction region. */
8405 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8407 --i;
8408 --casei;
8409 continue;
8412 s_entry_bb = inner->entry;
8413 s_exit_bb = inner->exit;
8415 t = gimple_block_label (s_entry_bb);
8416 u = build_int_cst (unsigned_type_node, casei);
8417 u = build_case_label (u, NULL, t);
8418 label_vec.quick_push (u);
8420 si = gsi_last_nondebug_bb (s_entry_bb);
8421 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8422 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8423 gsi_remove (&si, true);
8424 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8426 if (s_exit_bb == NULL)
8427 continue;
8429 si = gsi_last_nondebug_bb (s_exit_bb);
8430 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8431 gsi_remove (&si, true);
8433 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8436 /* Error handling code goes in DEFAULT_BB. */
8437 t = gimple_block_label (default_bb);
8438 u = build_case_label (NULL, NULL, t);
8439 make_edge (l0_bb, default_bb, 0);
8440 add_bb_to_loop (default_bb, current_loops->tree_root);
8442 stmt = gimple_build_switch (vmain, u, label_vec);
8443 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8444 gsi_remove (&switch_si, true);
8446 si = gsi_start_bb (default_bb);
8447 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8448 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8450 if (exit_reachable)
8452 tree bfn_decl;
8454 /* Code to get the next section goes in L1_BB. */
8455 si = gsi_last_nondebug_bb (l1_bb);
8456 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8458 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8459 stmt = gimple_build_call (bfn_decl, 0);
8460 gimple_call_set_lhs (stmt, vnext);
8461 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8462 if (cond_var)
8464 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8465 vnext, build_one_cst (TREE_TYPE (cond_var)));
8466 expand_omp_build_assign (&si, cond_var, t, false);
8468 gsi_remove (&si, true);
8470 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8473 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8474 si = gsi_last_nondebug_bb (l2_bb);
8475 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8476 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8477 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8478 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8479 else
8480 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8481 stmt = gimple_build_call (t, 0);
8482 if (gimple_omp_return_lhs (gsi_stmt (si)))
8483 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8484 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8485 gsi_remove (&si, true);
8487 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8490 /* Expand code for an OpenMP single or scope directive. We've already expanded
8491 much of the code; here we simply place the GOMP_barrier call. */
8493 static void
8494 expand_omp_single (struct omp_region *region)
8496 basic_block entry_bb, exit_bb;
8497 gimple_stmt_iterator si;
8499 entry_bb = region->entry;
8500 exit_bb = region->exit;
8502 si = gsi_last_nondebug_bb (entry_bb);
8503 enum gimple_code code = gimple_code (gsi_stmt (si));
8504 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8505 gsi_remove (&si, true);
8506 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8508 if (exit_bb == NULL)
8510 gcc_assert (code == GIMPLE_OMP_SCOPE);
8511 return;
8514 si = gsi_last_nondebug_bb (exit_bb);
8515 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8517 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8518 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8520 gsi_remove (&si, true);
8521 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8524 /* Generic expansion for OpenMP synchronization directives: master,
8525 masked, taskgroup, ordered and critical. All we need to do here is
8526 remove the entry and exit markers for REGION. */
8528 static void
8529 expand_omp_synch (struct omp_region *region)
8531 basic_block entry_bb, exit_bb;
8532 gimple_stmt_iterator si;
8534 entry_bb = region->entry;
8535 exit_bb = region->exit;
8537 si = gsi_last_nondebug_bb (entry_bb);
8538 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8539 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8540 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8541 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8542 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8543 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8544 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8545 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8546 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8548 expand_omp_taskreg (region);
8549 return;
8551 gsi_remove (&si, true);
8552 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8554 if (exit_bb)
8556 si = gsi_last_nondebug_bb (exit_bb);
8557 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8558 gsi_remove (&si, true);
8559 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8563 /* Translate enum omp_memory_order to enum memmodel for the embedded
8564 fail clause in there. */
8566 static enum memmodel
8567 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8569 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8571 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8572 switch (mo & OMP_MEMORY_ORDER_MASK)
8574 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8575 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8576 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8577 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8578 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8579 default: break;
8581 gcc_unreachable ();
8582 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8583 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8584 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8585 default: gcc_unreachable ();
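/* For example, with the mapping above an 'omp atomic compare' marked acq_rel
   but with no explicit 'fail' clause gets MEMMODEL_ACQUIRE as its failure
   ordering, and a release-only operation falls back to MEMMODEL_RELAXED,
   mirroring the C11 rule that a compare-exchange failure ordering cannot
   include a release component.  (Illustrative comment, added.)  */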
8589 /* Translate enum omp_memory_order to enum memmodel. The two enums
8590 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8591 is 0 and omp_memory_order has the fail mode encoded in it too. */
8593 static enum memmodel
8594 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8596 enum memmodel ret, fail_ret;
8597 switch (mo & OMP_MEMORY_ORDER_MASK)
8599 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8600 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8601 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8602 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8603 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8604 default: gcc_unreachable ();
8606 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8607 we can just return ret here unconditionally. Otherwise, work around
8608 it here and make sure fail memmodel is not stronger. */
8609 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8610 return ret;
8611 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8612 if (fail_ret > ret)
8613 return fail_ret;
8614 return ret;
8617 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8618 operation as a normal volatile load. */
8620 static bool
8621 expand_omp_atomic_load (basic_block load_bb, tree addr,
8622 tree loaded_val, int index)
8624 enum built_in_function tmpbase;
8625 gimple_stmt_iterator gsi;
8626 basic_block store_bb;
8627 location_t loc;
8628 gimple *stmt;
8629 tree decl, call, type, itype;
8631 gsi = gsi_last_nondebug_bb (load_bb);
8632 stmt = gsi_stmt (gsi);
8633 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8634 loc = gimple_location (stmt);
8636 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8637 is smaller than word size, then expand_atomic_load assumes that the load
8638 is atomic. We could avoid the builtin entirely in this case. */
8640 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8641 decl = builtin_decl_explicit (tmpbase);
8642 if (decl == NULL_TREE)
8643 return false;
8645 type = TREE_TYPE (loaded_val);
8646 itype = TREE_TYPE (TREE_TYPE (decl));
8648 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8649 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8650 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8651 if (!useless_type_conversion_p (type, itype))
8652 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8653 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8655 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8656 gsi_remove (&gsi, true);
8658 store_bb = single_succ (load_bb);
8659 gsi = gsi_last_nondebug_bb (store_bb);
8660 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8661 gsi_remove (&gsi, true);
8663 if (gimple_in_ssa_p (cfun))
8664 update_ssa (TODO_update_ssa_no_phi);
8666 return true;
8669 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8670 operation as a normal volatile store. */
8672 static bool
8673 expand_omp_atomic_store (basic_block load_bb, tree addr,
8674 tree loaded_val, tree stored_val, int index)
8676 enum built_in_function tmpbase;
8677 gimple_stmt_iterator gsi;
8678 basic_block store_bb = single_succ (load_bb);
8679 location_t loc;
8680 gimple *stmt;
8681 tree decl, call, type, itype;
8682 machine_mode imode;
8683 bool exchange;
8685 gsi = gsi_last_nondebug_bb (load_bb);
8686 stmt = gsi_stmt (gsi);
8687 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8689 /* If the load value is needed, then this isn't a store but an exchange. */
8690 exchange = gimple_omp_atomic_need_value_p (stmt);
8692 gsi = gsi_last_nondebug_bb (store_bb);
8693 stmt = gsi_stmt (gsi);
8694 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8695 loc = gimple_location (stmt);
8697 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8698 is smaller than word size, then expand_atomic_store assumes that the store
8699 is atomic. We could avoid the builtin entirely in this case. */
8701 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8702 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8703 decl = builtin_decl_explicit (tmpbase);
8704 if (decl == NULL_TREE)
8705 return false;
8707 type = TREE_TYPE (stored_val);
8709 /* Dig out the type of the function's second argument. */
8710 itype = TREE_TYPE (decl);
8711 itype = TYPE_ARG_TYPES (itype);
8712 itype = TREE_CHAIN (itype);
8713 itype = TREE_VALUE (itype);
8714 imode = TYPE_MODE (itype);
8716 if (exchange && !can_atomic_exchange_p (imode, true))
8717 return false;
8719 if (!useless_type_conversion_p (itype, type))
8720 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8721 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8722 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8723 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8724 if (exchange)
8726 if (!useless_type_conversion_p (type, itype))
8727 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8728 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8731 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8732 gsi_remove (&gsi, true);
8734 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8735 gsi = gsi_last_nondebug_bb (load_bb);
8736 gsi_remove (&gsi, true);
8738 if (gimple_in_ssa_p (cfun))
8739 update_ssa (TODO_update_ssa_no_phi);
8741 return true;
8744 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8745 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8746 size of the data type, and thus usable to find the index of the builtin
8747 decl. Returns false if the expression is not of the proper form. */
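/* Illustrative example (added, not from the original source): for
     #pragma omp atomic
     x = x + 1;
   with no capture of the old or new value, this routine emits a call
   equivalent to __atomic_fetch_add (&x, 1, memmodel) and discards the
   result, instead of falling back to a compare-and-swap loop.  */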
8749 static bool
8750 expand_omp_atomic_fetch_op (basic_block load_bb,
8751 tree addr, tree loaded_val,
8752 tree stored_val, int index)
8754 enum built_in_function oldbase, newbase, tmpbase;
8755 tree decl, itype, call;
8756 tree lhs, rhs;
8757 basic_block store_bb = single_succ (load_bb);
8758 gimple_stmt_iterator gsi;
8759 gimple *stmt;
8760 location_t loc;
8761 enum tree_code code;
8762 bool need_old, need_new;
8763 machine_mode imode;
8765 /* We expect to find the following sequences:
8767 load_bb:
8768 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8770 store_bb:
8771 val = tmp OP something; (or: something OP tmp)
8772 GIMPLE_OMP_ATOMIC_STORE (val)
8774 ???FIXME: Allow a more flexible sequence.
8775 Perhaps use data flow to pick the statements.
8779 gsi = gsi_after_labels (store_bb);
8780 stmt = gsi_stmt (gsi);
8781 if (is_gimple_debug (stmt))
8783 gsi_next_nondebug (&gsi);
8784 if (gsi_end_p (gsi))
8785 return false;
8786 stmt = gsi_stmt (gsi);
8788 loc = gimple_location (stmt);
8789 if (!is_gimple_assign (stmt))
8790 return false;
8791 gsi_next_nondebug (&gsi);
8792 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8793 return false;
8794 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8795 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8796 enum omp_memory_order omo
8797 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8798 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8799 gcc_checking_assert (!need_old || !need_new);
8801 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8802 return false;
8804 /* Check for one of the supported fetch-op operations. */
8805 code = gimple_assign_rhs_code (stmt);
8806 switch (code)
8808 case PLUS_EXPR:
8809 case POINTER_PLUS_EXPR:
8810 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8811 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8812 break;
8813 case MINUS_EXPR:
8814 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8815 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8816 break;
8817 case BIT_AND_EXPR:
8818 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8819 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8820 break;
8821 case BIT_IOR_EXPR:
8822 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8823 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8824 break;
8825 case BIT_XOR_EXPR:
8826 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8827 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8828 break;
8829 default:
8830 return false;
8833 /* Make sure the expression is of the proper form. */
8834 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8835 rhs = gimple_assign_rhs2 (stmt);
8836 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8837 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8838 rhs = gimple_assign_rhs1 (stmt);
8839 else
8840 return false;
8842 tmpbase = ((enum built_in_function)
8843 ((need_new ? newbase : oldbase) + index + 1));
8844 decl = builtin_decl_explicit (tmpbase);
8845 if (decl == NULL_TREE)
8846 return false;
8847 itype = TREE_TYPE (TREE_TYPE (decl));
8848 imode = TYPE_MODE (itype);
8850 /* We could test all of the various optabs involved, but the fact of the
8851 matter is that (with the exception of i486 vs i586 and xadd) all targets
8852 that support any atomic operation optab also implement compare-and-swap.
8853 Let optabs.cc take care of expanding any compare-and-swap loop. */
8854 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8855 return false;
8857 gsi = gsi_last_nondebug_bb (load_bb);
8858 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8860 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8861 It only requires that the operation happen atomically, so we pass the
8862 memory model requested by the directive, which defaults to RELAXED. */
8863 call = build_call_expr_loc (loc, decl, 3, addr,
8864 fold_convert_loc (loc, itype, rhs),
8865 build_int_cst (NULL, mo));
8867 if (need_old || need_new)
8869 lhs = need_old ? loaded_val : stored_val;
8870 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8871 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8873 else
8874 call = fold_convert_loc (loc, void_type_node, call);
8875 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8876 gsi_remove (&gsi, true);
8878 gsi = gsi_last_nondebug_bb (store_bb);
8879 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8880 gsi_remove (&gsi, true);
8881 gsi = gsi_last_nondebug_bb (store_bb);
8882 stmt = gsi_stmt (gsi);
8883 gsi_remove (&gsi, true);
8885 if (gimple_in_ssa_p (cfun))
8887 release_defs (stmt);
8888 update_ssa (TODO_update_ssa_no_phi);
8891 return true;
8894 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8895 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8896 Returns false if the expression is not of the proper form. */
8898 static bool
8899 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8900 tree loaded_val, tree stored_val, int index)
8902 /* We expect to find the following sequences:
8904 load_bb:
8905 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8907 store_bb:
8908 val = tmp == e ? d : tmp;
8909 GIMPLE_OMP_ATOMIC_STORE (val)
8911 or in store_bb instead:
8912 tmp2 = tmp == e;
8913 val = tmp2 ? d : tmp;
8914 GIMPLE_OMP_ATOMIC_STORE (val)
8916 or:
8917 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8918 val = e == tmp3 ? d : tmp;
8919 GIMPLE_OMP_ATOMIC_STORE (val)
8921 etc. */
8924 basic_block store_bb = single_succ (load_bb);
8925 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8926 gimple *store_stmt = gsi_stmt (gsi);
8927 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8928 return false;
8929 gsi_prev_nondebug (&gsi);
8930 if (gsi_end_p (gsi))
8931 return false;
8932 gimple *condexpr_stmt = gsi_stmt (gsi);
8933 if (!is_gimple_assign (condexpr_stmt)
8934 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8935 return false;
8936 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8937 return false;
8938 gimple *cond_stmt = NULL;
8939 gimple *vce_stmt = NULL;
8940 gsi_prev_nondebug (&gsi);
8941 if (!gsi_end_p (gsi))
8943 cond_stmt = gsi_stmt (gsi);
8944 if (!is_gimple_assign (cond_stmt))
8945 return false;
8946 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8948 gsi_prev_nondebug (&gsi);
8949 if (!gsi_end_p (gsi))
8951 vce_stmt = gsi_stmt (gsi);
8952 if (!is_gimple_assign (vce_stmt)
8953 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8954 return false;
8957 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8958 std::swap (vce_stmt, cond_stmt);
8959 else
8960 return false;
8961 if (vce_stmt)
8963 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8964 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8965 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8966 return false;
8967 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8968 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8969 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8970 TYPE_SIZE (TREE_TYPE (loaded_val))))
8971 return false;
8972 gsi_prev_nondebug (&gsi);
8973 if (!gsi_end_p (gsi))
8974 return false;
8977 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8978 tree cond_op1, cond_op2;
8979 if (cond_stmt)
8981 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8982 return false;
8983 cond_op1 = gimple_assign_rhs1 (cond_stmt);
8984 cond_op2 = gimple_assign_rhs2 (cond_stmt);
8986 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
8987 return false;
8988 else
8990 cond_op1 = TREE_OPERAND (cond, 0);
8991 cond_op2 = TREE_OPERAND (cond, 1);
8993 tree d;
8994 if (TREE_CODE (cond) == NE_EXPR)
8996 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
8997 return false;
8998 d = gimple_assign_rhs3 (condexpr_stmt);
9000 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9001 return false;
9002 else
9003 d = gimple_assign_rhs2 (condexpr_stmt);
9004 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9005 if (operand_equal_p (e, cond_op1))
9006 e = cond_op2;
9007 else if (operand_equal_p (e, cond_op2))
9008 e = cond_op1;
9009 else
9010 return false;
9012 location_t loc = gimple_location (store_stmt);
9013 gimple *load_stmt = last_stmt (load_bb);
9014 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9015 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9016 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9017 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9018 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9019 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9020 gcc_checking_assert (!need_old || !need_new);
9022 enum built_in_function fncode
9023 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9024 + index + 1);
9025 tree cmpxchg = builtin_decl_explicit (fncode);
9026 if (cmpxchg == NULL_TREE)
9027 return false;
9028 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9030 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9031 || !can_atomic_load_p (TYPE_MODE (itype)))
9032 return false;
9034 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9035 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9036 return false;
9038 gsi = gsi_for_stmt (store_stmt);
9039 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9041 tree ne = create_tmp_reg (itype);
9042 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9043 gimple_set_location (g, loc);
9044 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9045 e = ne;
9047 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9049 tree nd = create_tmp_reg (itype);
9050 enum tree_code code;
9051 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9053 code = VIEW_CONVERT_EXPR;
9054 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9056 else
9057 code = NOP_EXPR;
9058 gimple *g = gimple_build_assign (nd, code, d);
9059 gimple_set_location (g, loc);
9060 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9061 d = nd;
9064 tree ctype = build_complex_type (itype);
9065 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
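  /* Added note: FLAG carries the access size in its low bits, with bit 8
     marking a weak compare-exchange.  The ATOMIC_COMPARE_EXCHANGE internal
     call built below yields a complex value whose REALPART_EXPR is the value
     that was in memory and whose IMAGPART_EXPR is the bool success flag,
     which is why the result is decomposed with REALPART/IMAGPART
     assignments further down.  */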
9066 gimple *g
9067 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9068 build_int_cst (integer_type_node, flag),
9069 mo, fmo);
9070 tree cres = create_tmp_reg (ctype);
9071 gimple_call_set_lhs (g, cres);
9072 gimple_set_location (g, loc);
9073 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9075 if (cond_stmt || need_old || need_new)
9077 tree im = create_tmp_reg (itype);
9078 g = gimple_build_assign (im, IMAGPART_EXPR,
9079 build1 (IMAGPART_EXPR, itype, cres));
9080 gimple_set_location (g, loc);
9081 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9083 tree re = NULL_TREE;
9084 if (need_old || need_new)
9086 re = create_tmp_reg (itype);
9087 g = gimple_build_assign (re, REALPART_EXPR,
9088 build1 (REALPART_EXPR, itype, cres));
9089 gimple_set_location (g, loc);
9090 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9093 if (cond_stmt)
9095 g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9096 NOP_EXPR, im);
9097 gimple_set_location (g, loc);
9098 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9100 else if (need_new)
9102 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9103 build2 (NE_EXPR, boolean_type_node,
9104 im, build_zero_cst (itype)),
9105 d, re);
9106 gimple_set_location (g, loc);
9107 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9108 re = gimple_assign_lhs (g);
9111 if (need_old || need_new)
9113 tree v = need_old ? loaded_val : stored_val;
9114 enum tree_code code;
9115 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9117 code = VIEW_CONVERT_EXPR;
9118 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9120 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9121 code = NOP_EXPR;
9122 else
9123 code = TREE_CODE (re);
9124 g = gimple_build_assign (v, code, re);
9125 gimple_set_location (g, loc);
9126 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9130 gsi_remove (&gsi, true);
9131 gsi = gsi_for_stmt (load_stmt);
9132 gsi_remove (&gsi, true);
9133 gsi = gsi_for_stmt (condexpr_stmt);
9134 gsi_remove (&gsi, true);
9135 if (cond_stmt)
9137 gsi = gsi_for_stmt (cond_stmt);
9138 gsi_remove (&gsi, true);
9140 if (vce_stmt)
9142 gsi = gsi_for_stmt (vce_stmt);
9143 gsi_remove (&gsi, true);
9146 return true;
9149 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9151 oldval = *addr;
9152 repeat:
9153 newval = rhs; // with oldval replacing *addr in rhs
9154 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9155 if (oldval != newval)
9156 goto repeat;
9158 INDEX is log2 of the size of the data type, and thus usable to find the
9159 index of the builtin decl. */
9161 static bool
9162 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9163 tree addr, tree loaded_val, tree stored_val,
9164 int index)
9166 tree loadedi, storedi, initial, new_storedi, old_vali;
9167 tree type, itype, cmpxchg, iaddr, atype;
9168 gimple_stmt_iterator si;
9169 basic_block loop_header = single_succ (load_bb);
9170 gimple *phi, *stmt;
9171 edge e;
9172 enum built_in_function fncode;
9174 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9175 + index + 1);
9176 cmpxchg = builtin_decl_explicit (fncode);
9177 if (cmpxchg == NULL_TREE)
9178 return false;
9179 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9180 atype = type;
9181 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9183 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9184 || !can_atomic_load_p (TYPE_MODE (itype)))
9185 return false;
9187 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9188 si = gsi_last_nondebug_bb (load_bb);
9189 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9190 location_t loc = gimple_location (gsi_stmt (si));
9191 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9192 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9193 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9195 /* For floating-point values, we'll need to view-convert them to integers
9196 so that we can perform the atomic compare and swap. Simplify the
9197 following code by always setting up the "i"ntegral variables. */
9198 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9200 tree iaddr_val;
9202 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9203 true));
9204 atype = itype;
9205 iaddr_val
9206 = force_gimple_operand_gsi (&si,
9207 fold_convert (TREE_TYPE (iaddr), addr),
9208 false, NULL_TREE, true, GSI_SAME_STMT);
9209 stmt = gimple_build_assign (iaddr, iaddr_val);
9210 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9211 loadedi = create_tmp_var (itype);
9212 if (gimple_in_ssa_p (cfun))
9213 loadedi = make_ssa_name (loadedi);
9215 else
9217 iaddr = addr;
9218 loadedi = loaded_val;
9221 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9222 tree loaddecl = builtin_decl_explicit (fncode);
9223 if (loaddecl)
9224 initial
9225 = fold_convert (atype,
9226 build_call_expr (loaddecl, 2, iaddr,
9227 build_int_cst (NULL_TREE,
9228 MEMMODEL_RELAXED)));
9229 else
9231 tree off
9232 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9233 true), 0);
9234 initial = build2 (MEM_REF, atype, iaddr, off);
9237 initial
9238 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9239 GSI_SAME_STMT);
9241 /* Move the value to the LOADEDI temporary. */
9242 if (gimple_in_ssa_p (cfun))
9244 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9245 phi = create_phi_node (loadedi, loop_header);
9246 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9247 initial);
9249 else
9250 gsi_insert_before (&si,
9251 gimple_build_assign (loadedi, initial),
9252 GSI_SAME_STMT);
9253 if (loadedi != loaded_val)
9255 gimple_stmt_iterator gsi2;
9256 tree x;
9258 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9259 gsi2 = gsi_start_bb (loop_header);
9260 if (gimple_in_ssa_p (cfun))
9262 gassign *stmt;
9263 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9264 true, GSI_SAME_STMT);
9265 stmt = gimple_build_assign (loaded_val, x);
9266 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9268 else
9270 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9271 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9272 true, GSI_SAME_STMT);
9275 gsi_remove (&si, true);
9277 si = gsi_last_nondebug_bb (store_bb);
9278 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9280 if (iaddr == addr)
9281 storedi = stored_val;
9282 else
9283 storedi
9284 = force_gimple_operand_gsi (&si,
9285 build1 (VIEW_CONVERT_EXPR, itype,
9286 stored_val), true, NULL_TREE, true,
9287 GSI_SAME_STMT);
9289 /* Build the compare&swap statement. */
9290 tree ctype = build_complex_type (itype);
9291 int flag = int_size_in_bytes (itype);
9292 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9293 ctype, 6, iaddr, loadedi,
9294 storedi,
9295 build_int_cst (integer_type_node,
9296 flag),
9297 mo, fmo);
9298 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9299 new_storedi = force_gimple_operand_gsi (&si,
9300 fold_convert (TREE_TYPE (loadedi),
9301 new_storedi),
9302 true, NULL_TREE,
9303 true, GSI_SAME_STMT);
9305 if (gimple_in_ssa_p (cfun))
9306 old_vali = loadedi;
9307 else
9309 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9310 stmt = gimple_build_assign (old_vali, loadedi);
9311 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9313 stmt = gimple_build_assign (loadedi, new_storedi);
9314 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9317 /* Note that we always perform the comparison as an integer, even for
9318 floating point. This allows the atomic operation to properly
9319 succeed even with NaNs and -0.0. */
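  /* Concrete illustration (added comment): if the comparison were done in
     floating point, a location holding NaN would never compare equal to the
     value just read, and +0.0 == -0.0 would compare equal even though the
     bit patterns differ, so the loop could spin forever or stop without
     having stored the intended bits.  Comparing the integer views avoids
     both problems.  */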
9320 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9321 stmt = gimple_build_cond_empty (ne);
9322 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9324 /* Update cfg. */
9325 e = single_succ_edge (store_bb);
9326 e->flags &= ~EDGE_FALLTHRU;
9327 e->flags |= EDGE_FALSE_VALUE;
9328 /* Expect no looping. */
9329 e->probability = profile_probability::guessed_always ();
9331 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9332 e->probability = profile_probability::guessed_never ();
9334 /* Copy the new value to loadedi (we already did that before the condition
9335 if we are not in SSA). */
9336 if (gimple_in_ssa_p (cfun))
9338 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9339 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9342 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9343 gsi_remove (&si, true);
9345 class loop *loop = alloc_loop ();
9346 loop->header = loop_header;
9347 loop->latch = store_bb;
9348 add_loop (loop, loop_header->loop_father);
9350 if (gimple_in_ssa_p (cfun))
9351 update_ssa (TODO_update_ssa_no_phi);
9353 return true;
9356 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9358 GOMP_atomic_start ();
9359 *addr = rhs;
9360 GOMP_atomic_end ();
9362 The result is not globally atomic, but works so long as all parallel
9363 references are within #pragma omp atomic directives. According to
9364 responses received from omp@openmp.org, this appears to be within spec.
9365 That makes sense, since it is how several other compilers handle
9366 this situation as well.
9367 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9368 expanding. STORED_VAL is the operand of the matching
9369 GIMPLE_OMP_ATOMIC_STORE.
9371 We replace
9372 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9373 loaded_val = *addr;
9375 and replace
9376 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9377 *addr = stored_val;
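/* Added note: GOMP_atomic_start and GOMP_atomic_end are expected to bracket
   the access with a single process-wide mutex in libgomp, which is why this
   fallback serializes all atomic regions but can never fail.  */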
9380 static bool
9381 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9382 tree addr, tree loaded_val, tree stored_val)
9384 gimple_stmt_iterator si;
9385 gassign *stmt;
9386 tree t;
9388 si = gsi_last_nondebug_bb (load_bb);
9389 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9391 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9392 t = build_call_expr (t, 0);
9393 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9395 tree mem = build_simple_mem_ref (addr);
9396 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9397 TREE_OPERAND (mem, 1)
9398 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9399 true),
9400 TREE_OPERAND (mem, 1));
9401 stmt = gimple_build_assign (loaded_val, mem);
9402 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9403 gsi_remove (&si, true);
9405 si = gsi_last_nondebug_bb (store_bb);
9406 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9408 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9409 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9411 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9412 t = build_call_expr (t, 0);
9413 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9414 gsi_remove (&si, true);
9416 if (gimple_in_ssa_p (cfun))
9417 update_ssa (TODO_update_ssa_no_phi);
9418 return true;
9421 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
9422 using expand_omp_atomic_fetch_op. If that fails, we try to
9423 call expand_omp_atomic_pipeline, and if it fails too, the
9424 ultimate fallback is wrapping the operation in a mutex
9425 (expand_omp_atomic_mutex). REGION is the atomic region built
9426 by build_omp_regions_1(). */
9428 static void
9429 expand_omp_atomic (struct omp_region *region)
9431 basic_block load_bb = region->entry, store_bb = region->exit;
9432 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9433 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9434 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9435 tree addr = gimple_omp_atomic_load_rhs (load);
9436 tree stored_val = gimple_omp_atomic_store_val (store);
9437 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9438 HOST_WIDE_INT index;
9440 /* Make sure the type is one of the supported sizes. */
9441 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9442 index = exact_log2 (index);
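  /* Clarifying note (added): INDEX is 0..4 for operand sizes of 1, 2, 4, 8
     and 16 bytes.  The helpers called from here add "index + 1" to a
     BUILT_IN_*_N enumerator, relying on the _1, _2, _4, _8 and _16 builtin
     variants immediately following the _N placeholder in that order.  */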
9443 if (index >= 0 && index <= 4)
9445 unsigned int align = TYPE_ALIGN_UNIT (type);
9447 /* __sync builtins require strict data alignment. */
9448 if (exact_log2 (align) >= index)
9450 /* Atomic load. */
9451 scalar_mode smode;
9452 if (loaded_val == stored_val
9453 && (is_int_mode (TYPE_MODE (type), &smode)
9454 || is_float_mode (TYPE_MODE (type), &smode))
9455 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9456 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9457 return;
9459 /* Atomic store. */
9460 if ((is_int_mode (TYPE_MODE (type), &smode)
9461 || is_float_mode (TYPE_MODE (type), &smode))
9462 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9463 && store_bb == single_succ (load_bb)
9464 && first_stmt (store_bb) == store
9465 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9466 stored_val, index))
9467 return;
9469 /* When possible, use specialized atomic update functions. */
9470 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9471 && store_bb == single_succ (load_bb)
9472 && expand_omp_atomic_fetch_op (load_bb, addr,
9473 loaded_val, stored_val, index))
9474 return;
9476 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9477 if (store_bb == single_succ (load_bb)
9478 && !gimple_in_ssa_p (cfun)
9479 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9480 index))
9481 return;
9483 /* If we don't have specialized __sync builtins, try and implement
9484 as a compare and swap loop. */
9485 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9486 loaded_val, stored_val, index))
9487 return;
9491 /* The ultimate fallback is wrapping the operation in a mutex. */
9492 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9495 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9496 at REGION_EXIT. */
9498 static void
9499 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9500 basic_block region_exit)
9502 class loop *outer = region_entry->loop_father;
9503 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9505 /* Don't parallelize the kernels region if it contains more than one outer
9506 loop. */
9507 unsigned int nr_outer_loops = 0;
9508 class loop *single_outer = NULL;
9509 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9511 gcc_assert (loop_outer (loop) == outer);
9513 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9514 continue;
9516 if (region_exit != NULL
9517 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9518 continue;
9520 nr_outer_loops++;
9521 single_outer = loop;
9523 if (nr_outer_loops != 1)
9524 return;
9526 for (class loop *loop = single_outer->inner;
9527 loop != NULL;
9528 loop = loop->inner)
9529 if (loop->next)
9530 return;
9532 /* Mark the loops in the region. */
9533 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9534 loop->in_oacc_kernels_region = true;
9537 /* Build target argument identifier from the DEVICE identifier, value
9538 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9540 static tree
9541 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9543 tree t = build_int_cst (integer_type_node, device);
9544 if (subseqent_param)
9545 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9546 build_int_cst (integer_type_node,
9547 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9548 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9549 build_int_cst (integer_type_node, id));
9550 return t;
9553 /* Like above, but return it in a type that can be directly stored as an element
9554 of the argument array. */
9556 static tree
9557 get_target_argument_identifier (int device, bool subseqent_param, int id)
9559 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9560 return fold_convert (ptr_type_node, t);
9563 /* Return a target argument consisting of DEVICE identifier, value identifier
9564 ID, and the actual VALUE. */
9566 static tree
9567 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9568 tree value)
9570 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9571 fold_convert (integer_type_node, value),
9572 build_int_cst (unsigned_type_node,
9573 GOMP_TARGET_ARG_VALUE_SHIFT));
9574 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9575 get_target_argument_identifier_1 (device, false, id));
9576 t = fold_convert (ptr_type_node, t);
9577 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9580 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9581 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9582 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9583 arguments. */
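/* Illustrative sketch (added): a small value such as num_teams == 4 is
   packed into a single pointer-sized argument roughly as
     (4 << GOMP_TARGET_ARG_VALUE_SHIFT) | GOMP_TARGET_ARG_DEVICE_ALL
       | GOMP_TARGET_ARG_NUM_TEAMS
   whereas a value that does not fit in that signed 16-bit range is passed as
   a separate follow-up array element after an identifier that has
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set.  */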
9585 static void
9586 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9587 int id, tree value, vec <tree> *args)
9589 if (tree_fits_shwi_p (value)
9590 && tree_to_shwi (value) > -(1 << 15)
9591 && tree_to_shwi (value) < (1 << 15))
9592 args->quick_push (get_target_argument_value (gsi, device, id, value));
9593 else
9595 args->quick_push (get_target_argument_identifier (device, true, id));
9596 value = fold_convert (ptr_type_node, value);
9597 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9598 GSI_SAME_STMT);
9599 args->quick_push (value);
9603 /* Create an array of arguments that is then passed to GOMP_target. */
9605 static tree
9606 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9608 auto_vec <tree, 6> args;
9609 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9610 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9611 if (c)
9612 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9613 else
9614 t = integer_minus_one_node;
9615 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9616 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9618 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9619 if (c)
9620 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9621 else
9622 t = integer_minus_one_node;
9623 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9624 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9625 &args);
9627 /* Produce more, perhaps device specific, arguments here. */
9629 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9630 args.length () + 1),
9631 ".omp_target_args");
9632 for (unsigned i = 0; i < args.length (); i++)
9634 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9635 build_int_cst (integer_type_node, i),
9636 NULL_TREE, NULL_TREE);
9637 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9638 GSI_SAME_STMT);
9640 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9641 build_int_cst (integer_type_node, args.length ()),
9642 NULL_TREE, NULL_TREE);
9643 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9644 GSI_SAME_STMT);
9645 TREE_ADDRESSABLE (argarray) = 1;
9646 return build_fold_addr_expr (argarray);
9649 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9651 static void
9652 expand_omp_target (struct omp_region *region)
9654 basic_block entry_bb, exit_bb, new_bb;
9655 struct function *child_cfun;
9656 tree child_fn, block, t;
9657 gimple_stmt_iterator gsi;
9658 gomp_target *entry_stmt;
9659 gimple *stmt;
9660 edge e;
9661 bool offloaded;
9662 int target_kind;
9664 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9665 target_kind = gimple_omp_target_kind (entry_stmt);
9666 new_bb = region->entry;
9668 offloaded = is_gimple_omp_offloaded (entry_stmt);
9669 switch (target_kind)
9671 case GF_OMP_TARGET_KIND_REGION:
9672 case GF_OMP_TARGET_KIND_UPDATE:
9673 case GF_OMP_TARGET_KIND_ENTER_DATA:
9674 case GF_OMP_TARGET_KIND_EXIT_DATA:
9675 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9676 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9677 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9678 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9679 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9680 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9681 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9682 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9683 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9684 case GF_OMP_TARGET_KIND_DATA:
9685 case GF_OMP_TARGET_KIND_OACC_DATA:
9686 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9687 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9688 break;
9689 default:
9690 gcc_unreachable ();
9693 child_fn = NULL_TREE;
9694 child_cfun = NULL;
9695 if (offloaded)
9697 child_fn = gimple_omp_target_child_fn (entry_stmt);
9698 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9701 /* Supported by expand_omp_taskreg, but not here. */
9702 if (child_cfun != NULL)
9703 gcc_checking_assert (!child_cfun->cfg);
9704 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9706 entry_bb = region->entry;
9707 exit_bb = region->exit;
9709 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9710 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9712 /* From here on, all OpenACC compute constructs are mapped to
9713 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9714 To distinguish between them, we attach attributes. */
9715 switch (target_kind)
9717 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9718 DECL_ATTRIBUTES (child_fn)
9719 = tree_cons (get_identifier ("oacc parallel"),
9720 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9721 break;
9722 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9723 DECL_ATTRIBUTES (child_fn)
9724 = tree_cons (get_identifier ("oacc kernels"),
9725 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9726 break;
9727 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9728 DECL_ATTRIBUTES (child_fn)
9729 = tree_cons (get_identifier ("oacc serial"),
9730 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9731 break;
9732 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9733 DECL_ATTRIBUTES (child_fn)
9734 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9735 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9736 break;
9737 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9738 DECL_ATTRIBUTES (child_fn)
9739 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9740 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9741 break;
9742 default:
9743 /* Make sure we don't miss any. */
9744 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9745 && is_gimple_omp_offloaded (entry_stmt)));
9746 break;
9749 if (offloaded)
9751 unsigned srcidx, dstidx, num;
9753 /* If the offloading region needs data sent from the parent
9754 function, then the very first statement (except possible
9755 tree profile counter updates) of the offloading body
9756 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9757 &.OMP_DATA_O is passed as an argument to the child function,
9758 we need to replace it with the argument as seen by the child
9759 function.
9761 In most cases, this will end up being the identity assignment
9762 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9763 a function call that has been inlined, the original PARM_DECL
9764 .OMP_DATA_I may have been converted into a different local
9765 variable, in which case we need to keep the assignment. */
9766 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9767 if (data_arg)
9769 basic_block entry_succ_bb = single_succ (entry_bb);
9770 gimple_stmt_iterator gsi;
9771 tree arg;
9772 gimple *tgtcopy_stmt = NULL;
9773 tree sender = TREE_VEC_ELT (data_arg, 0);
9775 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9777 gcc_assert (!gsi_end_p (gsi));
9778 stmt = gsi_stmt (gsi);
9779 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9780 continue;
9782 if (gimple_num_ops (stmt) == 2)
9784 tree arg = gimple_assign_rhs1 (stmt);
9786 /* We're ignoring the subcode because we're
9787 effectively doing a STRIP_NOPS. */
9789 if (TREE_CODE (arg) == ADDR_EXPR
9790 && TREE_OPERAND (arg, 0) == sender)
9792 tgtcopy_stmt = stmt;
9793 break;
9798 gcc_assert (tgtcopy_stmt != NULL);
9799 arg = DECL_ARGUMENTS (child_fn);
9801 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9802 gsi_remove (&gsi, true);
9805 /* Declare local variables needed in CHILD_CFUN. */
9806 block = DECL_INITIAL (child_fn);
9807 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9808 /* The gimplifier could record temporaries in the offloading block
9809 rather than in the containing function's local_decls chain,
9810 which would mean cgraph missed finalizing them. Do it now. */
9811 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9812 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9813 varpool_node::finalize_decl (t);
9814 DECL_SAVED_TREE (child_fn) = NULL;
9815 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9816 gimple_set_body (child_fn, NULL);
9817 TREE_USED (block) = 1;
9819 /* Reset DECL_CONTEXT on function arguments. */
9820 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9821 DECL_CONTEXT (t) = child_fn;
9823 /* Split ENTRY_BB at GIMPLE_*,
9824 so that it can be moved to the child function. */
9825 gsi = gsi_last_nondebug_bb (entry_bb);
9826 stmt = gsi_stmt (gsi);
9827 gcc_assert (stmt
9828 && gimple_code (stmt) == gimple_code (entry_stmt));
9829 e = split_block (entry_bb, stmt);
9830 gsi_remove (&gsi, true);
9831 entry_bb = e->dest;
9832 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9834 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9835 if (exit_bb)
9837 gsi = gsi_last_nondebug_bb (exit_bb);
9838 gcc_assert (!gsi_end_p (gsi)
9839 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9840 stmt = gimple_build_return (NULL);
9841 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9842 gsi_remove (&gsi, true);
9845 /* Move the offloading region into CHILD_CFUN. */
9847 block = gimple_block (entry_stmt);
9849 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9850 if (exit_bb)
9851 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9852 /* When the OMP expansion process cannot guarantee an up-to-date
9853 loop tree, arrange for the child function to fix up loops.
9854 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9855 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9857 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9858 num = vec_safe_length (child_cfun->local_decls);
9859 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9861 t = (*child_cfun->local_decls)[srcidx];
9862 if (DECL_CONTEXT (t) == cfun->decl)
9863 continue;
9864 if (srcidx != dstidx)
9865 (*child_cfun->local_decls)[dstidx] = t;
9866 dstidx++;
9868 if (dstidx != num)
9869 vec_safe_truncate (child_cfun->local_decls, dstidx);
9871 /* Inform the callgraph about the new function. */
9872 child_cfun->curr_properties = cfun->curr_properties;
9873 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9874 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9875 cgraph_node *node = cgraph_node::get_create (child_fn);
9876 node->parallelized_function = 1;
9877 cgraph_node::add_new_function (child_fn, true);
9879 /* Add the new function to the offload table. */
9880 if (ENABLE_OFFLOADING)
9882 if (in_lto_p)
9883 DECL_PRESERVE_P (child_fn) = 1;
9884 vec_safe_push (offload_funcs, child_fn);
9887 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9888 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9890 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9891 fixed in a following pass. */
9892 push_cfun (child_cfun);
9893 if (need_asm)
9894 assign_assembler_name_if_needed (child_fn);
9895 cgraph_edge::rebuild_edges ();
9897 /* Some EH regions might become dead, see PR34608. If
9898 pass_cleanup_cfg isn't the first pass to happen with the
9899 new child, these dead EH edges might cause problems.
9900 Clean them up now. */
9901 if (flag_exceptions)
9903 basic_block bb;
9904 bool changed = false;
9906 FOR_EACH_BB_FN (bb, cfun)
9907 changed |= gimple_purge_dead_eh_edges (bb);
9908 if (changed)
9909 cleanup_tree_cfg ();
9911 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9912 verify_loop_structure ();
9913 pop_cfun ();
9915 if (dump_file && !gimple_in_ssa_p (cfun))
9917 omp_any_child_fn_dumped = true;
9918 dump_function_header (dump_file, child_fn, dump_flags);
9919 dump_function_to_file (child_fn, dump_file, dump_flags);
9922 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9925 /* Emit a library call to launch the offloading region, or do data
9926 transfers. */
9927 tree t1, t2, t3, t4, depend, c, clauses;
9928 enum built_in_function start_ix;
9929 unsigned int flags_i = 0;
9931 switch (gimple_omp_target_kind (entry_stmt))
9933 case GF_OMP_TARGET_KIND_REGION:
9934 start_ix = BUILT_IN_GOMP_TARGET;
9935 break;
9936 case GF_OMP_TARGET_KIND_DATA:
9937 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9938 break;
9939 case GF_OMP_TARGET_KIND_UPDATE:
9940 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9941 break;
9942 case GF_OMP_TARGET_KIND_ENTER_DATA:
9943 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9944 break;
9945 case GF_OMP_TARGET_KIND_EXIT_DATA:
9946 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9947 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9948 break;
9949 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9950 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9951 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9952 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9953 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9954 start_ix = BUILT_IN_GOACC_PARALLEL;
9955 break;
9956 case GF_OMP_TARGET_KIND_OACC_DATA:
9957 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9958 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9959 start_ix = BUILT_IN_GOACC_DATA_START;
9960 break;
9961 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9962 start_ix = BUILT_IN_GOACC_UPDATE;
9963 break;
9964 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9965 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9966 break;
9967 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9968 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9969 break;
9970 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9971 start_ix = BUILT_IN_GOACC_DECLARE;
9972 break;
9973 default:
9974 gcc_unreachable ();
9977 clauses = gimple_omp_target_clauses (entry_stmt);
9979 tree device = NULL_TREE;
9980 location_t device_loc = UNKNOWN_LOCATION;
9981 tree goacc_flags = NULL_TREE;
9982 if (is_gimple_omp_oacc (entry_stmt))
9984 /* By default, no GOACC_FLAGs are set. */
9985 goacc_flags = integer_zero_node;
9987 else
9989 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9990 if (c)
9992 device = OMP_CLAUSE_DEVICE_ID (c);
9993 device_loc = OMP_CLAUSE_LOCATION (c);
9994 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
9995 sorry_at (device_loc, "%<ancestor%> not yet supported");
9997 else
9999 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
10000 library choose). */
10001 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10002 device_loc = gimple_location (entry_stmt);
10005 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10006 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
10007 nowait doesn't appear. */
10008 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10009 c = NULL;
10010 if (c)
10011 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10014 /* By default, there is no conditional. */
10015 tree cond = NULL_TREE;
10016 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10017 if (c)
10018 cond = OMP_CLAUSE_IF_EXPR (c);
10019 /* If we found the clause 'if (cond)', build:
10020 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
10021 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10022 if (cond)
10024 tree *tp;
10025 if (is_gimple_omp_oacc (entry_stmt))
10026 tp = &goacc_flags;
10027 else
10029 /* Ensure 'device' is of the correct type. */
10030 device = fold_convert_loc (device_loc, integer_type_node, device);
10032 tp = &device;
10035 cond = gimple_boolify (cond);
10037 basic_block cond_bb, then_bb, else_bb;
10038 edge e;
10039 tree tmp_var;
10041 tmp_var = create_tmp_var (TREE_TYPE (*tp));
10042 if (offloaded)
10043 e = split_block_after_labels (new_bb);
10044 else
10046 gsi = gsi_last_nondebug_bb (new_bb);
10047 gsi_prev (&gsi);
10048 e = split_block (new_bb, gsi_stmt (gsi));
10050 cond_bb = e->src;
10051 new_bb = e->dest;
10052 remove_edge (e);
10054 then_bb = create_empty_bb (cond_bb);
10055 else_bb = create_empty_bb (then_bb);
10056 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10057 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10059 stmt = gimple_build_cond_empty (cond);
10060 gsi = gsi_last_bb (cond_bb);
10061 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10063 gsi = gsi_start_bb (then_bb);
10064 stmt = gimple_build_assign (tmp_var, *tp);
10065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10067 gsi = gsi_start_bb (else_bb);
10068 if (is_gimple_omp_oacc (entry_stmt))
10069 stmt = gimple_build_assign (tmp_var,
10070 BIT_IOR_EXPR,
10071 *tp,
10072 build_int_cst (integer_type_node,
10073 GOACC_FLAG_HOST_FALLBACK));
10074 else
10075 stmt = gimple_build_assign (tmp_var,
10076 build_int_cst (integer_type_node,
10077 GOMP_DEVICE_HOST_FALLBACK));
10078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10080 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10081 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10082 add_bb_to_loop (then_bb, cond_bb->loop_father);
10083 add_bb_to_loop (else_bb, cond_bb->loop_father);
10084 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10085 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10087 *tp = tmp_var;
10089 gsi = gsi_last_nondebug_bb (new_bb);
10091 else
10093 gsi = gsi_last_nondebug_bb (new_bb);
10095 if (device != NULL_TREE)
10096 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10097 true, GSI_SAME_STMT);
10100 t = gimple_omp_target_data_arg (entry_stmt);
10101 if (t == NULL)
10103 t1 = size_zero_node;
10104 t2 = build_zero_cst (ptr_type_node);
10105 t3 = t2;
10106 t4 = t2;
10108 else
10110 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10111 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10112 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10113 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10114 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
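  /* Added note: the data argument recorded on the target statement is a
     TREE_VEC whose three elements are the arrays built during omp lowering
     (host addresses, sizes, and map kinds), so T1 ends up as the number of
     mapped entries while T2, T3 and T4 are the addresses of those arrays as
     passed to the runtime launch or data routine.  */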
10117 gimple *g;
10118 bool tagging = false;
10119 /* The maximum number of arguments used by any start_ix, not counting varargs. */
10120 auto_vec<tree, 11> args;
10121 if (is_gimple_omp_oacc (entry_stmt))
10123 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10124 TREE_TYPE (goacc_flags), goacc_flags);
10125 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10126 NULL_TREE, true,
10127 GSI_SAME_STMT);
10128 args.quick_push (goacc_flags_m);
10130 else
10131 args.quick_push (device);
10132 if (offloaded)
10133 args.quick_push (build_fold_addr_expr (child_fn));
10134 args.quick_push (t1);
10135 args.quick_push (t2);
10136 args.quick_push (t3);
10137 args.quick_push (t4);
10138 switch (start_ix)
10140 case BUILT_IN_GOACC_DATA_START:
10141 case BUILT_IN_GOACC_DECLARE:
10142 case BUILT_IN_GOMP_TARGET_DATA:
10143 break;
10144 case BUILT_IN_GOMP_TARGET:
10145 case BUILT_IN_GOMP_TARGET_UPDATE:
10146 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10147 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10148 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10149 if (c)
10150 depend = OMP_CLAUSE_DECL (c);
10151 else
10152 depend = build_int_cst (ptr_type_node, 0);
10153 args.quick_push (depend);
10154 if (start_ix == BUILT_IN_GOMP_TARGET)
10155 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10156 break;
10157 case BUILT_IN_GOACC_PARALLEL:
10158 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10160 tree dims = NULL_TREE;
10161 unsigned int ix;
10163 /* For serial constructs we set all dimensions to 1. */
10164 for (ix = GOMP_DIM_MAX; ix--;)
10165 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10166 oacc_replace_fn_attrib (child_fn, dims);
10168 else
10169 oacc_set_fn_attrib (child_fn, clauses, &args);
10170 tagging = true;
10171 /* FALLTHRU */
10172 case BUILT_IN_GOACC_ENTER_DATA:
10173 case BUILT_IN_GOACC_EXIT_DATA:
10174 case BUILT_IN_GOACC_UPDATE:
10176 tree t_async = NULL_TREE;
10178 /* If present, use the value specified by the respective
10179 clause, making sure that is of the correct type. */
10180 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10181 if (c)
10182 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10183 integer_type_node,
10184 OMP_CLAUSE_ASYNC_EXPR (c));
10185 else if (!tagging)
10186 /* Default values for t_async. */
10187 t_async = fold_convert_loc (gimple_location (entry_stmt),
10188 integer_type_node,
10189 build_int_cst (integer_type_node,
10190 GOMP_ASYNC_SYNC));
10191 if (tagging && t_async)
10193 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10195 if (TREE_CODE (t_async) == INTEGER_CST)
10197 /* See if we can pack the async arg into the tag's
10198 operand. */
10199 i_async = TREE_INT_CST_LOW (t_async);
10200 if (i_async < GOMP_LAUNCH_OP_MAX)
10201 t_async = NULL_TREE;
10202 else
10203 i_async = GOMP_LAUNCH_OP_MAX;
10205 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10206 i_async));
10208 if (t_async)
10209 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10210 NULL_TREE, true,
10211 GSI_SAME_STMT));
10213 /* Save the argument index, and ... */
10214 unsigned t_wait_idx = args.length ();
10215 unsigned num_waits = 0;
10216 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10217 if (!tagging || c)
10218 /* ... push a placeholder. */
10219 args.safe_push (integer_zero_node);
10221 for (; c; c = OMP_CLAUSE_CHAIN (c))
10222 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10224 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10225 integer_type_node,
10226 OMP_CLAUSE_WAIT_EXPR (c));
10227 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10228 GSI_SAME_STMT);
10229 args.safe_push (arg);
10230 num_waits++;
10233 if (!tagging || num_waits)
10235 tree len;
10237 /* Now that we know the number, update the placeholder. */
10238 if (tagging)
10239 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10240 else
10241 len = build_int_cst (integer_type_node, num_waits);
10242 len = fold_convert_loc (gimple_location (entry_stmt),
10243 unsigned_type_node, len);
10244 args[t_wait_idx] = len;
10247 break;
10248 default:
10249 gcc_unreachable ();
10251 if (tagging)
10252 /* Push terminal marker - zero. */
10253 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10255 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10256 gimple_set_location (g, gimple_location (entry_stmt));
10257 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10258 if (!offloaded)
10260 g = gsi_stmt (gsi);
10261 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10262 gsi_remove (&gsi, true);
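/* Exposition sketch (added; the libgomp details below are reproduced from
   memory and should be treated as an assumption, not a reference): when
   START_IX is BUILT_IN_GOMP_TARGET, the call emitted above is expected to
   reach a runtime entry point of roughly this shape

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                        flags, depend, args);

   i.e. the DEVICE operand, the outlined CHILD_FN, the T1..T4 map arrays
   computed earlier, the FLAGS_I bitmask, the DEPEND vector and the extra
   argument block pushed by get_target_arguments.  */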
10266 /* Expand the parallel region tree rooted at REGION. Expansion
10267 proceeds in depth-first order. Innermost regions are expanded
10268 first. This way, parallel regions that require a new function to
10269 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10270 internal dependencies in their body. */
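/* For illustration (an added sketch, not an exhaustive description): given a
   construct nest such as

       #pragma omp parallel
       #pragma omp for
       for (i = 0; i < n; i++)
         body (i);

   the GIMPLE_OMP_FOR region hangs off region->inner of the
   GIMPLE_OMP_PARALLEL region, so the recursion below expands the loop first
   and only then outlines the enclosing parallel into its own child
   function.  */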
10272 static void
10273 expand_omp (struct omp_region *region)
10275 omp_any_child_fn_dumped = false;
10276 while (region)
10278 location_t saved_location;
10279 gimple *inner_stmt = NULL;
10281 /* First, determine whether this is a combined parallel+workshare
10282 region. */
10283 if (region->type == GIMPLE_OMP_PARALLEL)
10284 determine_parallel_type (region);
10286 if (region->type == GIMPLE_OMP_FOR
10287 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10288 inner_stmt = last_stmt (region->inner->entry);
10290 if (region->inner)
10291 expand_omp (region->inner);
10293 saved_location = input_location;
10294 if (gimple_has_location (last_stmt (region->entry)))
10295 input_location = gimple_location (last_stmt (region->entry));
10297 switch (region->type)
10299 case GIMPLE_OMP_PARALLEL:
10300 case GIMPLE_OMP_TASK:
10301 expand_omp_taskreg (region);
10302 break;
10304 case GIMPLE_OMP_FOR:
10305 expand_omp_for (region, inner_stmt);
10306 break;
10308 case GIMPLE_OMP_SECTIONS:
10309 expand_omp_sections (region);
10310 break;
10312 case GIMPLE_OMP_SECTION:
10313 /* Individual omp sections are handled together with their
10314 parent GIMPLE_OMP_SECTIONS region. */
10315 break;
10317 case GIMPLE_OMP_SINGLE:
10318 case GIMPLE_OMP_SCOPE:
10319 expand_omp_single (region);
10320 break;
10322 case GIMPLE_OMP_ORDERED:
10324 gomp_ordered *ord_stmt
10325 = as_a <gomp_ordered *> (last_stmt (region->entry));
10326 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10327 OMP_CLAUSE_DEPEND))
10329 /* We'll expand these when expanding the corresponding
10330 worksharing region with an ordered(n) clause. */
10331 gcc_assert (region->outer
10332 && region->outer->type == GIMPLE_OMP_FOR);
10333 region->ord_stmt = ord_stmt;
10334 break;
10337 /* FALLTHRU */
10338 case GIMPLE_OMP_MASTER:
10339 case GIMPLE_OMP_MASKED:
10340 case GIMPLE_OMP_TASKGROUP:
10341 case GIMPLE_OMP_CRITICAL:
10342 case GIMPLE_OMP_TEAMS:
10343 expand_omp_synch (region);
10344 break;
10346 case GIMPLE_OMP_ATOMIC_LOAD:
10347 expand_omp_atomic (region);
10348 break;
10350 case GIMPLE_OMP_TARGET:
10351 expand_omp_target (region);
10352 break;
10354 default:
10355 gcc_unreachable ();
10358 input_location = saved_location;
10359 region = region->next;
10361 if (omp_any_child_fn_dumped)
10363 if (dump_file)
10364 dump_function_header (dump_file, current_function_decl, dump_flags);
10365 omp_any_child_fn_dumped = false;
10369 /* Helper for build_omp_regions. Scan the dominator tree starting at
10370 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10371 true, the function ends once a single tree is built (otherwise, a whole
10372 forest of OMP constructs may be built). */
10374 static void
10375 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10376 bool single_tree)
10378 gimple_stmt_iterator gsi;
10379 gimple *stmt;
10380 basic_block son;
10382 gsi = gsi_last_nondebug_bb (bb);
10383 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10385 struct omp_region *region;
10386 enum gimple_code code;
10388 stmt = gsi_stmt (gsi);
10389 code = gimple_code (stmt);
10390 if (code == GIMPLE_OMP_RETURN)
10392 /* STMT is the return point out of region PARENT. Mark it
10393 as the exit point and make PARENT the immediately
10394 enclosing region. */
10395 gcc_assert (parent);
10396 region = parent;
10397 region->exit = bb;
10398 parent = parent->outer;
10400 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10402 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10403 GIMPLE_OMP_RETURN, but matches with
10404 GIMPLE_OMP_ATOMIC_LOAD. */
10405 gcc_assert (parent);
10406 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10407 region = parent;
10408 region->exit = bb;
10409 parent = parent->outer;
10411 else if (code == GIMPLE_OMP_CONTINUE)
10413 gcc_assert (parent);
10414 parent->cont = bb;
10416 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10418 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10419 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10421 else
10423 region = new_omp_region (bb, code, parent);
10424 /* Otherwise... */
10425 if (code == GIMPLE_OMP_TARGET)
10427 switch (gimple_omp_target_kind (stmt))
10429 case GF_OMP_TARGET_KIND_REGION:
10430 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10431 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10432 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10433 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10434 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10435 break;
10436 case GF_OMP_TARGET_KIND_UPDATE:
10437 case GF_OMP_TARGET_KIND_ENTER_DATA:
10438 case GF_OMP_TARGET_KIND_EXIT_DATA:
10439 case GF_OMP_TARGET_KIND_DATA:
10440 case GF_OMP_TARGET_KIND_OACC_DATA:
10441 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10442 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10443 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10444 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10445 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10446 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10447 /* ..., other than for those stand-alone directives... */
10448 region = NULL;
10449 break;
10450 default:
10451 gcc_unreachable ();
10454 else if (code == GIMPLE_OMP_ORDERED
10455 && omp_find_clause (gimple_omp_ordered_clauses
10456 (as_a <gomp_ordered *> (stmt)),
10457 OMP_CLAUSE_DEPEND))
10458 /* #pragma omp ordered depend is also just a stand-alone
10459 directive. */
10460 region = NULL;
10461 else if (code == GIMPLE_OMP_TASK
10462 && gimple_omp_task_taskwait_p (stmt))
10463 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10464 region = NULL;
10465 /* ..., this directive becomes the parent for a new region. */
10466 if (region)
10467 parent = region;
10471 if (single_tree && !parent)
10472 return;
10474 for (son = first_dom_son (CDI_DOMINATORS, bb);
10475 son;
10476 son = next_dom_son (CDI_DOMINATORS, son))
10477 build_omp_regions_1 (son, parent, single_tree);
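/* Illustrative sketch (added for exposition): because OMP directives nest
   properly, the walk above behaves like bracket matching; each
   GIMPLE_OMP_RETURN closes the innermost open region, e.g.

       target          opens a region (parent = NULL)
         teams         opens a region (parent = target)
           ...
         OMP_RETURN    closes teams
       OMP_RETURN      closes target

   whereas the stand-alone directives filtered out above (target update,
   ordered depend, taskwait with depend, ...) never open a region at all.  */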
10480 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10481 root_omp_region. */
10483 static void
10484 build_omp_regions_root (basic_block root)
10486 gcc_assert (root_omp_region == NULL);
10487 build_omp_regions_1 (root, NULL, true);
10488 gcc_assert (root_omp_region != NULL);
10491 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
10493 void
10494 omp_expand_local (basic_block head)
10496 build_omp_regions_root (head);
10497 if (dump_file && (dump_flags & TDF_DETAILS))
10499 fprintf (dump_file, "\nOMP region tree\n\n");
10500 dump_omp_region (dump_file, root_omp_region, 0);
10501 fprintf (dump_file, "\n");
10504 remove_exit_barriers (root_omp_region);
10505 expand_omp (root_omp_region);
10507 omp_free_regions ();
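/* Added note (hedged; the caller named here is an assumption based on which
   passes create OMP constructs after ompexp has already run): this entry
   point serves clients such as the auto-parallelization code in
   tree-parloops.cc, which is believed to call omp_expand_local on the
   GIMPLE_OMP_PARALLEL/GIMPLE_OMP_FOR nest it has just materialized.  */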
10510 /* Scan the CFG and build a tree of OMP regions; the root is stored in
10511 root_omp_region rather than returned. */
10513 static void
10514 build_omp_regions (void)
10516 gcc_assert (root_omp_region == NULL);
10517 calculate_dominance_info (CDI_DOMINATORS);
10518 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10521 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10523 static unsigned int
10524 execute_expand_omp (void)
10526 build_omp_regions ();
10528 if (!root_omp_region)
10529 return 0;
10531 if (dump_file)
10533 fprintf (dump_file, "\nOMP region tree\n\n");
10534 dump_omp_region (dump_file, root_omp_region, 0);
10535 fprintf (dump_file, "\n");
10538 remove_exit_barriers (root_omp_region);
10540 expand_omp (root_omp_region);
10542 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10543 verify_loop_structure ();
10544 cleanup_tree_cfg ();
10546 omp_free_regions ();
10548 return 0;
10551 /* OMP expansion -- the default pass, run before creation of SSA form. */
10553 namespace {
10555 const pass_data pass_data_expand_omp =
10557 GIMPLE_PASS, /* type */
10558 "ompexp", /* name */
10559 OPTGROUP_OMP, /* optinfo_flags */
10560 TV_NONE, /* tv_id */
10561 PROP_gimple_any, /* properties_required */
10562 PROP_gimple_eomp, /* properties_provided */
10563 0, /* properties_destroyed */
10564 0, /* todo_flags_start */
10565 0, /* todo_flags_finish */
10568 class pass_expand_omp : public gimple_opt_pass
10570 public:
10571 pass_expand_omp (gcc::context *ctxt)
10572 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10575 /* opt_pass methods: */
10576 virtual unsigned int execute (function *)
10578 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10579 || flag_openmp_simd != 0)
10580 && !seen_error ());
10582 /* This pass always runs, to provide PROP_gimple_eomp.
10583 But often, there is nothing to do. */
10584 if (!gate)
10585 return 0;
10587 return execute_expand_omp ();
10590 }; // class pass_expand_omp
10592 } // anon namespace
10594 gimple_opt_pass *
10595 make_pass_expand_omp (gcc::context *ctxt)
10597 return new pass_expand_omp (ctxt);
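/* Added sketch (an assumption about how GIMPLE passes are registered, not a
   quotation of gcc/passes.def): the factory above is referenced from the
   pass list roughly as

       NEXT_PASS (pass_expand_omp);

   which is what schedules this pre-SSA expansion in the default pipeline.  */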
10600 namespace {
10602 const pass_data pass_data_expand_omp_ssa =
10604 GIMPLE_PASS, /* type */
10605 "ompexpssa", /* name */
10606 OPTGROUP_OMP, /* optinfo_flags */
10607 TV_NONE, /* tv_id */
10608 PROP_cfg | PROP_ssa, /* properties_required */
10609 PROP_gimple_eomp, /* properties_provided */
10610 0, /* properties_destroyed */
10611 0, /* todo_flags_start */
10612 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10615 class pass_expand_omp_ssa : public gimple_opt_pass
10617 public:
10618 pass_expand_omp_ssa (gcc::context *ctxt)
10619 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10622 /* opt_pass methods: */
10623 virtual bool gate (function *fun)
10625 return !(fun->curr_properties & PROP_gimple_eomp);
10627 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10628 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10630 }; // class pass_expand_omp_ssa
10632 } // anon namespace
10634 gimple_opt_pass *
10635 make_pass_expand_omp_ssa (gcc::context *ctxt)
10637 return new pass_expand_omp_ssa (ctxt);
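/* Added note (an inference from the clone method above rather than a
   statement from the original file): providing clone () lets a pass be
   instantiated more than once in the pass list; since the gate checks that
   PROP_gimple_eomp is still missing, whichever instance of
   pass_expand_omp_ssa runs first does the expansion and later instances
   become no-ops.  */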
10640 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10641 GIMPLE_* codes. */
10643 bool
10644 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10645 int *region_idx)
10647 gimple *last = last_stmt (bb);
10648 enum gimple_code code = gimple_code (last);
10649 struct omp_region *cur_region = *region;
10650 bool fallthru = false;
10652 switch (code)
10654 case GIMPLE_OMP_PARALLEL:
10655 case GIMPLE_OMP_FOR:
10656 case GIMPLE_OMP_SINGLE:
10657 case GIMPLE_OMP_TEAMS:
10658 case GIMPLE_OMP_MASTER:
10659 case GIMPLE_OMP_MASKED:
10660 case GIMPLE_OMP_SCOPE:
10661 case GIMPLE_OMP_TASKGROUP:
10662 case GIMPLE_OMP_CRITICAL:
10663 case GIMPLE_OMP_SECTION:
10664 cur_region = new_omp_region (bb, code, cur_region);
10665 fallthru = true;
10666 break;
10668 case GIMPLE_OMP_TASK:
10669 cur_region = new_omp_region (bb, code, cur_region);
10670 fallthru = true;
10671 if (gimple_omp_task_taskwait_p (last))
10672 cur_region = cur_region->outer;
10673 break;
10675 case GIMPLE_OMP_ORDERED:
10676 cur_region = new_omp_region (bb, code, cur_region);
10677 fallthru = true;
10678 if (omp_find_clause (gimple_omp_ordered_clauses
10679 (as_a <gomp_ordered *> (last)),
10680 OMP_CLAUSE_DEPEND))
10681 cur_region = cur_region->outer;
10682 break;
10684 case GIMPLE_OMP_TARGET:
10685 cur_region = new_omp_region (bb, code, cur_region);
10686 fallthru = true;
10687 switch (gimple_omp_target_kind (last))
10689 case GF_OMP_TARGET_KIND_REGION:
10690 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10691 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10692 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10693 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10694 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10695 break;
10696 case GF_OMP_TARGET_KIND_UPDATE:
10697 case GF_OMP_TARGET_KIND_ENTER_DATA:
10698 case GF_OMP_TARGET_KIND_EXIT_DATA:
10699 case GF_OMP_TARGET_KIND_DATA:
10700 case GF_OMP_TARGET_KIND_OACC_DATA:
10701 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10702 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10703 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10704 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10705 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10706 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10707 cur_region = cur_region->outer;
10708 break;
10709 default:
10710 gcc_unreachable ();
10712 break;
10714 case GIMPLE_OMP_SECTIONS:
10715 cur_region = new_omp_region (bb, code, cur_region);
10716 fallthru = true;
10717 break;
10719 case GIMPLE_OMP_SECTIONS_SWITCH:
10720 fallthru = false;
10721 break;
10723 case GIMPLE_OMP_ATOMIC_LOAD:
10724 case GIMPLE_OMP_ATOMIC_STORE:
10725 fallthru = true;
10726 break;
10728 case GIMPLE_OMP_RETURN:
10729 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10730 somewhere other than the next block. This will be
10731 created later. */
10732 cur_region->exit = bb;
10733 if (cur_region->type == GIMPLE_OMP_TASK)
10734 /* Add an edge corresponding to not scheduling the task
10735 immediately. */
10736 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10737 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10738 cur_region = cur_region->outer;
10739 break;
10741 case GIMPLE_OMP_CONTINUE:
10742 cur_region->cont = bb;
10743 switch (cur_region->type)
10745 case GIMPLE_OMP_FOR:
10746 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10747 successor edges as abnormal to prevent splitting
10748 them. */
10749 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10750 /* Make the loopback edge. */
10751 make_edge (bb, single_succ (cur_region->entry),
10752 EDGE_ABNORMAL);
10754 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10755 corresponds to the case that the body of the loop
10756 is not executed at all. */
10757 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10758 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10759 fallthru = false;
10760 break;
10762 case GIMPLE_OMP_SECTIONS:
10763 /* Wire up the edges into and out of the nested sections. */
10765 basic_block switch_bb = single_succ (cur_region->entry);
10767 struct omp_region *i;
10768 for (i = cur_region->inner; i ; i = i->next)
10770 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10771 make_edge (switch_bb, i->entry, 0);
10772 make_edge (i->exit, bb, EDGE_FALLTHRU);
10775 /* Make the loopback edge to the block with
10776 GIMPLE_OMP_SECTIONS_SWITCH. */
10777 make_edge (bb, switch_bb, 0);
10779 /* Make the edge from the switch to exit. */
10780 make_edge (switch_bb, bb->next_bb, 0);
10781 fallthru = false;
10783 break;
10785 case GIMPLE_OMP_TASK:
10786 fallthru = true;
10787 break;
10789 default:
10790 gcc_unreachable ();
10792 break;
10794 default:
10795 gcc_unreachable ();
10798 if (*region != cur_region)
10800 *region = cur_region;
10801 if (cur_region)
10802 *region_idx = cur_region->entry->index;
10803 else
10804 *region_idx = 0;
10807 return fallthru;
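/* Added sketch of the call site (hedged; the local variable names are made
   up, only the shape follows from the comment above omp_make_gimple_edges):
   tree-cfg.cc's edge construction is expected to do something like

       if (last && is_gimple_omp (last))
         fallthru = omp_make_gimple_edges (bb, &cur_region, &cur_region_idx);

   so CUR_REGION threads the currently open region from block to block and
   the returned flag tells the caller whether to add the normal fallthru
   edge as well.  */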