1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2021 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
121 /* Return true if REGION is a combined parallel+workshare region. */
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
126 return region->is_combined_parallel;
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
130 is the immediate dominator of WS_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
143 Is lowered into:
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
202 return true;
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
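/* Illustrative note (not emitted by the pass): the folding above is the
   usual round-up-to-a-multiple idiom.  Assuming a vectorization factor
   of 8 and a user-specified chunk size of 10, it computes

     chunk = (10 + (8 - 1)) & -8;   which yields 16

   so every chunk handed out by the runtime covers a whole number of
   vector iterations.  */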
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. WS_STMT is the workshare directive being
227 expanded. */
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
238 struct omp_for_data fd;
239 tree n1, n2;
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
245 if (gimple_omp_for_combined_into_p (for_stmt))
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
269 if (fd.chunk_size)
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
276 return ws_args;
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
290 gcc_unreachable ();
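/* Illustrative sketch (values are hypothetical): for a combined

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   the vector built above holds roughly { (long) 0, (long) n, (long) 1,
   (long) 4 }, i.e. the loop bounds, step and chunk size that the
   combined GOMP_parallel_loop_* entry points expect in addition to the
   usual parallel arguments.  For a sections region it holds just the
   section count.  */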
293 /* Discover whether REGION is a combined parallel+workshare region. */
295 static void
296 determine_parallel_type (struct omp_region *region)
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
319 /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
336 if (region->inner->type == GIMPLE_OMP_FOR)
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
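/* For example (illustrative only), once the region

     #pragma omp parallel
     #pragma omp for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   has been recognized as combined here, expand_parallel_call below
   emits a single GOMP_parallel_loop_dynamic call carrying the ws_args
   collected above, instead of a plain GOMP_parallel whose child
   function would then have to start the loop schedule itself.  */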
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
376 /* Dump the parallel region tree rooted at REGION. */
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
387 if (region->cont)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
406 dump_omp_region (stderr, region, 0);
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
412 dump_omp_region (stderr, root_omp_region, 0);
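/* A dump produced by the functions above looks, for a parallel region
   containing a single workshare loop, roughly like this (block numbers
   are hypothetical):

     bb 2: gimple_omp_parallel
         bb 3: gimple_omp_for
         bb 5: GIMPLE_OMP_CONTINUE
         bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN  */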
415 /* Create a new parallel region starting at STMT inside region PARENT. */
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
421 struct omp_region *region = XCNEW (struct omp_region);
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
427 if (parent)
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
434 else
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
442 return region;
445 /* Release the memory associated with the region tree rooted at REGION. */
447 static void
448 free_omp_region_1 (struct omp_region *region)
450 struct omp_region *i, *n;
452 for (i = region->inner; i ; i = n)
454 n = i->next;
455 free_omp_region_1 (i);
458 free (region);
461 /* Release the memory for the entire omp region tree. */
463 void
464 omp_free_regions (void)
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
469 n = r->next;
470 free_omp_region_1 (r);
472 root_omp_region = NULL;
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
478 static gcond *
479 gimple_build_cond_empty (tree cond)
481 enum tree_code pred_code;
482 tree lhs, rhs;
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502 /* OMP expansion expands inner regions before outer ones, so if
503 we e.g. have an explicit task region nested in a parallel region, when
504 expanding the task region current_function_decl will be the original
505 source function, but we actually want to use as context the child
506 function of the parallel. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543 being expanded. BB is the block where to insert the code. WS_ARGS
544 will be set if this is a call to a combined parallel+workshare
545 construct, it contains the list of additional arguments needed by
546 the workshare construct. */
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
571 switch (region->inner->type)
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
632 else
633 clause_loc = gimple_location (entry_stmt);
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
646 cond = gimple_boolify (cond);
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
665 else
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
701 if (gimple_in_ssa_p (cfun))
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
708 val = tmp_join;
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
736 if (rtmp)
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
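/* Illustrative example of the call built above (identifiers are made
   up for exposition): for

     #pragma omp parallel num_threads (4)

   the emitted statement is roughly

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 4, 0);

   i.e. child function, data pointer, number of threads (0 would mean
   "chosen at run time") and the flags word derived from proc_bind.  */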
747 /* Build the function call to GOMP_task to actually
748 generate the task operation. BB is the block where to insert the code. */
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
800 else
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
810 else
811 num_tasks = integer_zero_node;
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
822 else
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
830 tree flags = build_int_cst (unsigned_type_node, iflags);
832 tree cond = boolean_true_node;
833 if (ifc)
835 if (taskloop_p)
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
849 if (finalc)
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
868 gsi = gsi_last_nondebug_bb (bb);
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
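/* Illustrative example (identifiers and sizes are made up): for a plain

     #pragma omp task untied

   the call built above is roughly

     GOMP_task (foo._omp_fn.1, &.omp_data_o.2, foo._omp_cpyfn.3,
                16, 8, 1, GOMP_TASK_FLAG_UNTIED, 0, 0, 0);

   i.e. child function, data pointer, copy function, argument block size
   and alignment, the if-clause value, the flags word, and the depend,
   priority and detach arguments (all absent here).  Taskloops instead
   go through GOMP_taskloop or GOMP_taskloop_ull, with num_tasks,
   priority and the start, end and step of the enclosing loop.  */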
905 /* Build the function call to GOMP_taskwait_depend to actually
906 generate the taskwait operation. BB is the block where to insert the
907 code. */
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
917 depend = OMP_CLAUSE_DECL (depend);
919 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920 tree t
921 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922 1, depend);
924 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925 false, GSI_CONTINUE_LINKING);
928 /* Build the function call to GOMP_teams_reg to actually
929 generate the host teams operation. REGION is the teams region
930 being expanded. BB is the block where to insert the code. */
932 static void
933 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
935 tree clauses = gimple_omp_teams_clauses (entry_stmt);
936 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937 if (num_teams == NULL_TREE)
938 num_teams = build_int_cst (unsigned_type_node, 0);
939 else
941 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
942 num_teams = fold_convert (unsigned_type_node, num_teams);
944 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945 if (thread_limit == NULL_TREE)
946 thread_limit = build_int_cst (unsigned_type_node, 0);
947 else
949 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950 thread_limit = fold_convert (unsigned_type_node, thread_limit);
953 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955 if (t == NULL)
956 t1 = null_pointer_node;
957 else
958 t1 = build_fold_addr_expr (t);
959 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960 tree t2 = build_fold_addr_expr (child_fndecl);
962 vec<tree, va_gc> *args;
963 vec_alloc (args, 5);
964 args->quick_push (t2);
965 args->quick_push (t1);
966 args->quick_push (num_teams);
967 args->quick_push (thread_limit);
968 /* For future extensibility. */
969 args->quick_push (build_zero_cst (unsigned_type_node));
971 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973 args);
975 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976 false, GSI_CONTINUE_LINKING);
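/* Illustrative example (identifiers are made up): for a host

     #pragma omp teams num_teams (4) thread_limit (8)

   the call built above is roughly

     GOMP_teams_reg (foo._omp_fn.2, &.omp_data_o.3, 4, 8, 0);

   the trailing zero being the currently unused flags argument.  */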
979 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
981 static tree
982 vec2chain (vec<tree, va_gc> *v)
984 tree chain = NULL_TREE, t;
985 unsigned ix;
987 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
989 DECL_CHAIN (t) = chain;
990 chain = t;
993 return chain;
996 /* Remove barriers in REGION->EXIT's block. Note that this is only
997 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
998 is an implicit barrier, any barrier that a workshare inside the
999 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
1000 can now be removed. */
1002 static void
1003 remove_exit_barrier (struct omp_region *region)
1005 gimple_stmt_iterator gsi;
1006 basic_block exit_bb;
1007 edge_iterator ei;
1008 edge e;
1009 gimple *stmt;
1010 int any_addressable_vars = -1;
1012 exit_bb = region->exit;
1014 /* If the parallel region doesn't return, we don't have REGION->EXIT
1015 block at all. */
1016 if (! exit_bb)
1017 return;
1019 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1020 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1021 statements that can appear in between are extremely limited -- no
1022 memory operations at all. Here, we allow nothing at all, so the
1023 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1024 gsi = gsi_last_nondebug_bb (exit_bb);
1025 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026 gsi_prev_nondebug (&gsi);
1027 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028 return;
1030 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1032 gsi = gsi_last_nondebug_bb (e->src);
1033 if (gsi_end_p (gsi))
1034 continue;
1035 stmt = gsi_stmt (gsi);
1036 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037 && !gimple_omp_return_nowait_p (stmt))
1039 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040 in many cases. If there could be tasks queued, the barrier
1041 might be needed to let the tasks run before some local
1042 variable of the parallel that the task uses as shared
1043 runs out of scope. The task can be spawned either
1044 from within current function (this would be easy to check)
1045 or from some function it calls and gets passed an address
1046 of such a variable. */
1047 if (any_addressable_vars < 0)
1049 gomp_parallel *parallel_stmt
1050 = as_a <gomp_parallel *> (last_stmt (region->entry));
1051 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052 tree local_decls, block, decl;
1053 unsigned ix;
1055 any_addressable_vars = 0;
1056 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057 if (TREE_ADDRESSABLE (decl))
1059 any_addressable_vars = 1;
1060 break;
1062 for (block = gimple_block (stmt);
1063 !any_addressable_vars
1064 && block
1065 && TREE_CODE (block) == BLOCK;
1066 block = BLOCK_SUPERCONTEXT (block))
1068 for (local_decls = BLOCK_VARS (block);
1069 local_decls;
1070 local_decls = DECL_CHAIN (local_decls))
1071 if (TREE_ADDRESSABLE (local_decls))
1073 any_addressable_vars = 1;
1074 break;
1076 if (block == gimple_block (parallel_stmt))
1077 break;
1080 if (!any_addressable_vars)
1081 gimple_omp_return_set_nowait (stmt);
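/* For instance (illustrative only), in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         a[i] = i;
     }

   the barrier implied by the end of the worksharing loop is redundant,
   because the implicit barrier at the end of the parallel follows it
   immediately; the code above therefore marks the inner
   GIMPLE_OMP_RETURN nowait, unless addressable locals mean a queued
   task might still need that barrier.  */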
1086 static void
1087 remove_exit_barriers (struct omp_region *region)
1089 if (region->type == GIMPLE_OMP_PARALLEL)
1090 remove_exit_barrier (region);
1092 if (region->inner)
1094 region = region->inner;
1095 remove_exit_barriers (region);
1096 while (region->next)
1098 region = region->next;
1099 remove_exit_barriers (region);
1104 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105 calls. These can't be declared as const functions, but
1106 within one parallel body they are constant, so they can be
1107 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108 which are declared const. Similarly for task body, except
1109 that in an untied task omp_get_thread_num () can change at any task
1110 scheduling point. */
1112 static void
1113 optimize_omp_library_calls (gimple *entry_stmt)
1115 basic_block bb;
1116 gimple_stmt_iterator gsi;
1117 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123 OMP_CLAUSE_UNTIED) != NULL);
1125 FOR_EACH_BB_FN (bb, cfun)
1126 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1128 gimple *call = gsi_stmt (gsi);
1129 tree decl;
1131 if (is_gimple_call (call)
1132 && (decl = gimple_call_fndecl (call))
1133 && DECL_EXTERNAL (decl)
1134 && TREE_PUBLIC (decl)
1135 && DECL_INITIAL (decl) == NULL)
1137 tree built_in;
1139 if (DECL_NAME (decl) == thr_num_id)
1141 /* In #pragma omp task untied omp_get_thread_num () can change
1142 during the execution of the task region. */
1143 if (untied_task)
1144 continue;
1145 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1147 else if (DECL_NAME (decl) == num_thr_id)
1148 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149 else
1150 continue;
1152 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153 || gimple_call_num_args (call) != 0)
1154 continue;
1156 if (flag_exceptions && !TREE_NOTHROW (decl))
1157 continue;
1159 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161 TREE_TYPE (TREE_TYPE (built_in))))
1162 continue;
1164 gimple_call_set_fndecl (call, built_in);
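/* Illustrative effect of the replacement above: inside the outlined
   body of a parallel (or tied task) region,

     n = omp_get_num_threads ();
     i = omp_get_thread_num ();

   stop being calls to the external, non-const library functions and
   become calls to __builtin_omp_get_num_threads () and
   __builtin_omp_get_thread_num (), which are declared const and can
   therefore be CSEd by later passes.  */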
1169 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1170 regimplified. */
1172 static tree
1173 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1175 tree t = *tp;
1177 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1178 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179 return t;
1181 if (TREE_CODE (t) == ADDR_EXPR)
1182 recompute_tree_invariant_for_addr_expr (t);
1184 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185 return NULL_TREE;
1188 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1190 static void
1191 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192 bool after)
1194 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196 !after, after ? GSI_CONTINUE_LINKING
1197 : GSI_SAME_STMT);
1198 gimple *stmt = gimple_build_assign (to, from);
1199 if (after)
1200 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201 else
1202 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gimple_regimplify_operands (stmt, &gsi);
1211 /* Expand the OpenMP parallel or task directive starting at REGION. */
1213 static void
1214 expand_omp_taskreg (struct omp_region *region)
1216 basic_block entry_bb, exit_bb, new_bb;
1217 struct function *child_cfun;
1218 tree child_fn, block, t;
1219 gimple_stmt_iterator gsi;
1220 gimple *entry_stmt, *stmt;
1221 edge e;
1222 vec<tree, va_gc> *ws_args;
1224 entry_stmt = last_stmt (region->entry);
1225 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1226 && gimple_omp_task_taskwait_p (entry_stmt))
1228 new_bb = region->entry;
1229 gsi = gsi_last_nondebug_bb (region->entry);
1230 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1231 gsi_remove (&gsi, true);
1232 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1233 return;
1236 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1237 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1239 entry_bb = region->entry;
1240 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1241 exit_bb = region->cont;
1242 else
1243 exit_bb = region->exit;
1245 if (is_combined_parallel (region))
1246 ws_args = region->ws_args;
1247 else
1248 ws_args = NULL;
1250 if (child_cfun->cfg)
1252 /* Due to inlining, it may happen that we have already outlined
1253 the region, in which case all we need to do is make the
1254 sub-graph unreachable and emit the parallel call. */
1255 edge entry_succ_e, exit_succ_e;
1257 entry_succ_e = single_succ_edge (entry_bb);
1259 gsi = gsi_last_nondebug_bb (entry_bb);
1260 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1261 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1262 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1263 gsi_remove (&gsi, true);
1265 new_bb = entry_bb;
1266 if (exit_bb)
1268 exit_succ_e = single_succ_edge (exit_bb);
1269 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1271 remove_edge_and_dominated_blocks (entry_succ_e);
1273 else
1275 unsigned srcidx, dstidx, num;
1277 /* If the parallel region needs data sent from the parent
1278 function, then the very first statement (except possible
1279 tree profile counter updates) of the parallel body
1280 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1281 &.OMP_DATA_O is passed as an argument to the child function,
1282 we need to replace it with the argument as seen by the child
1283 function.
1285 In most cases, this will end up being the identity assignment
1286 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1287 a function call that has been inlined, the original PARM_DECL
1288 .OMP_DATA_I may have been converted into a different local
1289 variable. In which case, we need to keep the assignment. */
1290 if (gimple_omp_taskreg_data_arg (entry_stmt))
1292 basic_block entry_succ_bb
1293 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1294 : FALLTHRU_EDGE (entry_bb)->dest;
1295 tree arg;
1296 gimple *parcopy_stmt = NULL;
1298 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1300 gimple *stmt;
1302 gcc_assert (!gsi_end_p (gsi));
1303 stmt = gsi_stmt (gsi);
1304 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1305 continue;
1307 if (gimple_num_ops (stmt) == 2)
1309 tree arg = gimple_assign_rhs1 (stmt);
1311 /* We ignore the subcode because we're
1312 effectively doing a STRIP_NOPS. */
1314 if (TREE_CODE (arg) == ADDR_EXPR
1315 && (TREE_OPERAND (arg, 0)
1316 == gimple_omp_taskreg_data_arg (entry_stmt)))
1318 parcopy_stmt = stmt;
1319 break;
1324 gcc_assert (parcopy_stmt != NULL);
1325 arg = DECL_ARGUMENTS (child_fn);
1327 if (!gimple_in_ssa_p (cfun))
1329 if (gimple_assign_lhs (parcopy_stmt) == arg)
1330 gsi_remove (&gsi, true);
1331 else
1333 /* ?? Is setting the subcode really necessary ?? */
1334 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1335 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1338 else
1340 tree lhs = gimple_assign_lhs (parcopy_stmt);
1341 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1342 /* We'd like to set the rhs to the default def in the child_fn,
1343 but it's too early to create ssa names in the child_fn.
1344 Instead, we set the rhs to the parm. In
1345 move_sese_region_to_fn, we introduce a default def for the
1346 parm, map the parm to its default def, and once we encounter
1347 this stmt, replace the parm with the default def. */
1348 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1349 update_stmt (parcopy_stmt);
1353 /* Declare local variables needed in CHILD_CFUN. */
1354 block = DECL_INITIAL (child_fn);
1355 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1356 /* The gimplifier could record temporaries in parallel/task block
1357 rather than in containing function's local_decls chain,
1358 which would mean cgraph missed finalizing them. Do it now. */
1359 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1360 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1361 varpool_node::finalize_decl (t);
1362 DECL_SAVED_TREE (child_fn) = NULL;
1363 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1364 gimple_set_body (child_fn, NULL);
1365 TREE_USED (block) = 1;
1367 /* Reset DECL_CONTEXT on function arguments. */
1368 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1369 DECL_CONTEXT (t) = child_fn;
1371 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1372 so that it can be moved to the child function. */
1373 gsi = gsi_last_nondebug_bb (entry_bb);
1374 stmt = gsi_stmt (gsi);
1375 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1376 || gimple_code (stmt) == GIMPLE_OMP_TASK
1377 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1378 e = split_block (entry_bb, stmt);
1379 gsi_remove (&gsi, true);
1380 entry_bb = e->dest;
1381 edge e2 = NULL;
1382 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1383 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1384 else
1386 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1387 gcc_assert (e2->dest == region->exit);
1388 remove_edge (BRANCH_EDGE (entry_bb));
1389 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1390 gsi = gsi_last_nondebug_bb (region->exit);
1391 gcc_assert (!gsi_end_p (gsi)
1392 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1393 gsi_remove (&gsi, true);
1396 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1397 if (exit_bb)
1399 gsi = gsi_last_nondebug_bb (exit_bb);
1400 gcc_assert (!gsi_end_p (gsi)
1401 && (gimple_code (gsi_stmt (gsi))
1402 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1403 stmt = gimple_build_return (NULL);
1404 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1405 gsi_remove (&gsi, true);
1408 /* Move the parallel region into CHILD_CFUN. */
1410 if (gimple_in_ssa_p (cfun))
1412 init_tree_ssa (child_cfun);
1413 init_ssa_operands (child_cfun);
1414 child_cfun->gimple_df->in_ssa_p = true;
1415 block = NULL_TREE;
1417 else
1418 block = gimple_block (entry_stmt);
1420 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1421 if (exit_bb)
1422 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1423 if (e2)
1425 basic_block dest_bb = e2->dest;
1426 if (!exit_bb)
1427 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1428 remove_edge (e2);
1429 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1431 /* When the OMP expansion process cannot guarantee an up-to-date
1432 loop tree, arrange for the child function to fixup loops. */
1433 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1434 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1436 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1437 num = vec_safe_length (child_cfun->local_decls);
1438 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1440 t = (*child_cfun->local_decls)[srcidx];
1441 if (DECL_CONTEXT (t) == cfun->decl)
1442 continue;
1443 if (srcidx != dstidx)
1444 (*child_cfun->local_decls)[dstidx] = t;
1445 dstidx++;
1447 if (dstidx != num)
1448 vec_safe_truncate (child_cfun->local_decls, dstidx);
1450 /* Inform the callgraph about the new function. */
1451 child_cfun->curr_properties = cfun->curr_properties;
1452 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1453 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1454 cgraph_node *node = cgraph_node::get_create (child_fn);
1455 node->parallelized_function = 1;
1456 cgraph_node::add_new_function (child_fn, true);
1458 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1459 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1461 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1462 fixed in a following pass. */
1463 push_cfun (child_cfun);
1464 if (need_asm)
1465 assign_assembler_name_if_needed (child_fn);
1467 if (optimize)
1468 optimize_omp_library_calls (entry_stmt);
1469 update_max_bb_count ();
1470 cgraph_edge::rebuild_edges ();
1472 /* Some EH regions might become dead, see PR34608. If
1473 pass_cleanup_cfg isn't the first pass to happen with the
1474 new child, these dead EH edges might cause problems.
1475 Clean them up now. */
1476 if (flag_exceptions)
1478 basic_block bb;
1479 bool changed = false;
1481 FOR_EACH_BB_FN (bb, cfun)
1482 changed |= gimple_purge_dead_eh_edges (bb);
1483 if (changed)
1484 cleanup_tree_cfg ();
1486 if (gimple_in_ssa_p (cfun))
1487 update_ssa (TODO_update_ssa);
1488 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1489 verify_loop_structure ();
1490 pop_cfun ();
1492 if (dump_file && !gimple_in_ssa_p (cfun))
1494 omp_any_child_fn_dumped = true;
1495 dump_function_header (dump_file, child_fn, dump_flags);
1496 dump_function_to_file (child_fn, dump_file, dump_flags);
1500 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1502 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1503 expand_parallel_call (region, new_bb,
1504 as_a <gomp_parallel *> (entry_stmt), ws_args);
1505 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1506 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1507 else
1508 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1509 if (gimple_in_ssa_p (cfun))
1510 update_ssa (TODO_update_ssa_only_virtuals);
1513 /* Information about members of an OpenACC collapsed loop nest. */
1515 struct oacc_collapse
1517 tree base; /* Base value. */
1518 tree iters; /* Number of steps. */
1519 tree step; /* Step size. */
1520 tree tile; /* Tile increment (if tiled). */
1521 tree outer; /* Tile iterator var. */
1524 /* Helper for expand_oacc_for. Determine collapsed loop information.
1525 Fill in COUNTS array. Emit any initialization code before GSI.
1526 Return the calculated outer loop bound of BOUND_TYPE. */
1528 static tree
1529 expand_oacc_collapse_init (const struct omp_for_data *fd,
1530 gimple_stmt_iterator *gsi,
1531 oacc_collapse *counts, tree diff_type,
1532 tree bound_type, location_t loc)
1534 tree tiling = fd->tiling;
1535 tree total = build_int_cst (bound_type, 1);
1536 int ix;
1538 gcc_assert (integer_onep (fd->loop.step));
1539 gcc_assert (integer_zerop (fd->loop.n1));
1541 /* When tiling, the first operand of the tile clause applies to the
1542 innermost loop, and we work outwards from there. Seems
1543 backwards, but whatever. */
1544 for (ix = fd->collapse; ix--;)
1546 const omp_for_data_loop *loop = &fd->loops[ix];
1548 tree iter_type = TREE_TYPE (loop->v);
1549 tree plus_type = iter_type;
1551 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1553 if (POINTER_TYPE_P (iter_type))
1554 plus_type = sizetype;
1556 if (tiling)
1558 tree num = build_int_cst (integer_type_node, fd->collapse);
1559 tree loop_no = build_int_cst (integer_type_node, ix);
1560 tree tile = TREE_VALUE (tiling);
1561 gcall *call
1562 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1563 /* gwv-outer=*/integer_zero_node,
1564 /* gwv-inner=*/integer_zero_node);
1566 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1567 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1568 gimple_call_set_lhs (call, counts[ix].tile);
1569 gimple_set_location (call, loc);
1570 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1572 tiling = TREE_CHAIN (tiling);
1574 else
1576 counts[ix].tile = NULL;
1577 counts[ix].outer = loop->v;
1580 tree b = loop->n1;
1581 tree e = loop->n2;
1582 tree s = loop->step;
1583 bool up = loop->cond_code == LT_EXPR;
1584 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1585 bool negating;
1586 tree expr;
1588 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1589 true, GSI_SAME_STMT);
1590 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1591 true, GSI_SAME_STMT);
1593 /* Convert the step, avoiding possible unsigned->signed overflow. */
1594 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1595 if (negating)
1596 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1597 s = fold_convert (diff_type, s);
1598 if (negating)
1599 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1600 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1601 true, GSI_SAME_STMT);
1603 /* Determine the range, avoiding possible unsigned->signed overflow. */
1604 negating = !up && TYPE_UNSIGNED (iter_type);
1605 expr = fold_build2 (MINUS_EXPR, plus_type,
1606 fold_convert (plus_type, negating ? b : e),
1607 fold_convert (plus_type, negating ? e : b));
1608 expr = fold_convert (diff_type, expr);
1609 if (negating)
1610 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1611 tree range = force_gimple_operand_gsi
1612 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1614 /* Determine number of iterations. */
1615 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1616 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1617 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1619 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1620 true, GSI_SAME_STMT);
1622 counts[ix].base = b;
1623 counts[ix].iters = iters;
1624 counts[ix].step = s;
1626 total = fold_build2 (MULT_EXPR, bound_type, total,
1627 fold_convert (bound_type, iters));
1630 return total;
1633 /* Emit initializers for collapsed loop members. INNER is true if
1634 this is for the element loop of a TILE. IVAR is the outer
1635 loop iteration variable, from which collapsed loop iteration values
1636 are calculated. COUNTS array has been initialized by
1637 expand_oacc_collapse_init. */
1639 static void
1640 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1641 gimple_stmt_iterator *gsi,
1642 const oacc_collapse *counts, tree ivar,
1643 tree diff_type)
1645 tree ivar_type = TREE_TYPE (ivar);
1647 /* The most rapidly changing iteration variable is the innermost
1648 one. */
1649 for (int ix = fd->collapse; ix--;)
1651 const omp_for_data_loop *loop = &fd->loops[ix];
1652 const oacc_collapse *collapse = &counts[ix];
1653 tree v = inner ? loop->v : collapse->outer;
1654 tree iter_type = TREE_TYPE (v);
1655 tree plus_type = iter_type;
1656 enum tree_code plus_code = PLUS_EXPR;
1657 tree expr;
1659 if (POINTER_TYPE_P (iter_type))
1661 plus_code = POINTER_PLUS_EXPR;
1662 plus_type = sizetype;
1665 expr = ivar;
1666 if (ix)
1668 tree mod = fold_convert (ivar_type, collapse->iters);
1669 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1670 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1671 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1672 true, GSI_SAME_STMT);
1675 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1676 fold_convert (diff_type, collapse->step));
1677 expr = fold_build2 (plus_code, iter_type,
1678 inner ? collapse->outer : collapse->base,
1679 fold_convert (plus_type, expr));
1680 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1681 true, GSI_SAME_STMT);
1682 gassign *ass = gimple_build_assign (v, expr);
1683 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
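/* Worked example (numbers are hypothetical): for a collapse(2) nest
   whose outer loop runs 4 times and whose inner loop runs 5 times, the
   flattened iterator covers 0..19, and for IVAR == 13 the code above
   recovers

     inner index = 13 % 5 == 3
     outer index = 13 / 5 == 2

   before scaling each by its step and adding its base.  */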
1687 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1688 of the combined collapse > 1 loop constructs, generate code like:
1689 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1690 if (cond3 is <)
1691 adj = STEP3 - 1;
1692 else
1693 adj = STEP3 + 1;
1694 count3 = (adj + N32 - N31) / STEP3;
1695 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1696 if (cond2 is <)
1697 adj = STEP2 - 1;
1698 else
1699 adj = STEP2 + 1;
1700 count2 = (adj + N22 - N21) / STEP2;
1701 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1702 if (cond1 is <)
1703 adj = STEP1 - 1;
1704 else
1705 adj = STEP1 + 1;
1706 count1 = (adj + N12 - N11) / STEP1;
1707 count = count1 * count2 * count3;
1708 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1709 count = 0;
1710 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1711 of the combined loop constructs, just initialize COUNTS array
1712 from the _looptemp_ clauses. For loop nests with non-rectangular
1713 loops, do this only for the rectangular loops. Then pick
1714 the loops which reference outer vars in their bound expressions
1715 and the loops which they refer to and for this sub-nest compute
1716 number of iterations. For triangular loops use Faulhaber's formula,
1717 otherwise as a fallback, compute by iterating the loops.
1718 If e.g. the sub-nest is
1719 for (I = N11; I COND1 N12; I += STEP1)
1720 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1721 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1723 COUNT = 0;
1724 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1725 for (tmpj = M21 * tmpi + N21;
1726 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1728 int tmpk1 = M31 * tmpj + N31;
1729 int tmpk2 = M32 * tmpj + N32;
1730 if (tmpk1 COND3 tmpk2)
1732 if (COND3 is <)
1733 adj = STEP3 - 1;
1734 else
1735 adj = STEP3 + 1;
1736 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1739 and finally multiply the counts of the rectangular loops not
1740 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1741 store number of iterations of the loops from fd->first_nonrect
1742 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1743 by the counts of rectangular loops not referenced in any non-rectangular
1744 loops sandwiched in between those. */
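/* A concrete instance of the triangular case mentioned above: for

     for (i = 0; i < n; i++)
       for (j = 0; j < i; j++)

   Faulhaber's formula gives the total iteration count directly as
   n * (n - 1) / 2, so no runtime loop is needed to compute COUNT.  */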
1746 /* NOTE: It *could* be better to moosh all of the BBs together,
1747 creating one larger BB with all the computation and the unexpected
1748 jump at the end. I.e.
1750 bool zero3, zero2, zero1, zero;
1752 zero3 = N32 c3 N31;
1753 count3 = (N32 - N31) /[cl] STEP3;
1754 zero2 = N22 c2 N21;
1755 count2 = (N22 - N21) /[cl] STEP2;
1756 zero1 = N12 c1 N11;
1757 count1 = (N12 - N11) /[cl] STEP1;
1758 zero = zero3 || zero2 || zero1;
1759 count = count1 * count2 * count3;
1760 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1762 After all, we expect zero to be false, and thus we expect to have to
1763 evaluate all of the comparison expressions, so short-circuiting
1764 oughtn't be a win. Since the condition isn't protecting a
1765 denominator, we're not concerned about divide-by-zero, so we can
1766 fully evaluate count even if a numerator turned out to be wrong.
1768 It seems like putting this all together would create much better
1769 scheduling opportunities, and less pressure on the chip's branch
1770 predictor. */
1772 static void
1773 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1774 basic_block &entry_bb, tree *counts,
1775 basic_block &zero_iter1_bb, int &first_zero_iter1,
1776 basic_block &zero_iter2_bb, int &first_zero_iter2,
1777 basic_block &l2_dom_bb)
1779 tree t, type = TREE_TYPE (fd->loop.v);
1780 edge e, ne;
1781 int i;
1783 /* Collapsed loops need work for expansion into SSA form. */
1784 gcc_assert (!gimple_in_ssa_p (cfun));
1786 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1787 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1789 gcc_assert (fd->ordered == 0);
1790 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1791 isn't supposed to be handled, as the inner loop doesn't
1792 use it. */
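/* E.g. for a combined construct with collapse(3) the clause chain is
   _looptemp_ (istart), _looptemp_ (iend), _looptemp_ (c1),
   _looptemp_ (c2); the loop below steps past iend (recording NULL_TREE
   as the unused counts[0]) and picks up c1 and c2 as counts[1] and
   counts[2].  (c1/c2 are placeholder names for illustration only.)  */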
1793 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1794 OMP_CLAUSE__LOOPTEMP_);
1795 gcc_assert (innerc);
1796 for (i = 0; i < fd->collapse; i++)
1798 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1799 OMP_CLAUSE__LOOPTEMP_);
1800 gcc_assert (innerc);
1801 if (i)
1802 counts[i] = OMP_CLAUSE_DECL (innerc);
1803 else
1804 counts[0] = NULL_TREE;
1806 if (fd->non_rect
1807 && fd->last_nonrect == fd->first_nonrect + 1
1808 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1810 tree c[4];
1811 for (i = 0; i < 4; i++)
1813 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1814 OMP_CLAUSE__LOOPTEMP_);
1815 gcc_assert (innerc);
1816 c[i] = OMP_CLAUSE_DECL (innerc);
1818 counts[0] = c[0];
1819 fd->first_inner_iterations = c[1];
1820 fd->factor = c[2];
1821 fd->adjn1 = c[3];
1823 return;
1826 for (i = fd->collapse; i < fd->ordered; i++)
1828 tree itype = TREE_TYPE (fd->loops[i].v);
1829 counts[i] = NULL_TREE;
1830 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1831 fold_convert (itype, fd->loops[i].n1),
1832 fold_convert (itype, fd->loops[i].n2));
1833 if (t && integer_zerop (t))
1835 for (i = fd->collapse; i < fd->ordered; i++)
1836 counts[i] = build_int_cst (type, 0);
1837 break;
1840 bool rect_count_seen = false;
1841 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1843 tree itype = TREE_TYPE (fd->loops[i].v);
1845 if (i >= fd->collapse && counts[i])
1846 continue;
1847 if (fd->non_rect)
1849 /* Skip loops that use outer iterators in their expressions
1850 during this phase. */
1851 if (fd->loops[i].m1 || fd->loops[i].m2)
1853 counts[i] = build_zero_cst (type);
1854 continue;
1857 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1858 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1859 fold_convert (itype, fd->loops[i].n1),
1860 fold_convert (itype, fd->loops[i].n2)))
1861 == NULL_TREE || !integer_onep (t)))
1863 gcond *cond_stmt;
1864 tree n1, n2;
1865 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1866 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1867 true, GSI_SAME_STMT);
1868 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1869 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1870 true, GSI_SAME_STMT);
1871 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1872 NULL_TREE, NULL_TREE);
1873 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1874 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1875 expand_omp_regimplify_p, NULL, NULL)
1876 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1877 expand_omp_regimplify_p, NULL, NULL))
1879 *gsi = gsi_for_stmt (cond_stmt);
1880 gimple_regimplify_operands (cond_stmt, gsi);
1882 e = split_block (entry_bb, cond_stmt);
1883 basic_block &zero_iter_bb
1884 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1885 int &first_zero_iter
1886 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1887 if (zero_iter_bb == NULL)
1889 gassign *assign_stmt;
1890 first_zero_iter = i;
1891 zero_iter_bb = create_empty_bb (entry_bb);
1892 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1893 *gsi = gsi_after_labels (zero_iter_bb);
1894 if (i < fd->collapse)
1895 assign_stmt = gimple_build_assign (fd->loop.n2,
1896 build_zero_cst (type));
1897 else
1899 counts[i] = create_tmp_reg (type, ".count");
1900 assign_stmt
1901 = gimple_build_assign (counts[i], build_zero_cst (type));
1903 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1904 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1905 entry_bb);
1907 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1908 ne->probability = profile_probability::very_unlikely ();
1909 e->flags = EDGE_TRUE_VALUE;
1910 e->probability = ne->probability.invert ();
1911 if (l2_dom_bb == NULL)
1912 l2_dom_bb = entry_bb;
1913 entry_bb = e->dest;
1914 *gsi = gsi_last_nondebug_bb (entry_bb);
1917 if (POINTER_TYPE_P (itype))
1918 itype = signed_type_for (itype);
1919 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1920 ? -1 : 1));
1921 t = fold_build2 (PLUS_EXPR, itype,
1922 fold_convert (itype, fd->loops[i].step), t);
1923 t = fold_build2 (PLUS_EXPR, itype, t,
1924 fold_convert (itype, fd->loops[i].n2));
1925 t = fold_build2 (MINUS_EXPR, itype, t,
1926 fold_convert (itype, fd->loops[i].n1));
1927 /* ?? We could probably use CEIL_DIV_EXPR instead of
1928 TRUNC_DIV_EXPR plus the manual adjustment, unless we can't
1929 generate the same code in the end because generically we
1930 don't know that the values involved must be negative for
1931 GT. ?? */
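/* Concrete instance of the count computation below (illustrative
   values only): for (i = 0; i < 10; i += 3) with cond_code LT_EXPR
   this evaluates (STEP - 1 + N2 - N1) / STEP = (2 + 10 - 0) / 3 = 4
   with truncating division, i.e. the four iterations 0, 3, 6, 9 --
   the same result a ceiling division of (N2 - N1) by STEP would
   give.  */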
1932 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1933 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1934 fold_build1 (NEGATE_EXPR, itype, t),
1935 fold_build1 (NEGATE_EXPR, itype,
1936 fold_convert (itype,
1937 fd->loops[i].step)));
1938 else
1939 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1940 fold_convert (itype, fd->loops[i].step));
1941 t = fold_convert (type, t);
1942 if (TREE_CODE (t) == INTEGER_CST)
1943 counts[i] = t;
1944 else
1946 if (i < fd->collapse || i != first_zero_iter2)
1947 counts[i] = create_tmp_reg (type, ".count");
1948 expand_omp_build_assign (gsi, counts[i], t);
1950 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1952 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1953 continue;
1954 if (!rect_count_seen)
1956 t = counts[i];
1957 rect_count_seen = true;
1959 else
1960 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1961 expand_omp_build_assign (gsi, fd->loop.n2, t);
1964 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1966 gcc_assert (fd->last_nonrect != -1);
1968 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1969 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1970 build_zero_cst (type));
1971 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1972 if (fd->loops[i].m1
1973 || fd->loops[i].m2
1974 || fd->loops[i].non_rect_referenced)
1975 break;
1976 if (i == fd->last_nonrect
1977 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1978 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1979 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1981 int o = fd->first_nonrect;
1982 tree itype = TREE_TYPE (fd->loops[o].v);
1983 tree n1o = create_tmp_reg (itype, ".n1o");
1984 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1985 expand_omp_build_assign (gsi, n1o, t);
1986 tree n2o = create_tmp_reg (itype, ".n2o");
1987 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1988 expand_omp_build_assign (gsi, n2o, t);
1989 if (fd->loops[i].m1 && fd->loops[i].m2)
1990 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1991 unshare_expr (fd->loops[i].m1));
1992 else if (fd->loops[i].m1)
1993 t = fold_unary (NEGATE_EXPR, itype,
1994 unshare_expr (fd->loops[i].m1));
1995 else
1996 t = unshare_expr (fd->loops[i].m2);
1997 tree m2minusm1
1998 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1999 true, GSI_SAME_STMT);
2001 gimple_stmt_iterator gsi2 = *gsi;
2002 gsi_prev (&gsi2);
2003 e = split_block (entry_bb, gsi_stmt (gsi2));
2004 e = split_block (e->dest, (gimple *) NULL);
2005 basic_block bb1 = e->src;
2006 entry_bb = e->dest;
2007 *gsi = gsi_after_labels (entry_bb);
2009 gsi2 = gsi_after_labels (bb1);
2010 tree ostep = fold_convert (itype, fd->loops[o].step);
2011 t = build_int_cst (itype, (fd->loops[o].cond_code
2012 == LT_EXPR ? -1 : 1));
2013 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2014 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2015 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2016 if (TYPE_UNSIGNED (itype)
2017 && fd->loops[o].cond_code == GT_EXPR)
2018 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2019 fold_build1 (NEGATE_EXPR, itype, t),
2020 fold_build1 (NEGATE_EXPR, itype, ostep));
2021 else
2022 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2023 tree outer_niters
2024 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2025 true, GSI_SAME_STMT);
2026 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2027 build_one_cst (itype));
2028 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2029 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2030 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2031 true, GSI_SAME_STMT);
2032 tree n1, n2, n1e, n2e;
2033 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2034 if (fd->loops[i].m1)
2036 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2037 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2038 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2040 else
2041 n1 = t;
2042 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2043 true, GSI_SAME_STMT);
2044 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2045 if (fd->loops[i].m2)
2047 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2048 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2049 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2051 else
2052 n2 = t;
2053 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2054 true, GSI_SAME_STMT);
2055 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2056 if (fd->loops[i].m1)
2058 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2059 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2060 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2062 else
2063 n1e = t;
2064 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2065 true, GSI_SAME_STMT);
2066 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2067 if (fd->loops[i].m2)
2069 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2070 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2071 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2073 else
2074 n2e = t;
2075 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2076 true, GSI_SAME_STMT);
2077 gcond *cond_stmt
2078 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2079 NULL_TREE, NULL_TREE);
2080 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2081 e = split_block (bb1, cond_stmt);
2082 e->flags = EDGE_TRUE_VALUE;
2083 e->probability = profile_probability::likely ().guessed ();
2084 basic_block bb2 = e->dest;
2085 gsi2 = gsi_after_labels (bb2);
2087 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2088 NULL_TREE, NULL_TREE);
2089 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2090 e = split_block (bb2, cond_stmt);
2091 e->flags = EDGE_TRUE_VALUE;
2092 e->probability = profile_probability::likely ().guessed ();
2093 gsi2 = gsi_after_labels (e->dest);
2095 tree step = fold_convert (itype, fd->loops[i].step);
2096 t = build_int_cst (itype, (fd->loops[i].cond_code
2097 == LT_EXPR ? -1 : 1));
2098 t = fold_build2 (PLUS_EXPR, itype, step, t);
2099 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2100 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2101 if (TYPE_UNSIGNED (itype)
2102 && fd->loops[i].cond_code == GT_EXPR)
2103 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2104 fold_build1 (NEGATE_EXPR, itype, t),
2105 fold_build1 (NEGATE_EXPR, itype, step));
2106 else
2107 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2108 tree first_inner_iterations
2109 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2110 true, GSI_SAME_STMT);
2111 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2112 if (TYPE_UNSIGNED (itype)
2113 && fd->loops[i].cond_code == GT_EXPR)
2114 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115 fold_build1 (NEGATE_EXPR, itype, t),
2116 fold_build1 (NEGATE_EXPR, itype, step));
2117 else
2118 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119 tree factor
2120 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121 true, GSI_SAME_STMT);
2122 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2123 build_one_cst (itype));
2124 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2125 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2126 t = fold_build2 (MULT_EXPR, itype, factor, t);
2127 t = fold_build2 (PLUS_EXPR, itype,
2128 fold_build2 (MULT_EXPR, itype, outer_niters,
2129 first_inner_iterations), t);
2130 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2131 fold_convert (type, t));
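/* A sketch of what the folds above amount to (same notation as the
   comment before this function): with F = first_inner_iterations,
   D = factor (the change in the inner iteration count per outer step)
   and O = outer_niters, the value stored into counts[fd->last_nonrect]
   is O * F + D * O * (O - 1) / 2, i.e. the arithmetic series
   F + (F + D) + ... + (F + (O - 1) * D).  */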
2133 basic_block bb3 = create_empty_bb (bb1);
2134 add_bb_to_loop (bb3, bb1->loop_father);
2136 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2137 e->probability = profile_probability::unlikely ().guessed ();
2139 gsi2 = gsi_after_labels (bb3);
2140 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2141 NULL_TREE, NULL_TREE);
2142 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2143 e = split_block (bb3, cond_stmt);
2144 e->flags = EDGE_TRUE_VALUE;
2145 e->probability = profile_probability::likely ().guessed ();
2146 basic_block bb4 = e->dest;
2148 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2149 ne->probability = e->probability.invert ();
2151 basic_block bb5 = create_empty_bb (bb2);
2152 add_bb_to_loop (bb5, bb2->loop_father);
2154 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2155 ne->probability = profile_probability::unlikely ().guessed ();
2157 for (int j = 0; j < 2; j++)
2159 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2160 t = fold_build2 (MINUS_EXPR, itype,
2161 unshare_expr (fd->loops[i].n1),
2162 unshare_expr (fd->loops[i].n2));
2163 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2164 tree tem
2165 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2166 true, GSI_SAME_STMT);
2167 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2168 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2169 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2170 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2171 true, GSI_SAME_STMT);
2172 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2173 if (fd->loops[i].m1)
2175 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2176 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2177 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2179 else
2180 n1 = t;
2181 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2182 true, GSI_SAME_STMT);
2183 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2184 if (fd->loops[i].m2)
2186 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2187 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2188 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2190 else
2191 n2 = t;
2192 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2193 true, GSI_SAME_STMT);
2194 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2196 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2197 NULL_TREE, NULL_TREE);
2198 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2199 e = split_block (gsi_bb (gsi2), cond_stmt);
2200 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2201 e->probability = profile_probability::unlikely ().guessed ();
2202 ne = make_edge (e->src, bb1,
2203 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2204 ne->probability = e->probability.invert ();
2205 gsi2 = gsi_after_labels (e->dest);
2207 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2208 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2210 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2213 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2214 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2215 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2217 if (fd->first_nonrect + 1 == fd->last_nonrect)
2219 fd->first_inner_iterations = first_inner_iterations;
2220 fd->factor = factor;
2221 fd->adjn1 = n1o;
2224 else
2226 /* Fallback implementation. Evaluate the loops with m1/m2
2227 non-NULL as well as their outer loops at runtime using temporaries
2228 instead of the original iteration variables, and in the
2229 body just bump the counter. */
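/* Rough shape of the control flow emitted below (a sketch, not the
   literal GIMPLE): one block per loop of the sub-nest recomputes that
   loop's bounds from the temporaries vs[] of the enclosing loops, and
   the blocks are chained through cur_bb/next_bb so each temporary is
   stepped and re-tested; at the innermost level (i == fd->last_nonrect)
   no loop is emitted at all -- its whole iteration count
   (adj + n2 - n1) / step is added to counts[fd->last_nonrect] in one
   step.  */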
2230 gimple_stmt_iterator gsi2 = *gsi;
2231 gsi_prev (&gsi2);
2232 e = split_block (entry_bb, gsi_stmt (gsi2));
2233 e = split_block (e->dest, (gimple *) NULL);
2234 basic_block cur_bb = e->src;
2235 basic_block next_bb = e->dest;
2236 entry_bb = e->dest;
2237 *gsi = gsi_after_labels (entry_bb);
2239 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2240 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2242 for (i = 0; i <= fd->last_nonrect; i++)
2244 if (fd->loops[i].m1 == NULL_TREE
2245 && fd->loops[i].m2 == NULL_TREE
2246 && !fd->loops[i].non_rect_referenced)
2247 continue;
2249 tree itype = TREE_TYPE (fd->loops[i].v);
2251 gsi2 = gsi_after_labels (cur_bb);
2252 tree n1, n2;
2253 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2254 if (fd->loops[i].m1 == NULL_TREE)
2255 n1 = t;
2256 else if (POINTER_TYPE_P (itype))
2258 gcc_assert (integer_onep (fd->loops[i].m1));
2259 t = fold_convert (sizetype,
2260 unshare_expr (fd->loops[i].n1));
2261 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2263 else
2265 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2266 n1 = fold_build2 (MULT_EXPR, itype,
2267 vs[i - fd->loops[i].outer], n1);
2268 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2270 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2271 true, GSI_SAME_STMT);
2272 if (i < fd->last_nonrect)
2274 vs[i] = create_tmp_reg (itype, ".it");
2275 expand_omp_build_assign (&gsi2, vs[i], n1);
2277 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2278 if (fd->loops[i].m2 == NULL_TREE)
2279 n2 = t;
2280 else if (POINTER_TYPE_P (itype))
2282 gcc_assert (integer_onep (fd->loops[i].m2));
2283 t = fold_convert (sizetype,
2284 unshare_expr (fd->loops[i].n2));
2285 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2287 else
2289 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2290 n2 = fold_build2 (MULT_EXPR, itype,
2291 vs[i - fd->loops[i].outer], n2);
2292 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2294 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2295 true, GSI_SAME_STMT);
2296 if (POINTER_TYPE_P (itype))
2297 itype = signed_type_for (itype);
2298 if (i == fd->last_nonrect)
2300 gcond *cond_stmt
2301 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2302 NULL_TREE, NULL_TREE);
2303 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2304 e = split_block (cur_bb, cond_stmt);
2305 e->flags = EDGE_TRUE_VALUE;
2306 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2307 e->probability = profile_probability::likely ().guessed ();
2308 ne->probability = e->probability.invert ();
2309 gsi2 = gsi_after_labels (e->dest);
2311 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2312 ? -1 : 1));
2313 t = fold_build2 (PLUS_EXPR, itype,
2314 fold_convert (itype, fd->loops[i].step), t);
2315 t = fold_build2 (PLUS_EXPR, itype, t,
2316 fold_convert (itype, n2));
2317 t = fold_build2 (MINUS_EXPR, itype, t,
2318 fold_convert (itype, n1));
2319 tree step = fold_convert (itype, fd->loops[i].step);
2320 if (TYPE_UNSIGNED (itype)
2321 && fd->loops[i].cond_code == GT_EXPR)
2322 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2323 fold_build1 (NEGATE_EXPR, itype, t),
2324 fold_build1 (NEGATE_EXPR, itype, step));
2325 else
2326 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2327 t = fold_convert (type, t);
2328 t = fold_build2 (PLUS_EXPR, type,
2329 counts[fd->last_nonrect], t);
2330 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2331 true, GSI_SAME_STMT);
2332 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2333 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2334 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2335 break;
2337 e = split_block (cur_bb, last_stmt (cur_bb));
2339 basic_block new_cur_bb = create_empty_bb (cur_bb);
2340 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2342 gsi2 = gsi_after_labels (e->dest);
2343 tree step = fold_convert (itype,
2344 unshare_expr (fd->loops[i].step));
2345 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2346 t = fold_build_pointer_plus (vs[i],
2347 fold_convert (sizetype, step));
2348 else
2349 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2350 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2351 true, GSI_SAME_STMT);
2352 expand_omp_build_assign (&gsi2, vs[i], t);
2354 ne = split_block (e->dest, last_stmt (e->dest));
2355 gsi2 = gsi_after_labels (ne->dest);
2357 gcond *cond_stmt
2358 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2359 NULL_TREE, NULL_TREE);
2360 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2361 edge e3, e4;
2362 if (next_bb == entry_bb)
2364 e3 = find_edge (ne->dest, next_bb);
2365 e3->flags = EDGE_FALSE_VALUE;
2367 else
2368 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2369 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2370 e4->probability = profile_probability::likely ().guessed ();
2371 e3->probability = e4->probability.invert ();
2372 basic_block esrc = e->src;
2373 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2374 cur_bb = new_cur_bb;
2375 basic_block latch_bb = next_bb;
2376 next_bb = e->dest;
2377 remove_edge (e);
2378 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2379 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2380 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2383 t = NULL_TREE;
2384 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2385 if (!fd->loops[i].non_rect_referenced
2386 && fd->loops[i].m1 == NULL_TREE
2387 && fd->loops[i].m2 == NULL_TREE)
2389 if (t == NULL_TREE)
2390 t = counts[i];
2391 else
2392 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2394 if (t)
2396 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2397 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2399 if (!rect_count_seen)
2400 t = counts[fd->last_nonrect];
2401 else
2402 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2403 counts[fd->last_nonrect]);
2404 expand_omp_build_assign (gsi, fd->loop.n2, t);
2406 else if (fd->non_rect)
2408 tree t = fd->loop.n2;
2409 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2410 int non_rect_referenced = 0, non_rect = 0;
2411 for (i = 0; i < fd->collapse; i++)
2413 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2414 && !integer_zerop (counts[i]))
2415 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2416 if (fd->loops[i].non_rect_referenced)
2417 non_rect_referenced++;
2418 if (fd->loops[i].m1 || fd->loops[i].m2)
2419 non_rect++;
2421 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2422 counts[fd->last_nonrect] = t;
2426 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2427 T = V;
2428 V3 = N31 + (T % count3) * STEP3;
2429 T = T / count3;
2430 V2 = N21 + (T % count2) * STEP2;
2431 T = T / count2;
2432 V1 = N11 + T * STEP1;
2433 if this loop doesn't have an inner loop construct combined with it.
2434 If it does have an inner loop construct combined with it and the
2435 iteration count isn't known constant, store values from counts array
2436 into its _looptemp_ temporaries instead.
2437 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2438 inclusive), use the count of all those loops together, and either
2439 find quadratic etc. equation roots, or as a fallback, do:
2440 COUNT = 0;
2441 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2442 for (tmpj = M21 * tmpi + N21;
2443 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2445 int tmpk1 = M31 * tmpj + N31;
2446 int tmpk2 = M32 * tmpj + N32;
2447 if (tmpk1 COND3 tmpk2)
2449 if (COND3 is <)
2450 adj = STEP3 - 1;
2451 else
2452 adj = STEP3 + 1;
2453 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2454 if (COUNT + temp > T)
2456 V1 = tmpi;
2457 V2 = tmpj;
2458 V3 = tmpk1 + (T - COUNT) * STEP3;
2459 goto done;
2461 else
2462 COUNT += temp;
2465 done:;
2466 but for optional innermost or outermost rectangular loops that aren't
2467 referenced by other loop expressions keep doing the division/modulo. */
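/* Small worked example of the division/modulo recovery above
   (illustrative numbers only): with count3 = 4, count2 = 5 and a
   flattened iteration T = 17,
     V3 = N31 + (17 % 4) * STEP3 = N31 + 1 * STEP3,  T = 17 / 4 = 4,
     V2 = N21 + (4 % 5) * STEP2 = N21 + 4 * STEP2,   T = 4 / 5 = 0,
     V1 = N11 + 0 * STEP1 = N11.  */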
2469 static void
2470 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2471 tree *counts, tree *nonrect_bounds,
2472 gimple *inner_stmt, tree startvar)
2474 int i;
2475 if (gimple_omp_for_combined_p (fd->for_stmt))
2477 /* If fd->loop.n2 is constant, then no propagation of the counts
2478 is needed, they are constant. */
2479 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2480 return;
2482 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2483 ? gimple_omp_taskreg_clauses (inner_stmt)
2484 : gimple_omp_for_clauses (inner_stmt);
2485 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2486 isn't supposed to be handled, as the inner loop doesn't
2487 use it. */
2488 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2489 gcc_assert (innerc);
2490 int count = 0;
2491 if (fd->non_rect
2492 && fd->last_nonrect == fd->first_nonrect + 1
2493 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2494 count = 4;
2495 for (i = 0; i < fd->collapse + count; i++)
2497 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2498 OMP_CLAUSE__LOOPTEMP_);
2499 gcc_assert (innerc);
2500 if (i)
2502 tree tem = OMP_CLAUSE_DECL (innerc);
2503 tree t;
2504 if (i < fd->collapse)
2505 t = counts[i];
2506 else
2507 switch (i - fd->collapse)
2509 case 0: t = counts[0]; break;
2510 case 1: t = fd->first_inner_iterations; break;
2511 case 2: t = fd->factor; break;
2512 case 3: t = fd->adjn1; break;
2513 default: gcc_unreachable ();
2515 t = fold_convert (TREE_TYPE (tem), t);
2516 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2517 false, GSI_CONTINUE_LINKING);
2518 gassign *stmt = gimple_build_assign (tem, t);
2519 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2522 return;
2525 tree type = TREE_TYPE (fd->loop.v);
2526 tree tem = create_tmp_reg (type, ".tem");
2527 gassign *stmt = gimple_build_assign (tem, startvar);
2528 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2530 for (i = fd->collapse - 1; i >= 0; i--)
2532 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2533 itype = vtype;
2534 if (POINTER_TYPE_P (vtype))
2535 itype = signed_type_for (vtype);
2536 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2537 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2538 else
2539 t = tem;
2540 if (i == fd->last_nonrect)
2542 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2543 false, GSI_CONTINUE_LINKING);
2544 tree stopval = t;
2545 tree idx = create_tmp_reg (type, ".count");
2546 expand_omp_build_assign (gsi, idx,
2547 build_zero_cst (type), true);
2548 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2549 if (fd->first_nonrect + 1 == fd->last_nonrect
2550 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2551 || fd->first_inner_iterations)
2552 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2553 != CODE_FOR_nothing)
2554 && !integer_zerop (fd->loop.n2))
2556 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2557 tree itype = TREE_TYPE (fd->loops[i].v);
2558 tree first_inner_iterations = fd->first_inner_iterations;
2559 tree factor = fd->factor;
2560 gcond *cond_stmt
2561 = gimple_build_cond (NE_EXPR, factor,
2562 build_zero_cst (TREE_TYPE (factor)),
2563 NULL_TREE, NULL_TREE);
2564 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2565 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2566 basic_block bb0 = e->src;
2567 e->flags = EDGE_TRUE_VALUE;
2568 e->probability = profile_probability::likely ();
2569 bb_triang_dom = bb0;
2570 *gsi = gsi_after_labels (e->dest);
2571 tree slltype = long_long_integer_type_node;
2572 tree ulltype = long_long_unsigned_type_node;
2573 tree stopvalull = fold_convert (ulltype, stopval);
2574 stopvalull
2575 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2576 false, GSI_CONTINUE_LINKING);
2577 first_inner_iterations
2578 = fold_convert (slltype, first_inner_iterations);
2579 first_inner_iterations
2580 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2581 NULL_TREE, false,
2582 GSI_CONTINUE_LINKING);
2583 factor = fold_convert (slltype, factor);
2584 factor
2585 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2586 false, GSI_CONTINUE_LINKING);
2587 tree first_inner_iterationsd
2588 = fold_build1 (FLOAT_EXPR, double_type_node,
2589 first_inner_iterations);
2590 first_inner_iterationsd
2591 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2592 NULL_TREE, false,
2593 GSI_CONTINUE_LINKING);
2594 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2595 factor);
2596 factord = force_gimple_operand_gsi (gsi, factord, true,
2597 NULL_TREE, false,
2598 GSI_CONTINUE_LINKING);
2599 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2600 stopvalull);
2601 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2602 NULL_TREE, false,
2603 GSI_CONTINUE_LINKING);
2604 /* Temporarily disable flag_rounding_math; the values here are
2605 just decimal numbers divided by 2, and worst-case imprecision
2606 due to too-large values ought to be caught later by the
2607 checks for the fallback path. */
2608 int save_flag_rounding_math = flag_rounding_math;
2609 flag_rounding_math = 0;
2610 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2611 build_real (double_type_node, dconst2));
2612 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2613 first_inner_iterationsd, t);
2614 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2615 GSI_CONTINUE_LINKING);
2616 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2617 build_real (double_type_node, dconst2));
2618 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2619 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2620 fold_build2 (MULT_EXPR, double_type_node,
2621 t3, t3));
2622 flag_rounding_math = save_flag_rounding_math;
2623 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2624 GSI_CONTINUE_LINKING);
2625 if (flag_exceptions
2626 && cfun->can_throw_non_call_exceptions
2627 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2629 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2630 build_zero_cst (double_type_node));
2631 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2632 false, GSI_CONTINUE_LINKING);
2633 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2634 boolean_false_node,
2635 NULL_TREE, NULL_TREE);
2637 else
2638 cond_stmt
2639 = gimple_build_cond (LT_EXPR, t,
2640 build_zero_cst (double_type_node),
2641 NULL_TREE, NULL_TREE);
2642 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2643 e = split_block (gsi_bb (*gsi), cond_stmt);
2644 basic_block bb1 = e->src;
2645 e->flags = EDGE_FALSE_VALUE;
2646 e->probability = profile_probability::very_likely ();
2647 *gsi = gsi_after_labels (e->dest);
2648 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2649 tree sqrtr = create_tmp_var (double_type_node);
2650 gimple_call_set_lhs (call, sqrtr);
2651 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2652 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2653 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2654 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2655 tree c = create_tmp_var (ulltype);
2656 tree d = create_tmp_var (ulltype);
2657 expand_omp_build_assign (gsi, c, t, true);
2658 t = fold_build2 (MINUS_EXPR, ulltype, c,
2659 build_one_cst (ulltype));
2660 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2661 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2662 t = fold_build2 (MULT_EXPR, ulltype,
2663 fold_convert (ulltype, fd->factor), t);
2664 tree t2
2665 = fold_build2 (MULT_EXPR, ulltype, c,
2666 fold_convert (ulltype,
2667 fd->first_inner_iterations));
2668 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2669 expand_omp_build_assign (gsi, d, t, true);
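/* Sketch of the math above (same notation as the earlier
   triangular-count comments): with F = first_inner_iterations,
   D = factor and T = stopval, the number of logical iterations
   preceding outer iteration c is S(c) = c * F + D * c * (c - 1) / 2.
   Finding the largest c with S(c) <= T means solving the quadratic
   (D / 2) * c^2 + (F - D / 2) * c - T = 0, i.e.
     c = (sqrt ((F - D / 2)^2 + 2 * D * T) - (F - D / 2)) / D,
   which is what the double-precision computation and the IFN_SQRT
   call produce; d is the exact S(c), used below to validate the
   floating-point guess.  */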
2670 t = fold_build2 (MULT_EXPR, ulltype,
2671 fold_convert (ulltype, fd->factor), c);
2672 t = fold_build2 (PLUS_EXPR, ulltype,
2673 t, fold_convert (ulltype,
2674 fd->first_inner_iterations));
2675 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2676 GSI_CONTINUE_LINKING);
2677 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2678 NULL_TREE, NULL_TREE);
2679 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2680 e = split_block (gsi_bb (*gsi), cond_stmt);
2681 basic_block bb2 = e->src;
2682 e->flags = EDGE_TRUE_VALUE;
2683 e->probability = profile_probability::very_likely ();
2684 *gsi = gsi_after_labels (e->dest);
2685 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2686 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2687 GSI_CONTINUE_LINKING);
2688 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2689 NULL_TREE, NULL_TREE);
2690 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2691 e = split_block (gsi_bb (*gsi), cond_stmt);
2692 basic_block bb3 = e->src;
2693 e->flags = EDGE_FALSE_VALUE;
2694 e->probability = profile_probability::very_likely ();
2695 *gsi = gsi_after_labels (e->dest);
2696 t = fold_convert (itype, c);
2697 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2698 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2699 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2700 GSI_CONTINUE_LINKING);
2701 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2702 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2703 t2 = fold_convert (itype, t2);
2704 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2705 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2706 if (fd->loops[i].m1)
2708 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2709 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2711 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2712 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2713 bb_triang = e->src;
2714 *gsi = gsi_after_labels (e->dest);
2715 remove_edge (e);
2716 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2717 e->probability = profile_probability::very_unlikely ();
2718 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2719 e->probability = profile_probability::very_unlikely ();
2720 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2721 e->probability = profile_probability::very_unlikely ();
2723 basic_block bb4 = create_empty_bb (bb0);
2724 add_bb_to_loop (bb4, bb0->loop_father);
2725 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2726 e->probability = profile_probability::unlikely ();
2727 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2728 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2729 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2730 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2731 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2732 counts[i], counts[i - 1]);
2733 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2734 GSI_CONTINUE_LINKING);
2735 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2736 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2737 t = fold_convert (itype, t);
2738 t2 = fold_convert (itype, t2);
2739 t = fold_build2 (MULT_EXPR, itype, t,
2740 fold_convert (itype, fd->loops[i].step));
2741 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2742 t2 = fold_build2 (MULT_EXPR, itype, t2,
2743 fold_convert (itype, fd->loops[i - 1].step));
2744 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2745 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2746 false, GSI_CONTINUE_LINKING);
2747 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2748 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2749 if (fd->loops[i].m1)
2751 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2752 fd->loops[i - 1].v);
2753 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2755 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2756 false, GSI_CONTINUE_LINKING);
2757 stmt = gimple_build_assign (fd->loops[i].v, t);
2758 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2760 /* Fallback implementation. Evaluate the loops between
2761 fd->first_nonrect and fd->last_nonrect (inclusive) at
2762 runtime using temporaries instead of the original iteration
2763 variables; in the body just bump the counter and compare it
2764 with the desired value. */
2765 gimple_stmt_iterator gsi2 = *gsi;
2766 basic_block entry_bb = gsi_bb (gsi2);
2767 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2768 e = split_block (e->dest, (gimple *) NULL);
2769 basic_block dom_bb = NULL;
2770 basic_block cur_bb = e->src;
2771 basic_block next_bb = e->dest;
2772 entry_bb = e->dest;
2773 *gsi = gsi_after_labels (entry_bb);
2775 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2776 tree n1 = NULL_TREE, n2 = NULL_TREE;
2777 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2779 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2781 tree itype = TREE_TYPE (fd->loops[j].v);
2782 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2783 && fd->loops[j].m2 == NULL_TREE
2784 && !fd->loops[j].non_rect_referenced);
2785 gsi2 = gsi_after_labels (cur_bb);
2786 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2787 if (fd->loops[j].m1 == NULL_TREE)
2788 n1 = rect_p ? build_zero_cst (type) : t;
2789 else if (POINTER_TYPE_P (itype))
2791 gcc_assert (integer_onep (fd->loops[j].m1));
2792 t = fold_convert (sizetype,
2793 unshare_expr (fd->loops[j].n1));
2794 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2796 else
2798 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2799 n1 = fold_build2 (MULT_EXPR, itype,
2800 vs[j - fd->loops[j].outer], n1);
2801 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2803 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2804 true, GSI_SAME_STMT);
2805 if (j < fd->last_nonrect)
2807 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2808 expand_omp_build_assign (&gsi2, vs[j], n1);
2810 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2811 if (fd->loops[j].m2 == NULL_TREE)
2812 n2 = rect_p ? counts[j] : t;
2813 else if (POINTER_TYPE_P (itype))
2815 gcc_assert (integer_onep (fd->loops[j].m2));
2816 t = fold_convert (sizetype,
2817 unshare_expr (fd->loops[j].n2));
2818 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2820 else
2822 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2823 n2 = fold_build2 (MULT_EXPR, itype,
2824 vs[j - fd->loops[j].outer], n2);
2825 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2827 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2828 true, GSI_SAME_STMT);
2829 if (POINTER_TYPE_P (itype))
2830 itype = signed_type_for (itype);
2831 if (j == fd->last_nonrect)
2833 gcond *cond_stmt
2834 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2835 NULL_TREE, NULL_TREE);
2836 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2837 e = split_block (cur_bb, cond_stmt);
2838 e->flags = EDGE_TRUE_VALUE;
2839 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2840 e->probability = profile_probability::likely ().guessed ();
2841 ne->probability = e->probability.invert ();
2842 gsi2 = gsi_after_labels (e->dest);
2844 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2845 ? -1 : 1));
2846 t = fold_build2 (PLUS_EXPR, itype,
2847 fold_convert (itype, fd->loops[j].step), t);
2848 t = fold_build2 (PLUS_EXPR, itype, t,
2849 fold_convert (itype, n2));
2850 t = fold_build2 (MINUS_EXPR, itype, t,
2851 fold_convert (itype, n1));
2852 tree step = fold_convert (itype, fd->loops[j].step);
2853 if (TYPE_UNSIGNED (itype)
2854 && fd->loops[j].cond_code == GT_EXPR)
2855 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2856 fold_build1 (NEGATE_EXPR, itype, t),
2857 fold_build1 (NEGATE_EXPR, itype, step));
2858 else
2859 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2860 t = fold_convert (type, t);
2861 t = fold_build2 (PLUS_EXPR, type, idx, t);
2862 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2863 true, GSI_SAME_STMT);
2864 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2865 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2866 cond_stmt
2867 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2868 NULL_TREE);
2869 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2870 e = split_block (gsi_bb (gsi2), cond_stmt);
2871 e->flags = EDGE_TRUE_VALUE;
2872 e->probability = profile_probability::likely ().guessed ();
2873 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2874 ne->probability = e->probability.invert ();
2875 gsi2 = gsi_after_labels (e->dest);
2876 expand_omp_build_assign (&gsi2, idx, t);
2877 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2878 break;
2880 e = split_block (cur_bb, last_stmt (cur_bb));
2882 basic_block new_cur_bb = create_empty_bb (cur_bb);
2883 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2885 gsi2 = gsi_after_labels (e->dest);
2886 if (rect_p)
2887 t = fold_build2 (PLUS_EXPR, type, vs[j],
2888 build_one_cst (type));
2889 else
2891 tree step
2892 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2893 if (POINTER_TYPE_P (vtype))
2894 t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2895 step));
2896 else
2897 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2899 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2900 true, GSI_SAME_STMT);
2901 expand_omp_build_assign (&gsi2, vs[j], t);
2903 edge ne = split_block (e->dest, last_stmt (e->dest));
2904 gsi2 = gsi_after_labels (ne->dest);
2906 gcond *cond_stmt;
2907 if (next_bb == entry_bb)
2908 /* No need to actually check the outermost condition. */
2909 cond_stmt
2910 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2911 boolean_true_node,
2912 NULL_TREE, NULL_TREE);
2913 else
2914 cond_stmt
2915 = gimple_build_cond (rect_p ? LT_EXPR
2916 : fd->loops[j].cond_code,
2917 vs[j], n2, NULL_TREE, NULL_TREE);
2918 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2919 edge e3, e4;
2920 if (next_bb == entry_bb)
2922 e3 = find_edge (ne->dest, next_bb);
2923 e3->flags = EDGE_FALSE_VALUE;
2924 dom_bb = ne->dest;
2926 else
2927 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2928 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2929 e4->probability = profile_probability::likely ().guessed ();
2930 e3->probability = e4->probability.invert ();
2931 basic_block esrc = e->src;
2932 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2933 cur_bb = new_cur_bb;
2934 basic_block latch_bb = next_bb;
2935 next_bb = e->dest;
2936 remove_edge (e);
2937 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2938 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2939 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2941 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2943 tree vtype = TREE_TYPE (fd->loops[j].v);
2944 tree itype = vtype;
2945 if (POINTER_TYPE_P (itype))
2946 itype = signed_type_for (itype);
2947 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2948 && fd->loops[j].m2 == NULL_TREE
2949 && !fd->loops[j].non_rect_referenced);
2950 if (j == fd->last_nonrect)
2952 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2953 t = fold_convert (itype, t);
2954 tree t2
2955 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2956 t = fold_build2 (MULT_EXPR, itype, t, t2);
2957 if (POINTER_TYPE_P (vtype))
2958 t = fold_build_pointer_plus (n1,
2959 fold_convert (sizetype, t));
2960 else
2961 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2963 else if (rect_p)
2965 t = fold_convert (itype, vs[j]);
2966 t = fold_build2 (MULT_EXPR, itype, t,
2967 fold_convert (itype, fd->loops[j].step));
2968 if (POINTER_TYPE_P (vtype))
2969 t = fold_build_pointer_plus (fd->loops[j].n1,
2970 fold_convert (sizetype, t));
2971 else
2972 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2974 else
2975 t = vs[j];
2976 t = force_gimple_operand_gsi (gsi, t, false,
2977 NULL_TREE, true,
2978 GSI_SAME_STMT);
2979 stmt = gimple_build_assign (fd->loops[j].v, t);
2980 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2982 if (gsi_end_p (*gsi))
2983 *gsi = gsi_last_bb (gsi_bb (*gsi));
2984 else
2985 gsi_prev (gsi);
2986 if (bb_triang)
2988 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2989 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2990 *gsi = gsi_after_labels (e->dest);
2991 if (!gsi_end_p (*gsi))
2992 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2993 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2996 else
2998 t = fold_convert (itype, t);
2999 t = fold_build2 (MULT_EXPR, itype, t,
3000 fold_convert (itype, fd->loops[i].step));
3001 if (POINTER_TYPE_P (vtype))
3002 t = fold_build_pointer_plus (fd->loops[i].n1, t);
3003 else
3004 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3005 t = force_gimple_operand_gsi (gsi, t,
3006 DECL_P (fd->loops[i].v)
3007 && TREE_ADDRESSABLE (fd->loops[i].v),
3008 NULL_TREE, false,
3009 GSI_CONTINUE_LINKING);
3010 stmt = gimple_build_assign (fd->loops[i].v, t);
3011 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3013 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3015 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3016 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3017 false, GSI_CONTINUE_LINKING);
3018 stmt = gimple_build_assign (tem, t);
3019 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3021 if (i == fd->last_nonrect)
3022 i = fd->first_nonrect;
3024 if (fd->non_rect)
3025 for (i = 0; i <= fd->last_nonrect; i++)
3026 if (fd->loops[i].m2)
3028 tree itype = TREE_TYPE (fd->loops[i].v);
3030 tree t;
3031 if (POINTER_TYPE_P (itype))
3033 gcc_assert (integer_onep (fd->loops[i].m2));
3034 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3035 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v, t);
3038 else
3040 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3041 t = fold_build2 (MULT_EXPR, itype,
3042 fd->loops[i - fd->loops[i].outer].v, t);
3043 t = fold_build2 (PLUS_EXPR, itype, t,
3044 fold_convert (itype,
3045 unshare_expr (fd->loops[i].n2)));
3047 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3048 t = force_gimple_operand_gsi (gsi, t, false,
3049 NULL_TREE, false,
3050 GSI_CONTINUE_LINKING);
3051 stmt = gimple_build_assign (nonrect_bounds[i], t);
3052 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3056 /* Helper function for expand_omp_for_*. Generate code like:
3057 L10:
3058 V3 += STEP3;
3059 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3060 L11:
3061 V3 = N31;
3062 V2 += STEP2;
3063 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3064 L12:
3065 V2 = N21;
3066 V1 += STEP1;
3067 goto BODY_BB;
3068 For non-rectangular loops, use temporaries stored in nonrect_bounds
3069 for the upper bounds if M?2 multiplier is present. Given e.g.
3070 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3071 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3072 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3073 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3075 L10:
3076 V4 += STEP4;
3077 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3078 L11:
3079 V4 = N41 + M41 * V2; // This can be left out if the loop
3080 // refers to the immediate parent loop
3081 V3 += STEP3;
3082 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3083 L12:
3084 V3 = N31;
3085 V2 += STEP2;
3086 if (V2 cond2 N22) goto L120; else goto L13;
3087 L120:
3088 V4 = N41 + M41 * V2;
3089 NONRECT_BOUND4 = N42 + M42 * V2;
3090 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3091 L13:
3092 V2 = N21;
3093 V1 += STEP1;
3094 goto L120; */
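/* Viewed from the source level this is the "odometer" carry for a
   collapsed nest: only the innermost variable is stepped
   unconditionally, and whenever a variable runs past its bound it is
   reset to its lower bound and the carry propagates one level out.
   In the non-rectangular variant V4 and NONRECT_BOUND4 additionally
   have to be recomputed whenever V2 (the variable they depend on)
   changes, which is why L120 re-evaluates both before re-testing.
   (Informal restatement of the scheme above, nothing new.)  */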
3096 static basic_block
3097 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3098 basic_block cont_bb, basic_block body_bb)
3100 basic_block last_bb, bb, collapse_bb = NULL;
3101 int i;
3102 gimple_stmt_iterator gsi;
3103 edge e;
3104 tree t;
3105 gimple *stmt;
3107 last_bb = cont_bb;
3108 for (i = fd->collapse - 1; i >= 0; i--)
3110 tree vtype = TREE_TYPE (fd->loops[i].v);
3112 bb = create_empty_bb (last_bb);
3113 add_bb_to_loop (bb, last_bb->loop_father);
3114 gsi = gsi_start_bb (bb);
3116 if (i < fd->collapse - 1)
3118 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3119 e->probability
3120 = profile_probability::guessed_always ().apply_scale (1, 8);
3122 struct omp_for_data_loop *l = &fd->loops[i + 1];
3123 if (l->m1 == NULL_TREE || l->outer != 1)
3125 t = l->n1;
3126 if (l->m1)
3128 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3129 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3130 fold_convert (sizetype, t));
3131 else
3133 tree t2
3134 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3135 fd->loops[i + 1 - l->outer].v, l->m1);
3136 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3139 t = force_gimple_operand_gsi (&gsi, t,
3140 DECL_P (l->v)
3141 && TREE_ADDRESSABLE (l->v),
3142 NULL_TREE, false,
3143 GSI_CONTINUE_LINKING);
3144 stmt = gimple_build_assign (l->v, t);
3145 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3148 else
3149 collapse_bb = bb;
3151 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3153 if (POINTER_TYPE_P (vtype))
3154 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3155 else
3156 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3157 t = force_gimple_operand_gsi (&gsi, t,
3158 DECL_P (fd->loops[i].v)
3159 && TREE_ADDRESSABLE (fd->loops[i].v),
3160 NULL_TREE, false, GSI_CONTINUE_LINKING);
3161 stmt = gimple_build_assign (fd->loops[i].v, t);
3162 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3164 if (fd->loops[i].non_rect_referenced)
3166 basic_block update_bb = NULL, prev_bb = NULL;
3167 for (int j = i + 1; j <= fd->last_nonrect; j++)
3168 if (j - fd->loops[j].outer == i)
3170 tree n1, n2;
3171 struct omp_for_data_loop *l = &fd->loops[j];
3172 basic_block this_bb = create_empty_bb (last_bb);
3173 add_bb_to_loop (this_bb, last_bb->loop_father);
3174 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3175 if (prev_bb)
3177 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3178 e->probability
3179 = profile_probability::guessed_always ().apply_scale (7, 8);
3181 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3183 if (l->m1)
3185 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3186 t = fold_build_pointer_plus (fd->loops[i].v,
3187 fold_convert (sizetype,
3188 l->n1));
3189 else
3191 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3192 fd->loops[i].v);
3193 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3194 t, l->n1);
3196 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3197 false,
3198 GSI_CONTINUE_LINKING);
3199 stmt = gimple_build_assign (l->v, n1);
3200 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3201 n1 = l->v;
3203 else
3204 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3205 NULL_TREE, false,
3206 GSI_CONTINUE_LINKING);
3207 if (l->m2)
3209 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3210 t = fold_build_pointer_plus (fd->loops[i].v,
3211 fold_convert (sizetype,
3212 l->n2));
3213 else
3215 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3216 fd->loops[i].v);
3217 t = fold_build2 (PLUS_EXPR,
3218 TREE_TYPE (nonrect_bounds[j]),
3219 t, unshare_expr (l->n2));
3221 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3222 false,
3223 GSI_CONTINUE_LINKING);
3224 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3225 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3226 n2 = nonrect_bounds[j];
3228 else
3229 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3230 true, NULL_TREE, false,
3231 GSI_CONTINUE_LINKING);
3232 gcond *cond_stmt
3233 = gimple_build_cond (l->cond_code, n1, n2,
3234 NULL_TREE, NULL_TREE);
3235 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3236 if (update_bb == NULL)
3237 update_bb = this_bb;
3238 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3239 e->probability
3240 = profile_probability::guessed_always ().apply_scale (1, 8);
3241 if (prev_bb == NULL)
3242 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3243 prev_bb = this_bb;
3245 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3246 e->probability
3247 = profile_probability::guessed_always ().apply_scale (7, 8);
3248 body_bb = update_bb;
3251 if (i > 0)
3253 if (fd->loops[i].m2)
3254 t = nonrect_bounds[i];
3255 else
3256 t = unshare_expr (fd->loops[i].n2);
3257 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3258 false, GSI_CONTINUE_LINKING);
3259 tree v = fd->loops[i].v;
3260 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3261 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3262 false, GSI_CONTINUE_LINKING);
3263 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3264 stmt = gimple_build_cond_empty (t);
3265 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3266 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3267 expand_omp_regimplify_p, NULL, NULL)
3268 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3269 expand_omp_regimplify_p, NULL, NULL))
3270 gimple_regimplify_operands (stmt, &gsi);
3271 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3272 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3274 else
3275 make_edge (bb, body_bb, EDGE_FALLTHRU);
3276 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3277 last_bb = bb;
3280 return collapse_bb;
3283 /* Expand #pragma omp ordered depend(source). */
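/* Concretely this emits a single call to BUILT_IN_GOMP_DOACROSS_POST
   (or the ULL variant for long long iterators) whose only argument is
   the address of counts[fd->ordered], the block the expansion fills
   with the current iteration's doacross coordinates.  */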
3285 static void
3286 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3287 tree *counts, location_t loc)
3289 enum built_in_function source_ix
3290 = fd->iter_type == long_integer_type_node
3291 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3292 gimple *g
3293 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3294 build_fold_addr_expr (counts[fd->ordered]));
3295 gimple_set_location (g, loc);
3296 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3299 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
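/* For example (illustrative source, not a specific testcase):
     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         {
           #pragma omp ordered depend (sink: i - 1, j)
           ...
           #pragma omp ordered depend (source)
         }
   For the sink clause this function conceptually emits a
   GOMP_doacross_wait (i - 1, j) call, guarded by the COND built below
   which checks that the waited-for iteration actually lies inside the
   iteration space; the offsets are first normalized by the loop
   steps.  */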
3301 static void
3302 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3303 tree *counts, tree c, location_t loc)
3305 auto_vec<tree, 10> args;
3306 enum built_in_function sink_ix
3307 = fd->iter_type == long_integer_type_node
3308 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3309 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3310 int i;
3311 gimple_stmt_iterator gsi2 = *gsi;
3312 bool warned_step = false;
3314 for (i = 0; i < fd->ordered; i++)
3316 tree step = NULL_TREE;
3317 off = TREE_PURPOSE (deps);
3318 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3320 step = TREE_OPERAND (off, 1);
3321 off = TREE_OPERAND (off, 0);
3323 if (!integer_zerop (off))
3325 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3326 || fd->loops[i].cond_code == GT_EXPR);
3327 bool forward = fd->loops[i].cond_code == LT_EXPR;
3328 if (step)
3330 /* Non-simple Fortran DO loops. If step is variable,
3331 we don't know even the direction at compile time, so we
3332 can't warn. */
3333 if (TREE_CODE (step) != INTEGER_CST)
3334 break;
3335 forward = tree_int_cst_sgn (step) != -1;
3337 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3338 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3339 "waiting for lexically later iteration");
3340 break;
3342 deps = TREE_CHAIN (deps);
3344 /* If all offsets corresponding to the collapsed loops are zero,
3345 this depend clause can be ignored. FIXME: but there is still a
3346 flush needed. We need to emit one __sync_synchronize () for it
3347 though (perhaps conditionally)? Solve this together with the
3348 conservative dependence folding optimization.
3349 if (i >= fd->collapse)
3350 return; */
3352 deps = OMP_CLAUSE_DECL (c);
3353 gsi_prev (&gsi2);
3354 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3355 edge e2 = split_block_after_labels (e1->dest);
3357 gsi2 = gsi_after_labels (e1->dest);
3358 *gsi = gsi_last_bb (e1->src);
3359 for (i = 0; i < fd->ordered; i++)
3361 tree itype = TREE_TYPE (fd->loops[i].v);
3362 tree step = NULL_TREE;
3363 tree orig_off = NULL_TREE;
3364 if (POINTER_TYPE_P (itype))
3365 itype = sizetype;
3366 if (i)
3367 deps = TREE_CHAIN (deps);
3368 off = TREE_PURPOSE (deps);
3369 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3371 step = TREE_OPERAND (off, 1);
3372 off = TREE_OPERAND (off, 0);
3373 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3374 && integer_onep (fd->loops[i].step)
3375 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3377 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3378 if (step)
3380 off = fold_convert_loc (loc, itype, off);
3381 orig_off = off;
3382 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3385 if (integer_zerop (off))
3386 t = boolean_true_node;
3387 else
3389 tree a;
3390 tree co = fold_convert_loc (loc, itype, off);
3391 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3393 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3394 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3395 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3396 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3397 co);
3399 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3400 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3401 fd->loops[i].v, co);
3402 else
3403 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3404 fd->loops[i].v, co);
3405 if (step)
3407 tree t1, t2;
3408 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3409 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3410 fd->loops[i].n1);
3411 else
3412 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3413 fd->loops[i].n2);
3414 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3415 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3416 fd->loops[i].n2);
3417 else
3418 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3419 fd->loops[i].n1);
3420 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3421 step, build_int_cst (TREE_TYPE (step), 0));
3422 if (TREE_CODE (step) != INTEGER_CST)
3424 t1 = unshare_expr (t1);
3425 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3426 false, GSI_CONTINUE_LINKING);
3427 t2 = unshare_expr (t2);
3428 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3429 false, GSI_CONTINUE_LINKING);
3431 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3432 t, t2, t1);
3434 else if (fd->loops[i].cond_code == LT_EXPR)
3436 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3437 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3438 fd->loops[i].n1);
3439 else
3440 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3441 fd->loops[i].n2);
3443 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3444 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3445 fd->loops[i].n2);
3446 else
3447 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3448 fd->loops[i].n1);
3450 if (cond)
3451 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3452 else
3453 cond = t;
3455 off = fold_convert_loc (loc, itype, off);
3457 if (step
3458 || (fd->loops[i].cond_code == LT_EXPR
3459 ? !integer_onep (fd->loops[i].step)
3460 : !integer_minus_onep (fd->loops[i].step)))
3462 if (step == NULL_TREE
3463 && TYPE_UNSIGNED (itype)
3464 && fd->loops[i].cond_code == GT_EXPR)
3465 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3466 fold_build1_loc (loc, NEGATE_EXPR, itype,
3467 s));
3468 else
3469 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3470 orig_off ? orig_off : off, s);
3471 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3472 build_int_cst (itype, 0));
3473 if (integer_zerop (t) && !warned_step)
3475 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3476 "refers to iteration never in the iteration "
3477 "space");
3478 warned_step = true;
3480 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3481 cond, t);
3484 if (i <= fd->collapse - 1 && fd->collapse > 1)
3485 t = fd->loop.v;
3486 else if (counts[i])
3487 t = counts[i];
3488 else
3490 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3491 fd->loops[i].v, fd->loops[i].n1);
3492 t = fold_convert_loc (loc, fd->iter_type, t);
3494 if (step)
3495 /* We have already divided off by step earlier. */;
3496 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3497 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3498 fold_build1_loc (loc, NEGATE_EXPR, itype,
3499 s));
3500 else
3501 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3502 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3503 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3504 off = fold_convert_loc (loc, fd->iter_type, off);
3505 if (i <= fd->collapse - 1 && fd->collapse > 1)
3507 if (i)
3508 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3509 off);
3510 if (i < fd->collapse - 1)
3512 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3513 counts[i]);
3514 continue;
3517 off = unshare_expr (off);
3518 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3519 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3520 true, GSI_SAME_STMT);
3521 args.safe_push (t);
3523 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3524 gimple_set_location (g, loc);
3525 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3527 cond = unshare_expr (cond);
3528 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3529 GSI_CONTINUE_LINKING);
3530 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3531 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3532 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3533 e1->probability = e3->probability.invert ();
3534 e1->flags = EDGE_TRUE_VALUE;
3535 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3537 *gsi = gsi_after_labels (e2->dest);
3540 /* Expand all #pragma omp ordered depend(source) and
3541 #pragma omp ordered depend(sink:...) constructs in the current
3542 #pragma omp for ordered(n) region. */
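/* An illustrative sketch (an editorial example; the loop and array names
   are assumptions, not taken from this file): the kind of user code this
   handles is a doacross nest such as

     #pragma omp for ordered(2)
     for (int i = 1; i < n; i++)
       for (int j = 1; j < m; j++)
         {
           #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
           a[i][j] = f (a[i - 1][j], a[i][j - 1]);
           #pragma omp ordered depend(source)
         }

   Each depend(sink: ...) clause is expanded by expand_omp_ordered_sink into
   a wait on the per-iteration counters (the GOMP_DOACROSS_WAIT builtins
   above), and depend(source) is expanded by expand_omp_ordered_source to
   post the counters for the current iteration.  */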
3544 static void
3545 expand_omp_ordered_source_sink (struct omp_region *region,
3546 struct omp_for_data *fd, tree *counts,
3547 basic_block cont_bb)
3549 struct omp_region *inner;
3550 int i;
3551 for (i = fd->collapse - 1; i < fd->ordered; i++)
3552 if (i == fd->collapse - 1 && fd->collapse > 1)
3553 counts[i] = NULL_TREE;
3554 else if (i >= fd->collapse && !cont_bb)
3555 counts[i] = build_zero_cst (fd->iter_type);
3556 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3557 && integer_onep (fd->loops[i].step))
3558 counts[i] = NULL_TREE;
3559 else
3560 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3561 tree atype
3562 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3563 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3564 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3566 for (inner = region->inner; inner; inner = inner->next)
3567 if (inner->type == GIMPLE_OMP_ORDERED)
3569 gomp_ordered *ord_stmt = inner->ord_stmt;
3570 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3571 location_t loc = gimple_location (ord_stmt);
3572 tree c;
3573 for (c = gimple_omp_ordered_clauses (ord_stmt);
3574 c; c = OMP_CLAUSE_CHAIN (c))
3575 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3576 break;
3577 if (c)
3578 expand_omp_ordered_source (&gsi, fd, counts, loc);
3579 for (c = gimple_omp_ordered_clauses (ord_stmt);
3580 c; c = OMP_CLAUSE_CHAIN (c))
3581 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3582 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3583 gsi_remove (&gsi, true);
3587 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3588 collapsed. */
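/* For instance (an illustrative note): with ordered(2) and no collapse
   clause, fd->collapse is 1 and fd->ordered is 2, so the single inner loop
   of the doacross nest is not part of the workshared iteration space and is
   re-created here around the body, together with the updates of the
   .orditera counter array consumed by depend(source)/depend(sink).  */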
3590 static basic_block
3591 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3592 basic_block cont_bb, basic_block body_bb,
3593 bool ordered_lastprivate)
3595 if (fd->ordered == fd->collapse)
3596 return cont_bb;
3598 if (!cont_bb)
3600 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3601 for (int i = fd->collapse; i < fd->ordered; i++)
3603 tree type = TREE_TYPE (fd->loops[i].v);
3604 tree n1 = fold_convert (type, fd->loops[i].n1);
3605 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3606 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3607 size_int (i - fd->collapse + 1),
3608 NULL_TREE, NULL_TREE);
3609 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3611 return NULL;
3614 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3616 tree t, type = TREE_TYPE (fd->loops[i].v);
3617 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3618 expand_omp_build_assign (&gsi, fd->loops[i].v,
3619 fold_convert (type, fd->loops[i].n1));
3620 if (counts[i])
3621 expand_omp_build_assign (&gsi, counts[i],
3622 build_zero_cst (fd->iter_type));
3623 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3624 size_int (i - fd->collapse + 1),
3625 NULL_TREE, NULL_TREE);
3626 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3627 if (!gsi_end_p (gsi))
3628 gsi_prev (&gsi);
3629 else
3630 gsi = gsi_last_bb (body_bb);
3631 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3632 basic_block new_body = e1->dest;
3633 if (body_bb == cont_bb)
3634 cont_bb = new_body;
3635 edge e2 = NULL;
3636 basic_block new_header;
3637 if (EDGE_COUNT (cont_bb->preds) > 0)
3639 gsi = gsi_last_bb (cont_bb);
3640 if (POINTER_TYPE_P (type))
3641 t = fold_build_pointer_plus (fd->loops[i].v,
3642 fold_convert (sizetype,
3643 fd->loops[i].step));
3644 else
3645 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3646 fold_convert (type, fd->loops[i].step));
3647 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3648 if (counts[i])
3650 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3651 build_int_cst (fd->iter_type, 1));
3652 expand_omp_build_assign (&gsi, counts[i], t);
3653 t = counts[i];
3655 else
3657 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3658 fd->loops[i].v, fd->loops[i].n1);
3659 t = fold_convert (fd->iter_type, t);
3660 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3661 true, GSI_SAME_STMT);
3663 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3664 size_int (i - fd->collapse + 1),
3665 NULL_TREE, NULL_TREE);
3666 expand_omp_build_assign (&gsi, aref, t);
3667 gsi_prev (&gsi);
3668 e2 = split_block (cont_bb, gsi_stmt (gsi));
3669 new_header = e2->dest;
3671 else
3672 new_header = cont_bb;
3673 gsi = gsi_after_labels (new_header);
3674 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3675 true, GSI_SAME_STMT);
3676 tree n2
3677 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3678 true, NULL_TREE, true, GSI_SAME_STMT);
3679 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3680 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3681 edge e3 = split_block (new_header, gsi_stmt (gsi));
3682 cont_bb = e3->dest;
3683 remove_edge (e1);
3684 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3685 e3->flags = EDGE_FALSE_VALUE;
3686 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3687 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3688 e1->probability = e3->probability.invert ();
3690 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3691 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3693 if (e2)
3695 class loop *loop = alloc_loop ();
3696 loop->header = new_header;
3697 loop->latch = e2->src;
3698 add_loop (loop, body_bb->loop_father);
3702 /* If there are any lastprivate clauses and it is possible some loops
3703 might have zero iterations, ensure all the decls are initialized,
3704 otherwise we could crash evaluating C++ class iterators with lastprivate
3705 clauses. */
3706 bool need_inits = false;
3707 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3708 if (need_inits)
3710 tree type = TREE_TYPE (fd->loops[i].v);
3711 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3712 expand_omp_build_assign (&gsi, fd->loops[i].v,
3713 fold_convert (type, fd->loops[i].n1));
3715 else
3717 tree type = TREE_TYPE (fd->loops[i].v);
3718 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3719 boolean_type_node,
3720 fold_convert (type, fd->loops[i].n1),
3721 fold_convert (type, fd->loops[i].n2));
3722 if (!integer_onep (this_cond))
3723 need_inits = true;
3726 return cont_bb;
3729 /* A subroutine of expand_omp_for. Generate code for a parallel
3730 loop with any schedule. Given parameters:
3732 for (V = N1; V cond N2; V += STEP) BODY;
3734 where COND is "<" or ">", we generate pseudocode
3736 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3737 if (more) goto L0; else goto L3;
3739 V = istart0;
3740 iend = iend0;
3742 BODY;
3743 V += STEP;
3744 if (V cond iend) goto L1; else goto L2;
3746 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3749 If this is a combined omp parallel loop, instead of the call to
3750 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3751 If this is gimple_omp_for_combined_p loop, then instead of assigning
3752 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3753 inner GIMPLE_OMP_FOR and V += STEP; and
3754 if (V cond iend) goto L1; else goto L2; are removed.
3756 For collapsed loops, given parameters:
3757 collapse(3)
3758 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3759 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3760 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3761 BODY;
3763 we generate pseudocode
3765 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3766 if (cond3 is <)
3767 adj = STEP3 - 1;
3768 else
3769 adj = STEP3 + 1;
3770 count3 = (adj + N32 - N31) / STEP3;
3771 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3772 if (cond2 is <)
3773 adj = STEP2 - 1;
3774 else
3775 adj = STEP2 + 1;
3776 count2 = (adj + N22 - N21) / STEP2;
3777 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3778 if (cond1 is <)
3779 adj = STEP1 - 1;
3780 else
3781 adj = STEP1 + 1;
3782 count1 = (adj + N12 - N11) / STEP1;
3783 count = count1 * count2 * count3;
3784 goto Z1;
3786 count = 0;
3788 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3789 if (more) goto L0; else goto L3;
3791 V = istart0;
3792 T = V;
3793 V3 = N31 + (T % count3) * STEP3;
3794 T = T / count3;
3795 V2 = N21 + (T % count2) * STEP2;
3796 T = T / count2;
3797 V1 = N11 + T * STEP1;
3798 iend = iend0;
3800 BODY;
3801 V += 1;
3802 if (V < iend) goto L10; else goto L2;
3803 L10:
3804 V3 += STEP3;
3805 if (V3 cond3 N32) goto L1; else goto L11;
3806 L11:
3807 V3 = N31;
3808 V2 += STEP2;
3809 if (V2 cond2 N22) goto L1; else goto L12;
3810 L12:
3811 V2 = N21;
3812 V1 += STEP1;
3813 goto L1;
3815 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
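/* A minimal concrete instance (an editorial sketch; the schedule and names
   below are assumptions, not taken from this file): for a dynamically
   scheduled loop such as

     #pragma omp parallel for schedule(dynamic, 4)
     for (long i = 0; i < n; i++)
       body (i);

   START_FN and NEXT_FN would correspond to the libgomp entry points
   GOMP_loop_dynamic_start and GOMP_loop_dynamic_next, and the emitted code
   keeps requesting [istart0, iend0) chunks in L2 until the next function
   returns false, following the L0/L1/L2/L3 skeleton above.  */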
3820 static void
3821 expand_omp_for_generic (struct omp_region *region,
3822 struct omp_for_data *fd,
3823 enum built_in_function start_fn,
3824 enum built_in_function next_fn,
3825 tree sched_arg,
3826 gimple *inner_stmt)
3828 tree type, istart0, iend0, iend;
3829 tree t, vmain, vback, bias = NULL_TREE;
3830 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3831 basic_block l2_bb = NULL, l3_bb = NULL;
3832 gimple_stmt_iterator gsi;
3833 gassign *assign_stmt;
3834 bool in_combined_parallel = is_combined_parallel (region);
3835 bool broken_loop = region->cont == NULL;
3836 edge e, ne;
3837 tree *counts = NULL;
3838 int i;
3839 bool ordered_lastprivate = false;
3841 gcc_assert (!broken_loop || !in_combined_parallel);
3842 gcc_assert (fd->iter_type == long_integer_type_node
3843 || !in_combined_parallel);
3845 entry_bb = region->entry;
3846 cont_bb = region->cont;
3847 collapse_bb = NULL;
3848 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3849 gcc_assert (broken_loop
3850 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3851 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3852 l1_bb = single_succ (l0_bb);
3853 if (!broken_loop)
3855 l2_bb = create_empty_bb (cont_bb);
3856 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3857 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3858 == l1_bb));
3859 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3861 else
3862 l2_bb = NULL;
3863 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3864 exit_bb = region->exit;
3866 gsi = gsi_last_nondebug_bb (entry_bb);
3868 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3869 if (fd->ordered
3870 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3871 OMP_CLAUSE_LASTPRIVATE))
3872 ordered_lastprivate = true;
3873 tree reductions = NULL_TREE;
3874 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3875 tree memv = NULL_TREE;
3876 if (fd->lastprivate_conditional)
3878 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3879 OMP_CLAUSE__CONDTEMP_);
3880 if (fd->have_pointer_condtemp)
3881 condtemp = OMP_CLAUSE_DECL (c);
3882 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3883 cond_var = OMP_CLAUSE_DECL (c);
3885 if (sched_arg)
3887 if (fd->have_reductemp)
3889 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3890 OMP_CLAUSE__REDUCTEMP_);
3891 reductions = OMP_CLAUSE_DECL (c);
3892 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3893 gimple *g = SSA_NAME_DEF_STMT (reductions);
3894 reductions = gimple_assign_rhs1 (g);
3895 OMP_CLAUSE_DECL (c) = reductions;
3896 entry_bb = gimple_bb (g);
3897 edge e = split_block (entry_bb, g);
3898 if (region->entry == entry_bb)
3899 region->entry = e->dest;
3900 gsi = gsi_last_bb (entry_bb);
3902 else
3903 reductions = null_pointer_node;
3904 if (fd->have_pointer_condtemp)
3906 tree type = TREE_TYPE (condtemp);
3907 memv = create_tmp_var (type);
3908 TREE_ADDRESSABLE (memv) = 1;
3909 unsigned HOST_WIDE_INT sz
3910 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3911 sz *= fd->lastprivate_conditional;
3912 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3913 false);
3914 mem = build_fold_addr_expr (memv);
3916 else
3917 mem = null_pointer_node;
3919 if (fd->collapse > 1 || fd->ordered)
3921 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3922 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3924 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3925 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3926 zero_iter1_bb, first_zero_iter1,
3927 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3929 if (zero_iter1_bb)
3931 /* Some counts[i] vars might be uninitialized if
3932 some loop has zero iterations. But the body shouldn't
3933 be executed in that case, so just avoid uninit warnings. */
3934 for (i = first_zero_iter1;
3935 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3936 if (SSA_VAR_P (counts[i]))
3937 suppress_warning (counts[i], OPT_Wuninitialized);
3938 gsi_prev (&gsi);
3939 e = split_block (entry_bb, gsi_stmt (gsi));
3940 entry_bb = e->dest;
3941 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3942 gsi = gsi_last_nondebug_bb (entry_bb);
3943 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3944 get_immediate_dominator (CDI_DOMINATORS,
3945 zero_iter1_bb));
3947 if (zero_iter2_bb)
3949 /* Some counts[i] vars might be uninitialized if
3950 some loop has zero iterations. But the body shouldn't
3951 be executed in that case, so just avoid uninit warnings. */
3952 for (i = first_zero_iter2; i < fd->ordered; i++)
3953 if (SSA_VAR_P (counts[i]))
3954 suppress_warning (counts[i], OPT_Wuninitialized);
3955 if (zero_iter1_bb)
3956 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3957 else
3959 gsi_prev (&gsi);
3960 e = split_block (entry_bb, gsi_stmt (gsi));
3961 entry_bb = e->dest;
3962 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3963 gsi = gsi_last_nondebug_bb (entry_bb);
3964 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3965 get_immediate_dominator
3966 (CDI_DOMINATORS, zero_iter2_bb));
3969 if (fd->collapse == 1)
3971 counts[0] = fd->loop.n2;
3972 fd->loop = fd->loops[0];
3976 type = TREE_TYPE (fd->loop.v);
3977 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3978 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3979 TREE_ADDRESSABLE (istart0) = 1;
3980 TREE_ADDRESSABLE (iend0) = 1;
3982 /* See if we need to bias by LLONG_MIN. */
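/* Illustrative note (an editorial addition): a signed long long iterator
   whose bounds may be negative, say running from -10 to 10, cannot be passed
   directly to the unsigned long long GOMP_loop_ull_* entry points, so
   TYPE_MIN_VALUE is added to both bounds before the start call and
   subtracted again when istart0/iend0 are read back below.  */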
3983 if (fd->iter_type == long_long_unsigned_type_node
3984 && TREE_CODE (type) == INTEGER_TYPE
3985 && !TYPE_UNSIGNED (type)
3986 && fd->ordered == 0)
3988 tree n1, n2;
3990 if (fd->loop.cond_code == LT_EXPR)
3992 n1 = fd->loop.n1;
3993 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3995 else
3997 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3998 n2 = fd->loop.n1;
4000 if (TREE_CODE (n1) != INTEGER_CST
4001 || TREE_CODE (n2) != INTEGER_CST
4002 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4003 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4006 gimple_stmt_iterator gsif = gsi;
4007 gsi_prev (&gsif);
4009 tree arr = NULL_TREE;
4010 if (in_combined_parallel)
4012 gcc_assert (fd->ordered == 0);
4013 /* In a combined parallel loop, emit a call to
4014 GOMP_loop_foo_next. */
4015 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4016 build_fold_addr_expr (istart0),
4017 build_fold_addr_expr (iend0));
4019 else
4021 tree t0, t1, t2, t3, t4;
4022 /* If this is not a combined parallel loop, emit a call to
4023 GOMP_loop_foo_start in ENTRY_BB. */
4024 t4 = build_fold_addr_expr (iend0);
4025 t3 = build_fold_addr_expr (istart0);
4026 if (fd->ordered)
4028 t0 = build_int_cst (unsigned_type_node,
4029 fd->ordered - fd->collapse + 1);
4030 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4031 fd->ordered
4032 - fd->collapse + 1),
4033 ".omp_counts");
4034 DECL_NAMELESS (arr) = 1;
4035 TREE_ADDRESSABLE (arr) = 1;
4036 TREE_STATIC (arr) = 1;
4037 vec<constructor_elt, va_gc> *v;
4038 vec_alloc (v, fd->ordered - fd->collapse + 1);
4039 int idx;
4041 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4043 tree c;
4044 if (idx == 0 && fd->collapse > 1)
4045 c = fd->loop.n2;
4046 else
4047 c = counts[idx + fd->collapse - 1];
4048 tree purpose = size_int (idx);
4049 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4050 if (TREE_CODE (c) != INTEGER_CST)
4051 TREE_STATIC (arr) = 0;
4054 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4055 if (!TREE_STATIC (arr))
4056 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4057 void_type_node, arr),
4058 true, NULL_TREE, true, GSI_SAME_STMT);
4059 t1 = build_fold_addr_expr (arr);
4060 t2 = NULL_TREE;
4062 else
4064 t2 = fold_convert (fd->iter_type, fd->loop.step);
4065 t1 = fd->loop.n2;
4066 t0 = fd->loop.n1;
4067 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4069 tree innerc
4070 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4071 OMP_CLAUSE__LOOPTEMP_);
4072 gcc_assert (innerc);
4073 t0 = OMP_CLAUSE_DECL (innerc);
4074 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4075 OMP_CLAUSE__LOOPTEMP_);
4076 gcc_assert (innerc);
4077 t1 = OMP_CLAUSE_DECL (innerc);
4079 if (POINTER_TYPE_P (TREE_TYPE (t0))
4080 && TYPE_PRECISION (TREE_TYPE (t0))
4081 != TYPE_PRECISION (fd->iter_type))
4083 /* Avoid casting pointers to integer of a different size. */
4084 tree itype = signed_type_for (type);
4085 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4086 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4088 else
4090 t1 = fold_convert (fd->iter_type, t1);
4091 t0 = fold_convert (fd->iter_type, t0);
4093 if (bias)
4095 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4096 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4099 if (fd->iter_type == long_integer_type_node || fd->ordered)
4101 if (fd->chunk_size)
4103 t = fold_convert (fd->iter_type, fd->chunk_size);
4104 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4105 if (sched_arg)
4107 if (fd->ordered)
4108 t = build_call_expr (builtin_decl_explicit (start_fn),
4109 8, t0, t1, sched_arg, t, t3, t4,
4110 reductions, mem);
4111 else
4112 t = build_call_expr (builtin_decl_explicit (start_fn),
4113 9, t0, t1, t2, sched_arg, t, t3, t4,
4114 reductions, mem);
4116 else if (fd->ordered)
4117 t = build_call_expr (builtin_decl_explicit (start_fn),
4118 5, t0, t1, t, t3, t4);
4119 else
4120 t = build_call_expr (builtin_decl_explicit (start_fn),
4121 6, t0, t1, t2, t, t3, t4);
4123 else if (fd->ordered)
4124 t = build_call_expr (builtin_decl_explicit (start_fn),
4125 4, t0, t1, t3, t4);
4126 else
4127 t = build_call_expr (builtin_decl_explicit (start_fn),
4128 5, t0, t1, t2, t3, t4);
4130 else
4132 tree t5;
4133 tree c_bool_type;
4134 tree bfn_decl;
4136 /* The GOMP_loop_ull_*start functions have an additional boolean
4137 argument, true for < loops and false for > loops.
4138 In Fortran, the C bool type can be different from
4139 boolean_type_node. */
4140 bfn_decl = builtin_decl_explicit (start_fn);
4141 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4142 t5 = build_int_cst (c_bool_type,
4143 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4144 if (fd->chunk_size)
4146 tree bfn_decl = builtin_decl_explicit (start_fn);
4147 t = fold_convert (fd->iter_type, fd->chunk_size);
4148 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4149 if (sched_arg)
4150 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4151 t, t3, t4, reductions, mem);
4152 else
4153 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4155 else
4156 t = build_call_expr (builtin_decl_explicit (start_fn),
4157 6, t5, t0, t1, t2, t3, t4);
4160 if (TREE_TYPE (t) != boolean_type_node)
4161 t = fold_build2 (NE_EXPR, boolean_type_node,
4162 t, build_int_cst (TREE_TYPE (t), 0));
4163 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4164 true, GSI_SAME_STMT);
4165 if (arr && !TREE_STATIC (arr))
4167 tree clobber = build_clobber (TREE_TYPE (arr));
4168 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4169 GSI_SAME_STMT);
4171 if (fd->have_pointer_condtemp)
4172 expand_omp_build_assign (&gsi, condtemp, memv, false);
4173 if (fd->have_reductemp)
4175 gimple *g = gsi_stmt (gsi);
4176 gsi_remove (&gsi, true);
4177 release_ssa_name (gimple_assign_lhs (g));
4179 entry_bb = region->entry;
4180 gsi = gsi_last_nondebug_bb (entry_bb);
4182 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4184 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4186 /* Remove the GIMPLE_OMP_FOR statement. */
4187 gsi_remove (&gsi, true);
4189 if (gsi_end_p (gsif))
4190 gsif = gsi_after_labels (gsi_bb (gsif));
4191 gsi_next (&gsif);
4193 /* Iteration setup for sequential loop goes in L0_BB. */
4194 tree startvar = fd->loop.v;
4195 tree endvar = NULL_TREE;
4197 if (gimple_omp_for_combined_p (fd->for_stmt))
4199 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4200 && gimple_omp_for_kind (inner_stmt)
4201 == GF_OMP_FOR_KIND_SIMD);
4202 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4203 OMP_CLAUSE__LOOPTEMP_);
4204 gcc_assert (innerc);
4205 startvar = OMP_CLAUSE_DECL (innerc);
4206 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4207 OMP_CLAUSE__LOOPTEMP_);
4208 gcc_assert (innerc);
4209 endvar = OMP_CLAUSE_DECL (innerc);
4212 gsi = gsi_start_bb (l0_bb);
4213 t = istart0;
4214 if (fd->ordered && fd->collapse == 1)
4215 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4216 fold_convert (fd->iter_type, fd->loop.step));
4217 else if (bias)
4218 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4219 if (fd->ordered && fd->collapse == 1)
4221 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4222 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4223 fd->loop.n1, fold_convert (sizetype, t));
4224 else
4226 t = fold_convert (TREE_TYPE (startvar), t);
4227 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4228 fd->loop.n1, t);
4231 else
4233 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4234 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4235 t = fold_convert (TREE_TYPE (startvar), t);
4237 t = force_gimple_operand_gsi (&gsi, t,
4238 DECL_P (startvar)
4239 && TREE_ADDRESSABLE (startvar),
4240 NULL_TREE, false, GSI_CONTINUE_LINKING);
4241 assign_stmt = gimple_build_assign (startvar, t);
4242 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4243 if (cond_var)
4245 tree itype = TREE_TYPE (cond_var);
4246 /* For lastprivate(conditional:) itervar, we need some iteration
4247 counter that starts at a non-zero value of an unsigned type and
4248 increases. Prefer as few IVs as possible, so if we can use startvar
4249 itself, use that, or startvar + constant (those would be
4250 incremented with step), and as a last resort use s0 + 1,
4251 incremented by 1 each iteration. */
4252 if ((fd->ordered && fd->collapse == 1)
4253 || bias
4254 || POINTER_TYPE_P (type)
4255 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4256 || fd->loop.cond_code != LT_EXPR)
4257 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4258 build_int_cst (itype, 1));
4259 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4260 t = fold_convert (itype, t);
4261 else
4263 tree c = fold_convert (itype, fd->loop.n1);
4264 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4265 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4267 t = force_gimple_operand_gsi (&gsi, t, false,
4268 NULL_TREE, false, GSI_CONTINUE_LINKING);
4269 assign_stmt = gimple_build_assign (cond_var, t);
4270 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4273 t = iend0;
4274 if (fd->ordered && fd->collapse == 1)
4275 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4276 fold_convert (fd->iter_type, fd->loop.step));
4277 else if (bias)
4278 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4279 if (fd->ordered && fd->collapse == 1)
4281 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4282 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4283 fd->loop.n1, fold_convert (sizetype, t));
4284 else
4286 t = fold_convert (TREE_TYPE (startvar), t);
4287 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4288 fd->loop.n1, t);
4291 else
4293 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4294 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4295 t = fold_convert (TREE_TYPE (startvar), t);
4297 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4298 false, GSI_CONTINUE_LINKING);
4299 if (endvar)
4301 assign_stmt = gimple_build_assign (endvar, iend);
4302 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4303 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4304 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4305 else
4306 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4307 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4309 /* Handle linear clause adjustments. */
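/* Sketch of the adjustment below (an editorial note): for linear(x:lstep)
   the privatized x is set to x_orig + ((startvar - n1) / step) * lstep,
   i.e. the original value plus the number of logical iterations preceding
   this thread's first iteration, multiplied by the linear step.  */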
4310 tree itercnt = NULL_TREE;
4311 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4312 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4313 c; c = OMP_CLAUSE_CHAIN (c))
4314 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4315 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4317 tree d = OMP_CLAUSE_DECL (c);
4318 tree t = d, a, dest;
4319 if (omp_privatize_by_reference (t))
4320 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4321 tree type = TREE_TYPE (t);
4322 if (POINTER_TYPE_P (type))
4323 type = sizetype;
4324 dest = unshare_expr (t);
4325 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4326 expand_omp_build_assign (&gsif, v, t);
4327 if (itercnt == NULL_TREE)
4329 itercnt = startvar;
4330 tree n1 = fd->loop.n1;
4331 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4333 itercnt
4334 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4335 itercnt);
4336 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4338 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4339 itercnt, n1);
4340 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4341 itercnt, fd->loop.step);
4342 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4343 NULL_TREE, false,
4344 GSI_CONTINUE_LINKING);
4346 a = fold_build2 (MULT_EXPR, type,
4347 fold_convert (type, itercnt),
4348 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4349 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4350 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4351 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4352 false, GSI_CONTINUE_LINKING);
4353 expand_omp_build_assign (&gsi, dest, t, true);
4355 if (fd->collapse > 1)
4356 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4358 if (fd->ordered)
4360 /* Until now, the counts array contained the number of iterations (or
4361 a variable containing it) for the ith loop. From now on, we need
4362 those counts only for the collapsed loops, and only for the 2nd
4363 through the last collapsed one. Move them one element earlier;
4364 we'll use counts[fd->collapse - 1] for the first source/sink
4365 iteration counter and so on, and counts[fd->ordered]
4366 as the array holding the current counter values for
4367 depend(source). */
4368 if (fd->collapse > 1)
4369 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4370 if (broken_loop)
4372 int i;
4373 for (i = fd->collapse; i < fd->ordered; i++)
4375 tree type = TREE_TYPE (fd->loops[i].v);
4376 tree this_cond
4377 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4378 fold_convert (type, fd->loops[i].n1),
4379 fold_convert (type, fd->loops[i].n2));
4380 if (!integer_onep (this_cond))
4381 break;
4383 if (i < fd->ordered)
4385 cont_bb
4386 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4387 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4388 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4389 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4390 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4391 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4392 make_edge (cont_bb, l1_bb, 0);
4393 l2_bb = create_empty_bb (cont_bb);
4394 broken_loop = false;
4397 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4398 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4399 ordered_lastprivate);
4400 if (counts[fd->collapse - 1])
4402 gcc_assert (fd->collapse == 1);
4403 gsi = gsi_last_bb (l0_bb);
4404 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4405 istart0, true);
4406 if (cont_bb)
4408 gsi = gsi_last_bb (cont_bb);
4409 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4410 counts[fd->collapse - 1],
4411 build_int_cst (fd->iter_type, 1));
4412 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4413 tree aref = build4 (ARRAY_REF, fd->iter_type,
4414 counts[fd->ordered], size_zero_node,
4415 NULL_TREE, NULL_TREE);
4416 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4418 t = counts[fd->collapse - 1];
4420 else if (fd->collapse > 1)
4421 t = fd->loop.v;
4422 else
4424 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4425 fd->loops[0].v, fd->loops[0].n1);
4426 t = fold_convert (fd->iter_type, t);
4428 gsi = gsi_last_bb (l0_bb);
4429 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4430 size_zero_node, NULL_TREE, NULL_TREE);
4431 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4432 false, GSI_CONTINUE_LINKING);
4433 expand_omp_build_assign (&gsi, aref, t, true);
4436 if (!broken_loop)
4438 /* Code to control the increment and predicate for the sequential
4439 loop goes in the CONT_BB. */
4440 gsi = gsi_last_nondebug_bb (cont_bb);
4441 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4442 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4443 vmain = gimple_omp_continue_control_use (cont_stmt);
4444 vback = gimple_omp_continue_control_def (cont_stmt);
4446 if (cond_var)
4448 tree itype = TREE_TYPE (cond_var);
4449 tree t2;
4450 if ((fd->ordered && fd->collapse == 1)
4451 || bias
4452 || POINTER_TYPE_P (type)
4453 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4454 || fd->loop.cond_code != LT_EXPR)
4455 t2 = build_int_cst (itype, 1);
4456 else
4457 t2 = fold_convert (itype, fd->loop.step);
4458 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4459 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4460 NULL_TREE, true, GSI_SAME_STMT);
4461 assign_stmt = gimple_build_assign (cond_var, t2);
4462 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4465 if (!gimple_omp_for_combined_p (fd->for_stmt))
4467 if (POINTER_TYPE_P (type))
4468 t = fold_build_pointer_plus (vmain, fd->loop.step);
4469 else
4470 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4471 t = force_gimple_operand_gsi (&gsi, t,
4472 DECL_P (vback)
4473 && TREE_ADDRESSABLE (vback),
4474 NULL_TREE, true, GSI_SAME_STMT);
4475 assign_stmt = gimple_build_assign (vback, t);
4476 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4478 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4480 tree tem;
4481 if (fd->collapse > 1)
4482 tem = fd->loop.v;
4483 else
4485 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4486 fd->loops[0].v, fd->loops[0].n1);
4487 tem = fold_convert (fd->iter_type, tem);
4489 tree aref = build4 (ARRAY_REF, fd->iter_type,
4490 counts[fd->ordered], size_zero_node,
4491 NULL_TREE, NULL_TREE);
4492 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4493 true, GSI_SAME_STMT);
4494 expand_omp_build_assign (&gsi, aref, tem);
4497 t = build2 (fd->loop.cond_code, boolean_type_node,
4498 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4499 iend);
4500 gcond *cond_stmt = gimple_build_cond_empty (t);
4501 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4504 /* Remove GIMPLE_OMP_CONTINUE. */
4505 gsi_remove (&gsi, true);
4507 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4508 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4510 /* Emit code to get the next parallel iteration in L2_BB. */
4511 gsi = gsi_start_bb (l2_bb);
4513 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4514 build_fold_addr_expr (istart0),
4515 build_fold_addr_expr (iend0));
4516 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4517 false, GSI_CONTINUE_LINKING);
4518 if (TREE_TYPE (t) != boolean_type_node)
4519 t = fold_build2 (NE_EXPR, boolean_type_node,
4520 t, build_int_cst (TREE_TYPE (t), 0));
4521 gcond *cond_stmt = gimple_build_cond_empty (t);
4522 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4525 /* Add the loop cleanup function. */
4526 gsi = gsi_last_nondebug_bb (exit_bb);
4527 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4528 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4529 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4530 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4531 else
4532 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4533 gcall *call_stmt = gimple_build_call (t, 0);
4534 if (fd->ordered)
4536 tree arr = counts[fd->ordered];
4537 tree clobber = build_clobber (TREE_TYPE (arr));
4538 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4539 GSI_SAME_STMT);
4541 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4543 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4544 if (fd->have_reductemp)
4546 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4547 gimple_call_lhs (call_stmt));
4548 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4551 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4552 gsi_remove (&gsi, true);
4554 /* Connect the new blocks. */
4555 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4556 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4558 if (!broken_loop)
4560 gimple_seq phis;
4562 e = find_edge (cont_bb, l3_bb);
4563 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4565 phis = phi_nodes (l3_bb);
4566 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4568 gimple *phi = gsi_stmt (gsi);
4569 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4570 PHI_ARG_DEF_FROM_EDGE (phi, e));
4572 remove_edge (e);
4574 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4575 e = find_edge (cont_bb, l1_bb);
4576 if (e == NULL)
4578 e = BRANCH_EDGE (cont_bb);
4579 gcc_assert (single_succ (e->dest) == l1_bb);
4581 if (gimple_omp_for_combined_p (fd->for_stmt))
4583 remove_edge (e);
4584 e = NULL;
4586 else if (fd->collapse > 1)
4588 remove_edge (e);
4589 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4591 else
4592 e->flags = EDGE_TRUE_VALUE;
4593 if (e)
4595 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4596 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4598 else
4600 e = find_edge (cont_bb, l2_bb);
4601 e->flags = EDGE_FALLTHRU;
4603 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4605 if (gimple_in_ssa_p (cfun))
4607 /* Add phis to the outer loop that connect to the phis in the inner,
4608 original loop, and move the loop entry value of the inner phi to
4609 the loop entry value of the outer phi. */
4610 gphi_iterator psi;
4611 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4613 location_t locus;
4614 gphi *nphi;
4615 gphi *exit_phi = psi.phi ();
4617 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4618 continue;
4620 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4621 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4623 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4624 edge latch_to_l1 = find_edge (latch, l1_bb);
4625 gphi *inner_phi
4626 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4628 tree t = gimple_phi_result (exit_phi);
4629 tree new_res = copy_ssa_name (t, NULL);
4630 nphi = create_phi_node (new_res, l0_bb);
4632 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4633 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4634 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4635 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4636 add_phi_arg (nphi, t, entry_to_l0, locus);
4638 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4639 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4641 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4645 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4646 recompute_dominator (CDI_DOMINATORS, l2_bb));
4647 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4648 recompute_dominator (CDI_DOMINATORS, l3_bb));
4649 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4650 recompute_dominator (CDI_DOMINATORS, l0_bb));
4651 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4652 recompute_dominator (CDI_DOMINATORS, l1_bb));
4654 /* We enter expand_omp_for_generic with a loop. This original loop may
4655 have its own loop struct, or it may be part of an outer loop struct
4656 (which may be the fake loop). */
4657 class loop *outer_loop = entry_bb->loop_father;
4658 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4660 add_bb_to_loop (l2_bb, outer_loop);
4662 /* We've added a new loop around the original loop. Allocate the
4663 corresponding loop struct. */
4664 class loop *new_loop = alloc_loop ();
4665 new_loop->header = l0_bb;
4666 new_loop->latch = l2_bb;
4667 add_loop (new_loop, outer_loop);
4669 /* Allocate a loop structure for the original loop unless we already
4670 had one. */
4671 if (!orig_loop_has_loop_struct
4672 && !gimple_omp_for_combined_p (fd->for_stmt))
4674 class loop *orig_loop = alloc_loop ();
4675 orig_loop->header = l1_bb;
4676 /* The loop may have multiple latches. */
4677 add_loop (orig_loop, new_loop);
4682 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4683 compute the needed allocation size. If !ALLOC, this is for team
4684 allocations; if ALLOC, for thread allocations. SZ is the initial needed
4685 size for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4686 allocation in bytes, and CNT the number of elements of each array; for
4687 !ALLOC this is omp_get_num_threads (), for ALLOC the number of iterations
4688 handled by the current thread. If PTR is non-NULL, it is the start of
4689 the allocation and this routine shall assign to OMP_CLAUSE_DECL (c) of
4690 those _scantemp_ clauses pointers to the corresponding arrays. */
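/* Rough usage sketch (the element sizes below are hypothetical): a first
   call with PTR == NULL seeing two non-control _scantemp_ arrays with
   element sizes 8 and 4 and CNT equal to the number of threads returns
   roughly SZ (rounded up for alignment) + CNT * (8 + 4) bytes; a second
   call with PTR pointing at the actual allocation then carves that block
   up and stores the per-array start addresses into the _scantemp_ clause
   decls.  */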
4692 static tree
4693 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4694 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4695 gimple_stmt_iterator *gsi, bool alloc)
4697 tree eltsz = NULL_TREE;
4698 unsigned HOST_WIDE_INT preval = 0;
4699 if (ptr && sz)
4700 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4701 ptr, size_int (sz));
4702 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4703 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4704 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4705 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4707 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4708 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4709 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4711 unsigned HOST_WIDE_INT szl
4712 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4713 szl = least_bit_hwi (szl);
4714 if (szl)
4715 al = MIN (al, szl);
4717 if (ptr == NULL_TREE)
4719 if (eltsz == NULL_TREE)
4720 eltsz = TYPE_SIZE_UNIT (pointee_type);
4721 else
4722 eltsz = size_binop (PLUS_EXPR, eltsz,
4723 TYPE_SIZE_UNIT (pointee_type));
4725 if (preval == 0 && al <= alloc_align)
4727 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4728 sz += diff;
4729 if (diff && ptr)
4730 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4731 ptr, size_int (diff));
4733 else if (al > preval)
4735 if (ptr)
4737 ptr = fold_convert (pointer_sized_int_node, ptr);
4738 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4739 build_int_cst (pointer_sized_int_node,
4740 al - 1));
4741 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4742 build_int_cst (pointer_sized_int_node,
4743 -(HOST_WIDE_INT) al));
4744 ptr = fold_convert (ptr_type_node, ptr);
4746 else
4747 sz += al - 1;
4749 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4750 preval = al;
4751 else
4752 preval = 1;
4753 if (ptr)
4755 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4756 ptr = OMP_CLAUSE_DECL (c);
4757 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4758 size_binop (MULT_EXPR, cnt,
4759 TYPE_SIZE_UNIT (pointee_type)));
4763 if (ptr == NULL_TREE)
4765 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4766 if (sz)
4767 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4768 return eltsz;
4770 else
4771 return ptr;
4774 /* Return the last _looptemp_ clause if one has been created for
4775 lastprivate on distribute parallel for{, simd} or taskloop.
4776 FD is the loop data and INNERC should be the second _looptemp_
4777 clause (the one holding the end of the range).
4778 This is followed by collapse - 1 _looptemp_ clauses for
4779 counts[1] and up, and for triangular loops followed by 4 further
4780 _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4781 one for factor and one for adjn1). After this there is optionally one
4782 _looptemp_ clause that this function returns. */
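/* For example (derived from the layout described above): with collapse(3)
   and a rectangular loop nest, INNERC is followed by two further _looptemp_
   clauses (for counts[1] and counts[2]); the clause after those, if present,
   is the one returned here.  */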
4784 static tree
4785 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4787 gcc_assert (innerc);
4788 int count = fd->collapse - 1;
4789 if (fd->non_rect
4790 && fd->last_nonrect == fd->first_nonrect + 1
4791 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4792 count += 4;
4793 for (int i = 0; i < count; i++)
4795 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4796 OMP_CLAUSE__LOOPTEMP_);
4797 gcc_assert (innerc);
4799 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4800 OMP_CLAUSE__LOOPTEMP_);
4803 /* A subroutine of expand_omp_for. Generate code for a parallel
4804 loop with static schedule and no specified chunk size. Given
4805 parameters:
4807 for (V = N1; V cond N2; V += STEP) BODY;
4809 where COND is "<" or ">", we generate pseudocode
4811 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4812 if (cond is <)
4813 adj = STEP - 1;
4814 else
4815 adj = STEP + 1;
4816 if ((__typeof (V)) -1 > 0 && cond is >)
4817 n = -(adj + N2 - N1) / -STEP;
4818 else
4819 n = (adj + N2 - N1) / STEP;
4820 q = n / nthreads;
4821 tt = n % nthreads;
4822 if (threadid < tt) goto L3; else goto L4;
4824 tt = 0;
4825 q = q + 1;
4827 s0 = q * threadid + tt;
4828 e0 = s0 + q;
4829 V = s0 * STEP + N1;
4830 if (s0 >= e0) goto L2; else goto L0;
4832 e = e0 * STEP + N1;
4834 BODY;
4835 V += STEP;
4836 if (V cond e) goto L1;
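/* Worked example of the partitioning above (illustrative numbers): with
   n = 10 iterations and nthreads = 4, q = 2 and tt = 2, so threads 0 and 1
   (threadid < tt) take q + 1 = 3 iterations each ([0,3) and [3,6)), while
   threads 2 and 3 take q = 2 iterations each ([6,8) and [8,10)).  */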
4840 static void
4841 expand_omp_for_static_nochunk (struct omp_region *region,
4842 struct omp_for_data *fd,
4843 gimple *inner_stmt)
4845 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4846 tree type, itype, vmain, vback;
4847 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4848 basic_block body_bb, cont_bb, collapse_bb = NULL;
4849 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4850 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4851 gimple_stmt_iterator gsi, gsip;
4852 edge ep;
4853 bool broken_loop = region->cont == NULL;
4854 tree *counts = NULL;
4855 tree n1, n2, step;
4856 tree reductions = NULL_TREE;
4857 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4859 itype = type = TREE_TYPE (fd->loop.v);
4860 if (POINTER_TYPE_P (type))
4861 itype = signed_type_for (type);
4863 entry_bb = region->entry;
4864 cont_bb = region->cont;
4865 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4866 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4867 gcc_assert (broken_loop
4868 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4869 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4870 body_bb = single_succ (seq_start_bb);
4871 if (!broken_loop)
4873 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4874 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4875 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4877 exit_bb = region->exit;
4879 /* Iteration space partitioning goes in ENTRY_BB. */
4880 gsi = gsi_last_nondebug_bb (entry_bb);
4881 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4882 gsip = gsi;
4883 gsi_prev (&gsip);
4885 if (fd->collapse > 1)
4887 int first_zero_iter = -1, dummy = -1;
4888 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4890 counts = XALLOCAVEC (tree, fd->collapse);
4891 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4892 fin_bb, first_zero_iter,
4893 dummy_bb, dummy, l2_dom_bb);
4894 t = NULL_TREE;
4896 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4897 t = integer_one_node;
4898 else
4899 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4900 fold_convert (type, fd->loop.n1),
4901 fold_convert (type, fd->loop.n2));
4902 if (fd->collapse == 1
4903 && TYPE_UNSIGNED (type)
4904 && (t == NULL_TREE || !integer_onep (t)))
4906 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4907 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4908 true, GSI_SAME_STMT);
4909 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4910 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4911 true, GSI_SAME_STMT);
4912 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4913 NULL_TREE, NULL_TREE);
4914 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4915 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4916 expand_omp_regimplify_p, NULL, NULL)
4917 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4918 expand_omp_regimplify_p, NULL, NULL))
4920 gsi = gsi_for_stmt (cond_stmt);
4921 gimple_regimplify_operands (cond_stmt, &gsi);
4923 ep = split_block (entry_bb, cond_stmt);
4924 ep->flags = EDGE_TRUE_VALUE;
4925 entry_bb = ep->dest;
4926 ep->probability = profile_probability::very_likely ();
4927 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4928 ep->probability = profile_probability::very_unlikely ();
4929 if (gimple_in_ssa_p (cfun))
4931 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4932 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4933 !gsi_end_p (gpi); gsi_next (&gpi))
4935 gphi *phi = gpi.phi ();
4936 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4937 ep, UNKNOWN_LOCATION);
4940 gsi = gsi_last_bb (entry_bb);
4943 if (fd->lastprivate_conditional)
4945 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4946 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4947 if (fd->have_pointer_condtemp)
4948 condtemp = OMP_CLAUSE_DECL (c);
4949 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4950 cond_var = OMP_CLAUSE_DECL (c);
4952 if (fd->have_reductemp
4953 /* For scan, we don't want to reinitialize condtemp before the
4954 second loop. */
4955 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4956 || fd->have_nonctrl_scantemp)
4958 tree t1 = build_int_cst (long_integer_type_node, 0);
4959 tree t2 = build_int_cst (long_integer_type_node, 1);
4960 tree t3 = build_int_cstu (long_integer_type_node,
4961 (HOST_WIDE_INT_1U << 31) + 1);
4962 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4963 gimple_stmt_iterator gsi2 = gsi_none ();
4964 gimple *g = NULL;
4965 tree mem = null_pointer_node, memv = NULL_TREE;
4966 unsigned HOST_WIDE_INT condtemp_sz = 0;
4967 unsigned HOST_WIDE_INT alloc_align = 0;
4968 if (fd->have_reductemp)
4970 gcc_assert (!fd->have_nonctrl_scantemp);
4971 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4972 reductions = OMP_CLAUSE_DECL (c);
4973 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4974 g = SSA_NAME_DEF_STMT (reductions);
4975 reductions = gimple_assign_rhs1 (g);
4976 OMP_CLAUSE_DECL (c) = reductions;
4977 gsi2 = gsi_for_stmt (g);
4979 else
4981 if (gsi_end_p (gsip))
4982 gsi2 = gsi_after_labels (region->entry);
4983 else
4984 gsi2 = gsip;
4985 reductions = null_pointer_node;
4987 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4989 tree type;
4990 if (fd->have_pointer_condtemp)
4991 type = TREE_TYPE (condtemp);
4992 else
4993 type = ptr_type_node;
4994 memv = create_tmp_var (type);
4995 TREE_ADDRESSABLE (memv) = 1;
4996 unsigned HOST_WIDE_INT sz = 0;
4997 tree size = NULL_TREE;
4998 if (fd->have_pointer_condtemp)
5000 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5001 sz *= fd->lastprivate_conditional;
5002 condtemp_sz = sz;
5004 if (fd->have_nonctrl_scantemp)
5006 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5007 gimple *g = gimple_build_call (nthreads, 0);
5008 nthreads = create_tmp_var (integer_type_node);
5009 gimple_call_set_lhs (g, nthreads);
5010 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5011 nthreads = fold_convert (sizetype, nthreads);
5012 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5013 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5014 alloc_align, nthreads, NULL,
5015 false);
5016 size = fold_convert (type, size);
5018 else
5019 size = build_int_cst (type, sz);
5020 expand_omp_build_assign (&gsi2, memv, size, false);
5021 mem = build_fold_addr_expr (memv);
5023 tree t
5024 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5025 9, t1, t2, t2, t3, t1, null_pointer_node,
5026 null_pointer_node, reductions, mem);
5027 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5028 true, GSI_SAME_STMT);
5029 if (fd->have_pointer_condtemp)
5030 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5031 if (fd->have_nonctrl_scantemp)
5033 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5034 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5035 alloc_align, nthreads, &gsi2, false);
5037 if (fd->have_reductemp)
5039 gsi_remove (&gsi2, true);
5040 release_ssa_name (gimple_assign_lhs (g));
5043 switch (gimple_omp_for_kind (fd->for_stmt))
5045 case GF_OMP_FOR_KIND_FOR:
5046 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5047 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5048 break;
5049 case GF_OMP_FOR_KIND_DISTRIBUTE:
5050 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5051 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5052 break;
5053 default:
5054 gcc_unreachable ();
5056 nthreads = build_call_expr (nthreads, 0);
5057 nthreads = fold_convert (itype, nthreads);
5058 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5059 true, GSI_SAME_STMT);
5060 threadid = build_call_expr (threadid, 0);
5061 threadid = fold_convert (itype, threadid);
5062 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5063 true, GSI_SAME_STMT);
5065 n1 = fd->loop.n1;
5066 n2 = fd->loop.n2;
5067 step = fd->loop.step;
5068 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5070 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5071 OMP_CLAUSE__LOOPTEMP_);
5072 gcc_assert (innerc);
5073 n1 = OMP_CLAUSE_DECL (innerc);
5074 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5075 OMP_CLAUSE__LOOPTEMP_);
5076 gcc_assert (innerc);
5077 n2 = OMP_CLAUSE_DECL (innerc);
5079 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5080 true, NULL_TREE, true, GSI_SAME_STMT);
5081 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5082 true, NULL_TREE, true, GSI_SAME_STMT);
5083 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5084 true, NULL_TREE, true, GSI_SAME_STMT);
5086 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5087 t = fold_build2 (PLUS_EXPR, itype, step, t);
5088 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5089 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5090 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5091 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5092 fold_build1 (NEGATE_EXPR, itype, t),
5093 fold_build1 (NEGATE_EXPR, itype, step));
5094 else
5095 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5096 t = fold_convert (itype, t);
5097 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5099 q = create_tmp_reg (itype, "q");
5100 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5101 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5102 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5104 tt = create_tmp_reg (itype, "tt");
5105 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5106 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5107 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5109 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5110 gcond *cond_stmt = gimple_build_cond_empty (t);
5111 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5113 second_bb = split_block (entry_bb, cond_stmt)->dest;
5114 gsi = gsi_last_nondebug_bb (second_bb);
5115 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5117 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5118 GSI_SAME_STMT);
5119 gassign *assign_stmt
5120 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5121 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5123 third_bb = split_block (second_bb, assign_stmt)->dest;
5124 gsi = gsi_last_nondebug_bb (third_bb);
5125 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5127 if (fd->have_nonctrl_scantemp)
5129 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5130 tree controlp = NULL_TREE, controlb = NULL_TREE;
5131 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5132 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5133 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5135 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5136 controlb = OMP_CLAUSE_DECL (c);
5137 else
5138 controlp = OMP_CLAUSE_DECL (c);
5139 if (controlb && controlp)
5140 break;
5142 gcc_assert (controlp && controlb);
5143 tree cnt = create_tmp_var (sizetype);
5144 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5145 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5146 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5147 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5148 alloc_align, cnt, NULL, true);
5149 tree size = create_tmp_var (sizetype);
5150 expand_omp_build_assign (&gsi, size, sz, false);
5151 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5152 size, size_int (16384));
5153 expand_omp_build_assign (&gsi, controlb, cmp);
5154 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5155 NULL_TREE, NULL_TREE);
5156 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5157 fourth_bb = split_block (third_bb, g)->dest;
5158 gsi = gsi_last_nondebug_bb (fourth_bb);
5159 /* FIXME: Once we have allocators, this should use the allocator. */
5160 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5161 gimple_call_set_lhs (g, controlp);
5162 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5163 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5164 &gsi, true);
5165 gsi_prev (&gsi);
5166 g = gsi_stmt (gsi);
5167 fifth_bb = split_block (fourth_bb, g)->dest;
5168 gsi = gsi_last_nondebug_bb (fifth_bb);
5170 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5171 gimple_call_set_lhs (g, controlp);
5172 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5173 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5174 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5175 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5176 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5178 tree tmp = create_tmp_var (sizetype);
5179 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5180 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5181 TYPE_SIZE_UNIT (pointee_type));
5182 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5183 g = gimple_build_call (alloca_decl, 2, tmp,
5184 size_int (TYPE_ALIGN (pointee_type)));
5185 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5186 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5189 sixth_bb = split_block (fifth_bb, g)->dest;
5190 gsi = gsi_last_nondebug_bb (sixth_bb);
5193 t = build2 (MULT_EXPR, itype, q, threadid);
5194 t = build2 (PLUS_EXPR, itype, t, tt);
5195 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5197 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5198 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5200 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5201 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5203 /* Remove the GIMPLE_OMP_FOR statement. */
5204 gsi_remove (&gsi, true);
5206 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5207 gsi = gsi_start_bb (seq_start_bb);
5209 tree startvar = fd->loop.v;
5210 tree endvar = NULL_TREE;
5212 if (gimple_omp_for_combined_p (fd->for_stmt))
5214 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5215 ? gimple_omp_parallel_clauses (inner_stmt)
5216 : gimple_omp_for_clauses (inner_stmt);
5217 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5218 gcc_assert (innerc);
5219 startvar = OMP_CLAUSE_DECL (innerc);
5220 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5221 OMP_CLAUSE__LOOPTEMP_);
5222 gcc_assert (innerc);
5223 endvar = OMP_CLAUSE_DECL (innerc);
5224 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5225 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5227 innerc = find_lastprivate_looptemp (fd, innerc);
5228 if (innerc)
5230 /* If needed (distribute parallel for with lastprivate),
5231 propagate down the total number of iterations. */
5232 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5233 fd->loop.n2);
5234 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5235 GSI_CONTINUE_LINKING);
5236 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5237 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5241 t = fold_convert (itype, s0);
5242 t = fold_build2 (MULT_EXPR, itype, t, step);
5243 if (POINTER_TYPE_P (type))
5245 t = fold_build_pointer_plus (n1, t);
5246 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5247 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5248 t = fold_convert (signed_type_for (type), t);
5250 else
5251 t = fold_build2 (PLUS_EXPR, type, t, n1);
5252 t = fold_convert (TREE_TYPE (startvar), t);
5253 t = force_gimple_operand_gsi (&gsi, t,
5254 DECL_P (startvar)
5255 && TREE_ADDRESSABLE (startvar),
5256 NULL_TREE, false, GSI_CONTINUE_LINKING);
5257 assign_stmt = gimple_build_assign (startvar, t);
5258 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5259 if (cond_var)
5261 tree itype = TREE_TYPE (cond_var);
5262 /* For lastprivate(conditional:) itervar, we need some iteration
5263 counter that starts at a non-zero unsigned value and increases.
5264 Prefer as few IVs as possible, so if we can use startvar
5265 itself, use that, or startvar + constant (those would be
5266 incremented with step), and as a last resort use s0 + 1,
5267 incremented by 1 each iteration. */
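/* E.g. (hypothetical loops): for "for (i = 5; i < n; i++)" startvar
   itself already starts at a non-zero value and can serve as the
   counter; for "for (i = 0; i < n; i++)" startvar + 1 is used; for a
   pointer iterator, a non-constant N1 or a non-"<" condition the code
   falls back to s0 + 1.  */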
5268 if (POINTER_TYPE_P (type)
5269 || TREE_CODE (n1) != INTEGER_CST
5270 || fd->loop.cond_code != LT_EXPR)
5271 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5272 build_int_cst (itype, 1));
5273 else if (tree_int_cst_sgn (n1) == 1)
5274 t = fold_convert (itype, t);
5275 else
5277 tree c = fold_convert (itype, n1);
5278 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5279 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5281 t = force_gimple_operand_gsi (&gsi, t, false,
5282 NULL_TREE, false, GSI_CONTINUE_LINKING);
5283 assign_stmt = gimple_build_assign (cond_var, t);
5284 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5287 t = fold_convert (itype, e0);
5288 t = fold_build2 (MULT_EXPR, itype, t, step);
5289 if (POINTER_TYPE_P (type))
5291 t = fold_build_pointer_plus (n1, t);
5292 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5293 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5294 t = fold_convert (signed_type_for (type), t);
5296 else
5297 t = fold_build2 (PLUS_EXPR, type, t, n1);
5298 t = fold_convert (TREE_TYPE (startvar), t);
5299 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5300 false, GSI_CONTINUE_LINKING);
5301 if (endvar)
5303 assign_stmt = gimple_build_assign (endvar, e);
5304 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5305 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5306 assign_stmt = gimple_build_assign (fd->loop.v, e);
5307 else
5308 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5309 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5311 /* Handle linear clause adjustments. */
5312 tree itercnt = NULL_TREE;
5313 tree *nonrect_bounds = NULL;
5314 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5315 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5316 c; c = OMP_CLAUSE_CHAIN (c))
5317 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5318 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5320 tree d = OMP_CLAUSE_DECL (c);
5321 tree t = d, a, dest;
5322 if (omp_privatize_by_reference (t))
5323 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5324 if (itercnt == NULL_TREE)
5326 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5328 itercnt = fold_build2 (MINUS_EXPR, itype,
5329 fold_convert (itype, n1),
5330 fold_convert (itype, fd->loop.n1));
5331 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5332 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5333 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5334 NULL_TREE, false,
5335 GSI_CONTINUE_LINKING);
5337 else
5338 itercnt = s0;
5340 tree type = TREE_TYPE (t);
5341 if (POINTER_TYPE_P (type))
5342 type = sizetype;
5343 a = fold_build2 (MULT_EXPR, type,
5344 fold_convert (type, itercnt),
5345 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5346 dest = unshare_expr (t);
5347 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5348 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5349 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5350 false, GSI_CONTINUE_LINKING);
5351 expand_omp_build_assign (&gsi, dest, t, true);
5353 if (fd->collapse > 1)
5355 if (fd->non_rect)
5357 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5358 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5360 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5361 startvar);
5364 if (!broken_loop)
5366 /* The code controlling the sequential loop replaces the
5367 GIMPLE_OMP_CONTINUE. */
5368 gsi = gsi_last_nondebug_bb (cont_bb);
5369 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5370 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5371 vmain = gimple_omp_continue_control_use (cont_stmt);
5372 vback = gimple_omp_continue_control_def (cont_stmt);
5374 if (cond_var)
5376 tree itype = TREE_TYPE (cond_var);
5377 tree t2;
5378 if (POINTER_TYPE_P (type)
5379 || TREE_CODE (n1) != INTEGER_CST
5380 || fd->loop.cond_code != LT_EXPR)
5381 t2 = build_int_cst (itype, 1);
5382 else
5383 t2 = fold_convert (itype, step);
5384 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5385 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5386 NULL_TREE, true, GSI_SAME_STMT);
5387 assign_stmt = gimple_build_assign (cond_var, t2);
5388 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5391 if (!gimple_omp_for_combined_p (fd->for_stmt))
5393 if (POINTER_TYPE_P (type))
5394 t = fold_build_pointer_plus (vmain, step);
5395 else
5396 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5397 t = force_gimple_operand_gsi (&gsi, t,
5398 DECL_P (vback)
5399 && TREE_ADDRESSABLE (vback),
5400 NULL_TREE, true, GSI_SAME_STMT);
5401 assign_stmt = gimple_build_assign (vback, t);
5402 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5404 t = build2 (fd->loop.cond_code, boolean_type_node,
5405 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5406 ? t : vback, e);
5407 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5410 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5411 gsi_remove (&gsi, true);
5413 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5414 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5415 cont_bb, body_bb);
5418 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5419 gsi = gsi_last_nondebug_bb (exit_bb);
5420 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5422 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5423 if (fd->have_reductemp
5424 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5425 && !fd->have_nonctrl_scantemp))
5427 tree fn;
5428 if (t)
5429 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5430 else
5431 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5432 gcall *g = gimple_build_call (fn, 0);
5433 if (t)
5435 gimple_call_set_lhs (g, t);
5436 if (fd->have_reductemp)
5437 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5438 NOP_EXPR, t),
5439 GSI_SAME_STMT);
5441 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5443 else
5444 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5446 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5447 && !fd->have_nonctrl_scantemp)
5449 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5450 gcall *g = gimple_build_call (fn, 0);
5451 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5453 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5455 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5456 tree controlp = NULL_TREE, controlb = NULL_TREE;
5457 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5458 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5459 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5461 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5462 controlb = OMP_CLAUSE_DECL (c);
5463 else
5464 controlp = OMP_CLAUSE_DECL (c);
5465 if (controlb && controlp)
5466 break;
5468 gcc_assert (controlp && controlb);
5469 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5470 NULL_TREE, NULL_TREE);
5471 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5472 exit1_bb = split_block (exit_bb, g)->dest;
5473 gsi = gsi_after_labels (exit1_bb);
5474 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5475 controlp);
5476 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5477 exit2_bb = split_block (exit1_bb, g)->dest;
5478 gsi = gsi_after_labels (exit2_bb);
5479 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5480 controlp);
5481 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5482 exit3_bb = split_block (exit2_bb, g)->dest;
5483 gsi = gsi_after_labels (exit3_bb);
5485 gsi_remove (&gsi, true);
5487 /* Connect all the blocks. */
5488 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5489 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5490 ep = find_edge (entry_bb, second_bb);
5491 ep->flags = EDGE_TRUE_VALUE;
5492 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5493 if (fourth_bb)
5495 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5496 ep->probability
5497 = profile_probability::guessed_always ().apply_scale (1, 2);
5498 ep = find_edge (third_bb, fourth_bb);
5499 ep->flags = EDGE_TRUE_VALUE;
5500 ep->probability
5501 = profile_probability::guessed_always ().apply_scale (1, 2);
5502 ep = find_edge (fourth_bb, fifth_bb);
5503 redirect_edge_and_branch (ep, sixth_bb);
5505 else
5506 sixth_bb = third_bb;
5507 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5508 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5509 if (exit1_bb)
5511 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5512 ep->probability
5513 = profile_probability::guessed_always ().apply_scale (1, 2);
5514 ep = find_edge (exit_bb, exit1_bb);
5515 ep->flags = EDGE_TRUE_VALUE;
5516 ep->probability
5517 = profile_probability::guessed_always ().apply_scale (1, 2);
5518 ep = find_edge (exit1_bb, exit2_bb);
5519 redirect_edge_and_branch (ep, exit3_bb);
5522 if (!broken_loop)
5524 ep = find_edge (cont_bb, body_bb);
5525 if (ep == NULL)
5527 ep = BRANCH_EDGE (cont_bb);
5528 gcc_assert (single_succ (ep->dest) == body_bb);
5530 if (gimple_omp_for_combined_p (fd->for_stmt))
5532 remove_edge (ep);
5533 ep = NULL;
5535 else if (fd->collapse > 1)
5537 remove_edge (ep);
5538 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5540 else
5541 ep->flags = EDGE_TRUE_VALUE;
5542 find_edge (cont_bb, fin_bb)->flags
5543 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5546 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5547 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5548 if (fourth_bb)
5550 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5551 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5553 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5555 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5556 recompute_dominator (CDI_DOMINATORS, body_bb));
5557 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5558 recompute_dominator (CDI_DOMINATORS, fin_bb));
5559 if (exit1_bb)
5561 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5562 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5565 class loop *loop = body_bb->loop_father;
5566 if (loop != entry_bb->loop_father)
5568 gcc_assert (broken_loop || loop->header == body_bb);
5569 gcc_assert (broken_loop
5570 || loop->latch == region->cont
5571 || single_pred (loop->latch) == region->cont);
5572 return;
5575 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5577 loop = alloc_loop ();
5578 loop->header = body_bb;
5579 if (collapse_bb == NULL)
5580 loop->latch = cont_bb;
5581 add_loop (loop, body_bb->loop_father);
5585 /* Return the PHI node in E->DEST whose argument on edge E is ARG, or NULL. */
5587 static gphi *
5588 find_phi_with_arg_on_edge (tree arg, edge e)
5590 basic_block bb = e->dest;
5592 for (gphi_iterator gpi = gsi_start_phis (bb);
5593 !gsi_end_p (gpi);
5594 gsi_next (&gpi))
5596 gphi *phi = gpi.phi ();
5597 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5598 return phi;
5601 return NULL;
5604 /* A subroutine of expand_omp_for. Generate code for a parallel
5605 loop with static schedule and a specified chunk size. Given
5606 parameters:
5608 for (V = N1; V cond N2; V += STEP) BODY;
5610 where COND is "<" or ">", we generate pseudocode
5612 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5613 if (cond is <)
5614 adj = STEP - 1;
5615 else
5616 adj = STEP + 1;
5617 if ((__typeof (V)) -1 > 0 && cond is >)
5618 n = -(adj + N2 - N1) / -STEP;
5619 else
5620 n = (adj + N2 - N1) / STEP;
5621 trip = 0;
5622 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5623 here so that V is defined
5624 if the loop is not entered
5625 L0:
5626 s0 = (trip * nthreads + threadid) * CHUNK;
5627 e0 = min (s0 + CHUNK, n);
5628 if (s0 < n) goto L1; else goto L4;
5629 L1:
5630 V = s0 * STEP + N1;
5631 e = e0 * STEP + N1;
5632 L2:
5633 BODY;
5634 V += STEP;
5635 if (V cond e) goto L2; else goto L3;
5636 L3:
5637 trip += 1;
5638 goto L0;
5639 L4:
5640 */
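/* Illustrative example (hypothetical values): for

       #pragma omp for schedule(static, 4)
       for (i = 0; i < 10; i++) body;

   we have N1 = 0, N2 = 10, STEP = 1, COND = "<", CHUNK = 4 and n = 10.
   With nthreads = 2, thread 0 runs iterations 0-3 on trip 0 and 8-9 on
   trip 1, while thread 1 runs iterations 4-7 on trip 0 and exits on
   trip 1 because s0 = (1 * 2 + 1) * 4 = 12 >= n.  */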
5642 static void
5643 expand_omp_for_static_chunk (struct omp_region *region,
5644 struct omp_for_data *fd, gimple *inner_stmt)
5646 tree n, s0, e0, e, t;
5647 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5648 tree type, itype, vmain, vback, vextra;
5649 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5650 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5651 gimple_stmt_iterator gsi, gsip;
5652 edge se;
5653 bool broken_loop = region->cont == NULL;
5654 tree *counts = NULL;
5655 tree n1, n2, step;
5656 tree reductions = NULL_TREE;
5657 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5659 itype = type = TREE_TYPE (fd->loop.v);
5660 if (POINTER_TYPE_P (type))
5661 itype = signed_type_for (type);
5663 entry_bb = region->entry;
5664 se = split_block (entry_bb, last_stmt (entry_bb));
5665 entry_bb = se->src;
5666 iter_part_bb = se->dest;
5667 cont_bb = region->cont;
5668 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5669 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5670 gcc_assert (broken_loop
5671 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5672 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5673 body_bb = single_succ (seq_start_bb);
5674 if (!broken_loop)
5676 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5677 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5678 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5679 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5681 exit_bb = region->exit;
5683 /* Trip and adjustment setup goes in ENTRY_BB. */
5684 gsi = gsi_last_nondebug_bb (entry_bb);
5685 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5686 gsip = gsi;
5687 gsi_prev (&gsip);
5689 if (fd->collapse > 1)
5691 int first_zero_iter = -1, dummy = -1;
5692 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5694 counts = XALLOCAVEC (tree, fd->collapse);
5695 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5696 fin_bb, first_zero_iter,
5697 dummy_bb, dummy, l2_dom_bb);
5698 t = NULL_TREE;
5700 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5701 t = integer_one_node;
5702 else
5703 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5704 fold_convert (type, fd->loop.n1),
5705 fold_convert (type, fd->loop.n2));
5706 if (fd->collapse == 1
5707 && TYPE_UNSIGNED (type)
5708 && (t == NULL_TREE || !integer_onep (t)))
5710 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5711 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5712 true, GSI_SAME_STMT);
5713 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5714 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5715 true, GSI_SAME_STMT);
5716 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5717 NULL_TREE, NULL_TREE);
5718 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5719 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5720 expand_omp_regimplify_p, NULL, NULL)
5721 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5722 expand_omp_regimplify_p, NULL, NULL))
5724 gsi = gsi_for_stmt (cond_stmt);
5725 gimple_regimplify_operands (cond_stmt, &gsi);
5727 se = split_block (entry_bb, cond_stmt);
5728 se->flags = EDGE_TRUE_VALUE;
5729 entry_bb = se->dest;
5730 se->probability = profile_probability::very_likely ();
5731 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5732 se->probability = profile_probability::very_unlikely ();
5733 if (gimple_in_ssa_p (cfun))
5735 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5736 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5737 !gsi_end_p (gpi); gsi_next (&gpi))
5739 gphi *phi = gpi.phi ();
5740 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5741 se, UNKNOWN_LOCATION);
5744 gsi = gsi_last_bb (entry_bb);
5747 if (fd->lastprivate_conditional)
5749 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5750 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5751 if (fd->have_pointer_condtemp)
5752 condtemp = OMP_CLAUSE_DECL (c);
5753 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5754 cond_var = OMP_CLAUSE_DECL (c);
5756 if (fd->have_reductemp || fd->have_pointer_condtemp)
5758 tree t1 = build_int_cst (long_integer_type_node, 0);
5759 tree t2 = build_int_cst (long_integer_type_node, 1);
5760 tree t3 = build_int_cstu (long_integer_type_node,
5761 (HOST_WIDE_INT_1U << 31) + 1);
5762 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5763 gimple_stmt_iterator gsi2 = gsi_none ();
5764 gimple *g = NULL;
5765 tree mem = null_pointer_node, memv = NULL_TREE;
5766 if (fd->have_reductemp)
5768 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5769 reductions = OMP_CLAUSE_DECL (c);
5770 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5771 g = SSA_NAME_DEF_STMT (reductions);
5772 reductions = gimple_assign_rhs1 (g);
5773 OMP_CLAUSE_DECL (c) = reductions;
5774 gsi2 = gsi_for_stmt (g);
5776 else
5778 if (gsi_end_p (gsip))
5779 gsi2 = gsi_after_labels (region->entry);
5780 else
5781 gsi2 = gsip;
5782 reductions = null_pointer_node;
5784 if (fd->have_pointer_condtemp)
5786 tree type = TREE_TYPE (condtemp);
5787 memv = create_tmp_var (type);
5788 TREE_ADDRESSABLE (memv) = 1;
5789 unsigned HOST_WIDE_INT sz
5790 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5791 sz *= fd->lastprivate_conditional;
5792 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5793 false);
5794 mem = build_fold_addr_expr (memv);
5796 tree t
5797 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5798 9, t1, t2, t2, t3, t1, null_pointer_node,
5799 null_pointer_node, reductions, mem);
5800 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5801 true, GSI_SAME_STMT);
5802 if (fd->have_pointer_condtemp)
5803 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5804 if (fd->have_reductemp)
5806 gsi_remove (&gsi2, true);
5807 release_ssa_name (gimple_assign_lhs (g));
5810 switch (gimple_omp_for_kind (fd->for_stmt))
5812 case GF_OMP_FOR_KIND_FOR:
5813 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5814 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5815 break;
5816 case GF_OMP_FOR_KIND_DISTRIBUTE:
5817 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5818 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5819 break;
5820 default:
5821 gcc_unreachable ();
5823 nthreads = build_call_expr (nthreads, 0);
5824 nthreads = fold_convert (itype, nthreads);
5825 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5826 true, GSI_SAME_STMT);
5827 threadid = build_call_expr (threadid, 0);
5828 threadid = fold_convert (itype, threadid);
5829 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5830 true, GSI_SAME_STMT);
5832 n1 = fd->loop.n1;
5833 n2 = fd->loop.n2;
5834 step = fd->loop.step;
5835 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5837 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5838 OMP_CLAUSE__LOOPTEMP_);
5839 gcc_assert (innerc);
5840 n1 = OMP_CLAUSE_DECL (innerc);
5841 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5842 OMP_CLAUSE__LOOPTEMP_);
5843 gcc_assert (innerc);
5844 n2 = OMP_CLAUSE_DECL (innerc);
5846 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5847 true, NULL_TREE, true, GSI_SAME_STMT);
5848 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5849 true, NULL_TREE, true, GSI_SAME_STMT);
5850 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5851 true, NULL_TREE, true, GSI_SAME_STMT);
5852 tree chunk_size = fold_convert (itype, fd->chunk_size);
5853 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5854 chunk_size
5855 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5856 GSI_SAME_STMT);
5858 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5859 t = fold_build2 (PLUS_EXPR, itype, step, t);
5860 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5861 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5862 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5863 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5864 fold_build1 (NEGATE_EXPR, itype, t),
5865 fold_build1 (NEGATE_EXPR, itype, step));
5866 else
5867 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5868 t = fold_convert (itype, t);
5869 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5870 true, GSI_SAME_STMT);
5872 trip_var = create_tmp_reg (itype, ".trip");
5873 if (gimple_in_ssa_p (cfun))
5875 trip_init = make_ssa_name (trip_var);
5876 trip_main = make_ssa_name (trip_var);
5877 trip_back = make_ssa_name (trip_var);
5879 else
5881 trip_init = trip_var;
5882 trip_main = trip_var;
5883 trip_back = trip_var;
5886 gassign *assign_stmt
5887 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5888 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5890 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5891 t = fold_build2 (MULT_EXPR, itype, t, step);
5892 if (POINTER_TYPE_P (type))
5893 t = fold_build_pointer_plus (n1, t);
5894 else
5895 t = fold_build2 (PLUS_EXPR, type, t, n1);
5896 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5897 true, GSI_SAME_STMT);
5899 /* Remove the GIMPLE_OMP_FOR. */
5900 gsi_remove (&gsi, true);
5902 gimple_stmt_iterator gsif = gsi;
5904 /* Iteration space partitioning goes in ITER_PART_BB. */
5905 gsi = gsi_last_bb (iter_part_bb);
5907 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5908 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5909 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5910 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5911 false, GSI_CONTINUE_LINKING);
5913 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5914 t = fold_build2 (MIN_EXPR, itype, t, n);
5915 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5916 false, GSI_CONTINUE_LINKING);
5918 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5919 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5921 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5922 gsi = gsi_start_bb (seq_start_bb);
5924 tree startvar = fd->loop.v;
5925 tree endvar = NULL_TREE;
5927 if (gimple_omp_for_combined_p (fd->for_stmt))
5929 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5930 ? gimple_omp_parallel_clauses (inner_stmt)
5931 : gimple_omp_for_clauses (inner_stmt);
5932 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5933 gcc_assert (innerc);
5934 startvar = OMP_CLAUSE_DECL (innerc);
5935 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5936 OMP_CLAUSE__LOOPTEMP_);
5937 gcc_assert (innerc);
5938 endvar = OMP_CLAUSE_DECL (innerc);
5939 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5940 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5942 innerc = find_lastprivate_looptemp (fd, innerc);
5943 if (innerc)
5945 /* If needed (distribute parallel for with lastprivate),
5946 propagate down the total number of iterations. */
5947 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5948 fd->loop.n2);
5949 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5950 GSI_CONTINUE_LINKING);
5951 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5952 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5957 t = fold_convert (itype, s0);
5958 t = fold_build2 (MULT_EXPR, itype, t, step);
5959 if (POINTER_TYPE_P (type))
5961 t = fold_build_pointer_plus (n1, t);
5962 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5963 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5964 t = fold_convert (signed_type_for (type), t);
5966 else
5967 t = fold_build2 (PLUS_EXPR, type, t, n1);
5968 t = fold_convert (TREE_TYPE (startvar), t);
5969 t = force_gimple_operand_gsi (&gsi, t,
5970 DECL_P (startvar)
5971 && TREE_ADDRESSABLE (startvar),
5972 NULL_TREE, false, GSI_CONTINUE_LINKING);
5973 assign_stmt = gimple_build_assign (startvar, t);
5974 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5975 if (cond_var)
5977 tree itype = TREE_TYPE (cond_var);
5978 /* For lastprivate(conditional:) itervar, we need some iteration
5979 counter that starts at a non-zero unsigned value and increases.
5980 Prefer as few IVs as possible, so if we can use startvar
5981 itself, use that, or startvar + constant (those would be
5982 incremented with step), and as a last resort use s0 + 1,
5983 incremented by 1 each iteration. */
5984 if (POINTER_TYPE_P (type)
5985 || TREE_CODE (n1) != INTEGER_CST
5986 || fd->loop.cond_code != LT_EXPR)
5987 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5988 build_int_cst (itype, 1));
5989 else if (tree_int_cst_sgn (n1) == 1)
5990 t = fold_convert (itype, t);
5991 else
5993 tree c = fold_convert (itype, n1);
5994 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5995 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5997 t = force_gimple_operand_gsi (&gsi, t, false,
5998 NULL_TREE, false, GSI_CONTINUE_LINKING);
5999 assign_stmt = gimple_build_assign (cond_var, t);
6000 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6003 t = fold_convert (itype, e0);
6004 t = fold_build2 (MULT_EXPR, itype, t, step);
6005 if (POINTER_TYPE_P (type))
6007 t = fold_build_pointer_plus (n1, t);
6008 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6009 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6010 t = fold_convert (signed_type_for (type), t);
6012 else
6013 t = fold_build2 (PLUS_EXPR, type, t, n1);
6014 t = fold_convert (TREE_TYPE (startvar), t);
6015 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6016 false, GSI_CONTINUE_LINKING);
6017 if (endvar)
6019 assign_stmt = gimple_build_assign (endvar, e);
6020 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6021 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6022 assign_stmt = gimple_build_assign (fd->loop.v, e);
6023 else
6024 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6025 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6027 /* Handle linear clause adjustments. */
6028 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6029 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6030 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6031 c; c = OMP_CLAUSE_CHAIN (c))
6032 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6033 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6035 tree d = OMP_CLAUSE_DECL (c);
6036 tree t = d, a, dest;
6037 if (omp_privatize_by_reference (t))
6038 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6039 tree type = TREE_TYPE (t);
6040 if (POINTER_TYPE_P (type))
6041 type = sizetype;
6042 dest = unshare_expr (t);
6043 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6044 expand_omp_build_assign (&gsif, v, t);
6045 if (itercnt == NULL_TREE)
6047 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6049 itercntbias
6050 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6051 fold_convert (itype, fd->loop.n1));
6052 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6053 itercntbias, step);
6054 itercntbias
6055 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6056 NULL_TREE, true,
6057 GSI_SAME_STMT);
6058 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6059 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6060 NULL_TREE, false,
6061 GSI_CONTINUE_LINKING);
6063 else
6064 itercnt = s0;
6066 a = fold_build2 (MULT_EXPR, type,
6067 fold_convert (type, itercnt),
6068 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6069 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6070 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6071 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6072 false, GSI_CONTINUE_LINKING);
6073 expand_omp_build_assign (&gsi, dest, t, true);
6075 if (fd->collapse > 1)
6076 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6078 if (!broken_loop)
6080 /* The code controlling the sequential loop goes in CONT_BB,
6081 replacing the GIMPLE_OMP_CONTINUE. */
6082 gsi = gsi_last_nondebug_bb (cont_bb);
6083 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6084 vmain = gimple_omp_continue_control_use (cont_stmt);
6085 vback = gimple_omp_continue_control_def (cont_stmt);
6087 if (cond_var)
6089 tree itype = TREE_TYPE (cond_var);
6090 tree t2;
6091 if (POINTER_TYPE_P (type)
6092 || TREE_CODE (n1) != INTEGER_CST
6093 || fd->loop.cond_code != LT_EXPR)
6094 t2 = build_int_cst (itype, 1);
6095 else
6096 t2 = fold_convert (itype, step);
6097 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6098 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6099 NULL_TREE, true, GSI_SAME_STMT);
6100 assign_stmt = gimple_build_assign (cond_var, t2);
6101 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6104 if (!gimple_omp_for_combined_p (fd->for_stmt))
6106 if (POINTER_TYPE_P (type))
6107 t = fold_build_pointer_plus (vmain, step);
6108 else
6109 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6110 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6111 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6112 true, GSI_SAME_STMT);
6113 assign_stmt = gimple_build_assign (vback, t);
6114 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6116 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6117 t = build2 (EQ_EXPR, boolean_type_node,
6118 build_int_cst (itype, 0),
6119 build_int_cst (itype, 1));
6120 else
6121 t = build2 (fd->loop.cond_code, boolean_type_node,
6122 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6123 ? t : vback, e);
6124 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6127 /* Remove GIMPLE_OMP_CONTINUE. */
6128 gsi_remove (&gsi, true);
6130 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6131 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6133 /* Trip update code goes into TRIP_UPDATE_BB. */
6134 gsi = gsi_start_bb (trip_update_bb);
6136 t = build_int_cst (itype, 1);
6137 t = build2 (PLUS_EXPR, itype, trip_main, t);
6138 assign_stmt = gimple_build_assign (trip_back, t);
6139 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6142 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6143 gsi = gsi_last_nondebug_bb (exit_bb);
6144 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6146 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6147 if (fd->have_reductemp || fd->have_pointer_condtemp)
6149 tree fn;
6150 if (t)
6151 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6152 else
6153 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6154 gcall *g = gimple_build_call (fn, 0);
6155 if (t)
6157 gimple_call_set_lhs (g, t);
6158 if (fd->have_reductemp)
6159 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6160 NOP_EXPR, t),
6161 GSI_SAME_STMT);
6163 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6165 else
6166 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6168 else if (fd->have_pointer_condtemp)
6170 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6171 gcall *g = gimple_build_call (fn, 0);
6172 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6174 gsi_remove (&gsi, true);
6176 /* Connect the new blocks. */
6177 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6178 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6180 if (!broken_loop)
6182 se = find_edge (cont_bb, body_bb);
6183 if (se == NULL)
6185 se = BRANCH_EDGE (cont_bb);
6186 gcc_assert (single_succ (se->dest) == body_bb);
6188 if (gimple_omp_for_combined_p (fd->for_stmt))
6190 remove_edge (se);
6191 se = NULL;
6193 else if (fd->collapse > 1)
6195 remove_edge (se);
6196 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6198 else
6199 se->flags = EDGE_TRUE_VALUE;
6200 find_edge (cont_bb, trip_update_bb)->flags
6201 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6203 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6204 iter_part_bb);
6207 if (gimple_in_ssa_p (cfun))
6209 gphi_iterator psi;
6210 gphi *phi;
6211 edge re, ene;
6212 edge_var_map *vm;
6213 size_t i;
6215 gcc_assert (fd->collapse == 1 && !broken_loop);
6217 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6218 remove arguments of the phi nodes in fin_bb. We need to create
6219 appropriate phi nodes in iter_part_bb instead. */
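/* Informally: the PHIs in fin_bb used to merge the value arriving from
   iter_part_bb with the value carried around the trip loop from
   trip_update_bb; once that back edge targets iter_part_bb, the merge
   has to happen in iter_part_bb itself, with the entry value (vextra
   for fd->loop.v) on the edge from entry_bb and the back-edge value
   from trip_update_bb.  */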
6220 se = find_edge (iter_part_bb, fin_bb);
6221 re = single_succ_edge (trip_update_bb);
6222 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6223 ene = single_succ_edge (entry_bb);
6225 psi = gsi_start_phis (fin_bb);
6226 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6227 gsi_next (&psi), ++i)
6229 gphi *nphi;
6230 location_t locus;
6232 phi = psi.phi ();
6233 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6234 redirect_edge_var_map_def (vm), 0))
6235 continue;
6237 t = gimple_phi_result (phi);
6238 gcc_assert (t == redirect_edge_var_map_result (vm));
6240 if (!single_pred_p (fin_bb))
6241 t = copy_ssa_name (t, phi);
6243 nphi = create_phi_node (t, iter_part_bb);
6245 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6246 locus = gimple_phi_arg_location_from_edge (phi, se);
6248 /* A special case -- fd->loop.v is not yet computed in
6249 iter_part_bb; we need to use vextra instead. */
6250 if (t == fd->loop.v)
6251 t = vextra;
6252 add_phi_arg (nphi, t, ene, locus);
6253 locus = redirect_edge_var_map_location (vm);
6254 tree back_arg = redirect_edge_var_map_def (vm);
6255 add_phi_arg (nphi, back_arg, re, locus);
6256 edge ce = find_edge (cont_bb, body_bb);
6257 if (ce == NULL)
6259 ce = BRANCH_EDGE (cont_bb);
6260 gcc_assert (single_succ (ce->dest) == body_bb);
6261 ce = single_succ_edge (ce->dest);
6263 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6264 gcc_assert (inner_loop_phi != NULL);
6265 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6266 find_edge (seq_start_bb, body_bb), locus);
6268 if (!single_pred_p (fin_bb))
6269 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6271 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6272 redirect_edge_var_map_clear (re);
6273 if (single_pred_p (fin_bb))
6274 while (1)
6276 psi = gsi_start_phis (fin_bb);
6277 if (gsi_end_p (psi))
6278 break;
6279 remove_phi_node (&psi, false);
6282 /* Make phi node for trip. */
6283 phi = create_phi_node (trip_main, iter_part_bb);
6284 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6285 UNKNOWN_LOCATION);
6286 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6287 UNKNOWN_LOCATION);
6290 if (!broken_loop)
6291 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6292 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6293 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6294 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6295 recompute_dominator (CDI_DOMINATORS, fin_bb));
6296 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6297 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6298 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6299 recompute_dominator (CDI_DOMINATORS, body_bb));
6301 if (!broken_loop)
6303 class loop *loop = body_bb->loop_father;
6304 class loop *trip_loop = alloc_loop ();
6305 trip_loop->header = iter_part_bb;
6306 trip_loop->latch = trip_update_bb;
6307 add_loop (trip_loop, iter_part_bb->loop_father);
6309 if (loop != entry_bb->loop_father)
6311 gcc_assert (loop->header == body_bb);
6312 gcc_assert (loop->latch == region->cont
6313 || single_pred (loop->latch) == region->cont);
6314 trip_loop->inner = loop;
6315 return;
6318 if (!gimple_omp_for_combined_p (fd->for_stmt))
6320 loop = alloc_loop ();
6321 loop->header = body_bb;
6322 if (collapse_bb == NULL)
6323 loop->latch = cont_bb;
6324 add_loop (loop, trip_loop);
6329 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6330 loop. Given parameters:
6332 for (V = N1; V cond N2; V += STEP) BODY;
6334 where COND is "<" or ">", we generate pseudocode
6336 V = N1;
6337 goto L1;
6338 L0:
6339 BODY;
6340 V += STEP;
6341 L1:
6342 if (V cond N2) goto L0; else goto L2;
6343 L2:
6345 For collapsed loops, emit the outer loops as scalar
6346 and only try to vectorize the innermost loop. */
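/* Illustrative example (hypothetical loop):

       #pragma omp simd safelen(8)
       for (i = 0; i < n; i++)
	 a[i] = b[i] + c[i];

   is emitted in the bottom-test form above with N1 = 0, N2 = n, STEP = 1
   and COND = "<"; the resulting loop is annotated with the safelen (and
   _simduid_ if present) so the vectorizer can vectorize the body later.  */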
6348 static void
6349 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6351 tree type, t;
6352 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6353 gimple_stmt_iterator gsi;
6354 gimple *stmt;
6355 gcond *cond_stmt;
6356 bool broken_loop = region->cont == NULL;
6357 edge e, ne;
6358 tree *counts = NULL;
6359 int i;
6360 int safelen_int = INT_MAX;
6361 bool dont_vectorize = false;
6362 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6363 OMP_CLAUSE_SAFELEN);
6364 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6365 OMP_CLAUSE__SIMDUID_);
6366 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6367 OMP_CLAUSE_IF);
6368 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6369 OMP_CLAUSE_SIMDLEN);
6370 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6371 OMP_CLAUSE__CONDTEMP_);
6372 tree n1, n2;
6373 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6375 if (safelen)
6377 poly_uint64 val;
6378 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6379 if (!poly_int_tree_p (safelen, &val))
6380 safelen_int = 0;
6381 else
6382 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6383 if (safelen_int == 1)
6384 safelen_int = 0;
6386 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6387 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6389 safelen_int = 0;
6390 dont_vectorize = true;
6392 type = TREE_TYPE (fd->loop.v);
6393 entry_bb = region->entry;
6394 cont_bb = region->cont;
6395 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6396 gcc_assert (broken_loop
6397 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6398 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6399 if (!broken_loop)
6401 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6402 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6403 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6404 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6406 else
6408 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6409 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6410 l2_bb = single_succ (l1_bb);
6412 exit_bb = region->exit;
6413 l2_dom_bb = NULL;
6415 gsi = gsi_last_nondebug_bb (entry_bb);
6417 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6418 /* Not needed in SSA form right now. */
6419 gcc_assert (!gimple_in_ssa_p (cfun));
6420 if (fd->collapse > 1
6421 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6422 || broken_loop))
6424 int first_zero_iter = -1, dummy = -1;
6425 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6427 counts = XALLOCAVEC (tree, fd->collapse);
6428 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6429 zero_iter_bb, first_zero_iter,
6430 dummy_bb, dummy, l2_dom_bb);
6432 if (l2_dom_bb == NULL)
6433 l2_dom_bb = l1_bb;
6435 n1 = fd->loop.n1;
6436 n2 = fd->loop.n2;
6437 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6439 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6440 OMP_CLAUSE__LOOPTEMP_);
6441 gcc_assert (innerc);
6442 n1 = OMP_CLAUSE_DECL (innerc);
6443 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6444 OMP_CLAUSE__LOOPTEMP_);
6445 gcc_assert (innerc);
6446 n2 = OMP_CLAUSE_DECL (innerc);
6448 tree step = fd->loop.step;
6449 tree orig_step = step; /* May be different from step if is_simt. */
6451 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6452 OMP_CLAUSE__SIMT_);
6453 if (is_simt)
6455 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6456 is_simt = safelen_int > 1;
6458 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6459 if (is_simt)
6461 simt_lane = create_tmp_var (unsigned_type_node);
6462 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6463 gimple_call_set_lhs (g, simt_lane);
6464 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6465 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6466 fold_convert (TREE_TYPE (step), simt_lane));
6467 n1 = fold_convert (type, n1);
6468 if (POINTER_TYPE_P (type))
6469 n1 = fold_build_pointer_plus (n1, offset);
6470 else
6471 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6473 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6474 if (fd->collapse > 1)
6475 simt_maxlane = build_one_cst (unsigned_type_node);
6476 else if (safelen_int < omp_max_simt_vf ())
6477 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6478 tree vf
6479 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6480 unsigned_type_node, 0);
6481 if (simt_maxlane)
6482 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6483 vf = fold_convert (TREE_TYPE (step), vf);
6484 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6487 tree n2var = NULL_TREE;
6488 tree n2v = NULL_TREE;
6489 tree *nonrect_bounds = NULL;
6490 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6491 if (fd->collapse > 1)
6493 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6495 if (fd->non_rect)
6497 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6498 memset (nonrect_bounds, 0,
6499 sizeof (tree) * (fd->last_nonrect + 1));
6501 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6502 gcc_assert (entry_bb == gsi_bb (gsi));
6503 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6504 gsi_prev (&gsi);
6505 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6506 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6507 NULL, n1);
6508 gsi = gsi_for_stmt (fd->for_stmt);
6510 if (broken_loop)
6512 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6514 /* Compute in n2var the limit for the first innermost loop,
6515 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt),
6516 where cnt is how many iterations the loop would have if all
6517 further iterations were assigned to the current task. */
6518 n2var = create_tmp_var (type);
6519 i = fd->collapse - 1;
6520 tree itype = TREE_TYPE (fd->loops[i].v);
6521 if (POINTER_TYPE_P (itype))
6522 itype = signed_type_for (itype);
6523 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6524 ? -1 : 1));
6525 t = fold_build2 (PLUS_EXPR, itype,
6526 fold_convert (itype, fd->loops[i].step), t);
6527 t = fold_build2 (PLUS_EXPR, itype, t,
6528 fold_convert (itype, fd->loops[i].n2));
6529 if (fd->loops[i].m2)
6531 tree t2 = fold_convert (itype,
6532 fd->loops[i - fd->loops[i].outer].v);
6533 tree t3 = fold_convert (itype, fd->loops[i].m2);
6534 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6535 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6537 t = fold_build2 (MINUS_EXPR, itype, t,
6538 fold_convert (itype, fd->loops[i].v));
6539 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6540 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6541 fold_build1 (NEGATE_EXPR, itype, t),
6542 fold_build1 (NEGATE_EXPR, itype,
6543 fold_convert (itype,
6544 fd->loops[i].step)));
6545 else
6546 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6547 fold_convert (itype, fd->loops[i].step));
6548 t = fold_convert (type, t);
6549 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6550 min_arg1 = create_tmp_var (type);
6551 expand_omp_build_assign (&gsi, min_arg1, t2);
6552 min_arg2 = create_tmp_var (type);
6553 expand_omp_build_assign (&gsi, min_arg2, t);
6555 else
6557 if (TREE_CODE (n2) == INTEGER_CST)
6559 /* Indicate for lastprivate handling that at least one iteration
6560 has been performed, without wasting runtime. */
6561 if (integer_nonzerop (n2))
6562 expand_omp_build_assign (&gsi, fd->loop.v,
6563 fold_convert (type, n2));
6564 else
6565 /* Indicate that no iteration has been performed. */
6566 expand_omp_build_assign (&gsi, fd->loop.v,
6567 build_one_cst (type));
6569 else
6571 expand_omp_build_assign (&gsi, fd->loop.v,
6572 build_zero_cst (type));
6573 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6575 for (i = 0; i < fd->collapse; i++)
6577 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6578 if (fd->loops[i].m1)
6580 tree t2
6581 = fold_convert (TREE_TYPE (t),
6582 fd->loops[i - fd->loops[i].outer].v);
6583 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6584 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6585 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6587 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6588 /* For normal non-combined collapsed loops just initialize
6589 the outermost iterator in the entry_bb. */
6590 if (!broken_loop)
6591 break;
6595 else
6596 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6597 tree altv = NULL_TREE, altn2 = NULL_TREE;
6598 if (fd->collapse == 1
6599 && !broken_loop
6600 && TREE_CODE (orig_step) != INTEGER_CST)
6602 /* The vectorizer currently punts on loops with a non-constant step
6603 for the main IV (it can't compute the number of iterations and
6604 gives up because of that). Since for OpenMP loops it is always
6605 possible to compute the number of iterations upfront, use an
6606 alternate IV as the loop iterator:
6607 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6608 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
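/* E.g. (hypothetical values): with n1 = 0, n2 = 10 and a run-time step
   of 3, altn2 = (10 - 0 + 3 - 1) / 3 = 4, so altv iterates over
   0, 1, 2, 3 while the original IV takes the values 0, 3, 6, 9.  */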
6609 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6610 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6611 tree itype = TREE_TYPE (fd->loop.v);
6612 if (POINTER_TYPE_P (itype))
6613 itype = signed_type_for (itype);
6614 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6615 t = fold_build2 (PLUS_EXPR, itype,
6616 fold_convert (itype, step), t);
6617 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6618 t = fold_build2 (MINUS_EXPR, itype, t,
6619 fold_convert (itype, fd->loop.v));
6620 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6621 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6622 fold_build1 (NEGATE_EXPR, itype, t),
6623 fold_build1 (NEGATE_EXPR, itype,
6624 fold_convert (itype, step)));
6625 else
6626 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6627 fold_convert (itype, step));
6628 t = fold_convert (TREE_TYPE (altv), t);
6629 altn2 = create_tmp_var (TREE_TYPE (altv));
6630 expand_omp_build_assign (&gsi, altn2, t);
6631 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6632 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6633 true, GSI_SAME_STMT);
6634 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6635 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6636 build_zero_cst (TREE_TYPE (altv)));
6637 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6639 else if (fd->collapse > 1
6640 && !broken_loop
6641 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6642 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6644 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6645 altn2 = create_tmp_var (TREE_TYPE (altv));
6647 if (cond_var)
6649 if (POINTER_TYPE_P (type)
6650 || TREE_CODE (n1) != INTEGER_CST
6651 || fd->loop.cond_code != LT_EXPR
6652 || tree_int_cst_sgn (n1) != 1)
6653 expand_omp_build_assign (&gsi, cond_var,
6654 build_one_cst (TREE_TYPE (cond_var)));
6655 else
6656 expand_omp_build_assign (&gsi, cond_var,
6657 fold_convert (TREE_TYPE (cond_var), n1));
6660 /* Remove the GIMPLE_OMP_FOR statement. */
6661 gsi_remove (&gsi, true);
6663 if (!broken_loop)
6665 /* Code to control the increment goes in the CONT_BB. */
6666 gsi = gsi_last_nondebug_bb (cont_bb);
6667 stmt = gsi_stmt (gsi);
6668 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6670 if (fd->collapse == 1
6671 || gimple_omp_for_combined_into_p (fd->for_stmt))
6673 if (POINTER_TYPE_P (type))
6674 t = fold_build_pointer_plus (fd->loop.v, step);
6675 else
6676 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6677 expand_omp_build_assign (&gsi, fd->loop.v, t);
6679 else if (TREE_CODE (n2) != INTEGER_CST)
6680 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6681 if (altv)
6683 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6684 build_one_cst (TREE_TYPE (altv)));
6685 expand_omp_build_assign (&gsi, altv, t);
6688 if (fd->collapse > 1)
6690 i = fd->collapse - 1;
6691 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6693 t = fold_convert (sizetype, fd->loops[i].step);
6694 t = fold_build_pointer_plus (fd->loops[i].v, t);
6696 else
6698 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6699 fd->loops[i].step);
6700 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6701 fd->loops[i].v, t);
6703 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6705 if (cond_var)
6707 if (POINTER_TYPE_P (type)
6708 || TREE_CODE (n1) != INTEGER_CST
6709 || fd->loop.cond_code != LT_EXPR
6710 || tree_int_cst_sgn (n1) != 1)
6711 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6712 build_one_cst (TREE_TYPE (cond_var)));
6713 else
6714 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6715 fold_convert (TREE_TYPE (cond_var), step));
6716 expand_omp_build_assign (&gsi, cond_var, t);
6719 /* Remove GIMPLE_OMP_CONTINUE. */
6720 gsi_remove (&gsi, true);
6723 /* Emit the condition in L1_BB. */
6724 gsi = gsi_start_bb (l1_bb);
6726 if (altv)
6727 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6728 else if (fd->collapse > 1
6729 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6730 && !broken_loop)
6732 i = fd->collapse - 1;
6733 tree itype = TREE_TYPE (fd->loops[i].v);
6734 if (fd->loops[i].m2)
6735 t = n2v = create_tmp_var (itype);
6736 else
6737 t = fold_convert (itype, fd->loops[i].n2);
6738 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6739 false, GSI_CONTINUE_LINKING);
6740 tree v = fd->loops[i].v;
6741 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6742 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6743 false, GSI_CONTINUE_LINKING);
6744 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6746 else
6748 if (fd->collapse > 1 && !broken_loop)
6749 t = n2var;
6750 else
6751 t = fold_convert (type, n2);
6752 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6753 false, GSI_CONTINUE_LINKING);
6754 tree v = fd->loop.v;
6755 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6756 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6757 false, GSI_CONTINUE_LINKING);
6758 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6760 cond_stmt = gimple_build_cond_empty (t);
6761 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6762 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6763 NULL, NULL)
6764 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6765 NULL, NULL))
6767 gsi = gsi_for_stmt (cond_stmt);
6768 gimple_regimplify_operands (cond_stmt, &gsi);
6771 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6772 if (is_simt)
6774 gsi = gsi_start_bb (l2_bb);
6775 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6776 if (POINTER_TYPE_P (type))
6777 t = fold_build_pointer_plus (fd->loop.v, step);
6778 else
6779 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6780 expand_omp_build_assign (&gsi, fd->loop.v, t);
6783 /* Remove GIMPLE_OMP_RETURN. */
6784 gsi = gsi_last_nondebug_bb (exit_bb);
6785 gsi_remove (&gsi, true);
6787 /* Connect the new blocks. */
6788 remove_edge (FALLTHRU_EDGE (entry_bb));
6790 if (!broken_loop)
6792 remove_edge (BRANCH_EDGE (entry_bb));
6793 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6795 e = BRANCH_EDGE (l1_bb);
6796 ne = FALLTHRU_EDGE (l1_bb);
6797 e->flags = EDGE_TRUE_VALUE;
6799 else
6801 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6803 ne = single_succ_edge (l1_bb);
6804 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6807 ne->flags = EDGE_FALSE_VALUE;
6808 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6809 ne->probability = e->probability.invert ();
6811 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6812 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
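/* When SIMT_MAXLANE is set, guard the loop entry: lanes whose SIMT_LANE is
   >= SIMT_MAXLANE branch straight to L2_BB and never execute the loop body.  */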
6814 if (simt_maxlane)
6816 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6817 NULL_TREE, NULL_TREE);
6818 gsi = gsi_last_bb (entry_bb);
6819 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6820 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6821 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6822 FALLTHRU_EDGE (entry_bb)->probability
6823 = profile_probability::guessed_always ().apply_scale (7, 8);
6824 BRANCH_EDGE (entry_bb)->probability
6825 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6826 l2_dom_bb = entry_bb;
6828 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
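/* For a collapsed (> 1) loop nest, build the control flow that advances the
   outer IVs: when the innermost loop exits, step the enclosing IV, re-test
   its condition, and if it still iterates re-initialize the next inner IV
   (plus the N2VAR/ALTN2 bookkeeping for the innermost level) before looping
   back; otherwise chain the exit on to the next outer level.  */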
6830 if (!broken_loop && fd->collapse > 1)
6832 basic_block last_bb = l1_bb;
6833 basic_block init_bb = NULL;
6834 for (i = fd->collapse - 2; i >= 0; i--)
6836 tree nextn2v = NULL_TREE;
6837 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6838 e = EDGE_SUCC (last_bb, 0);
6839 else
6840 e = EDGE_SUCC (last_bb, 1);
6841 basic_block bb = split_edge (e);
6842 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6844 t = fold_convert (sizetype, fd->loops[i].step);
6845 t = fold_build_pointer_plus (fd->loops[i].v, t);
6847 else
6849 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6850 fd->loops[i].step);
6851 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6852 fd->loops[i].v, t);
6854 gsi = gsi_after_labels (bb);
6855 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6857 bb = split_block (bb, last_stmt (bb))->dest;
6858 gsi = gsi_start_bb (bb);
6859 tree itype = TREE_TYPE (fd->loops[i].v);
6860 if (fd->loops[i].m2)
6861 t = nextn2v = create_tmp_var (itype);
6862 else
6863 t = fold_convert (itype, fd->loops[i].n2);
6864 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6865 false, GSI_CONTINUE_LINKING);
6866 tree v = fd->loops[i].v;
6867 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6868 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6869 false, GSI_CONTINUE_LINKING);
6870 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6871 cond_stmt = gimple_build_cond_empty (t);
6872 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6873 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6874 expand_omp_regimplify_p, NULL, NULL)
6875 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6876 expand_omp_regimplify_p, NULL, NULL))
6878 gsi = gsi_for_stmt (cond_stmt);
6879 gimple_regimplify_operands (cond_stmt, &gsi);
6881 ne = single_succ_edge (bb);
6882 ne->flags = EDGE_FALSE_VALUE;
6884 init_bb = create_empty_bb (bb);
6885 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6886 add_bb_to_loop (init_bb, bb->loop_father);
6887 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6888 e->probability
6889 = profile_probability::guessed_always ().apply_scale (7, 8);
6890 ne->probability = e->probability.invert ();
6892 gsi = gsi_after_labels (init_bb);
6893 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6894 fd->loops[i + 1].n1);
6895 if (fd->loops[i + 1].m1)
6897 tree t2 = fold_convert (TREE_TYPE (t),
6898 fd->loops[i + 1
6899 - fd->loops[i + 1].outer].v);
6900 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6901 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6902 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6904 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6905 if (fd->loops[i + 1].m2)
6907 if (i + 2 == fd->collapse && (n2var || altv))
6909 gcc_assert (n2v == NULL_TREE);
6910 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6912 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6913 fd->loops[i + 1].n2);
6914 tree t2 = fold_convert (TREE_TYPE (t),
6915 fd->loops[i + 1
6916 - fd->loops[i + 1].outer].v);
6917 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6918 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6919 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6920 expand_omp_build_assign (&gsi, n2v, t);
6922 if (i + 2 == fd->collapse && n2var)
6924 /* For composite simd, n2 is the first iteration the current
6925 task shouldn't already handle, so we effectively want to use
6926 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6927 as the vectorized loop. Except the vectorizer will not
6928 vectorize that, so instead compute N2VAR as
6929 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6930 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6931 as the loop to vectorize. */
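/* MIN_ARG1 is N2 - V and MIN_ARG2 the innermost loop's iteration count
   (COUNTS3 above, or the equivalent computed here for non-rectangular
   bounds); N2VAR is then set to V + MIN (MIN_ARG1, MIN_ARG2) below.  */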
6932 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6933 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6935 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6936 == LT_EXPR ? -1 : 1));
6937 t = fold_build2 (PLUS_EXPR, itype,
6938 fold_convert (itype,
6939 fd->loops[i + 1].step), t);
6940 if (fd->loops[i + 1].m2)
6941 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6942 else
6943 t = fold_build2 (PLUS_EXPR, itype, t,
6944 fold_convert (itype,
6945 fd->loops[i + 1].n2));
6946 t = fold_build2 (MINUS_EXPR, itype, t,
6947 fold_convert (itype, fd->loops[i + 1].v));
6948 tree step = fold_convert (itype, fd->loops[i + 1].step);
6949 if (TYPE_UNSIGNED (itype)
6950 && fd->loops[i + 1].cond_code == GT_EXPR)
6951 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6952 fold_build1 (NEGATE_EXPR, itype, t),
6953 fold_build1 (NEGATE_EXPR, itype, step));
6954 else
6955 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6956 t = fold_convert (type, t);
6958 else
6959 t = counts[i + 1];
6960 expand_omp_build_assign (&gsi, min_arg1, t2);
6961 expand_omp_build_assign (&gsi, min_arg2, t);
6962 e = split_block (init_bb, last_stmt (init_bb));
6963 gsi = gsi_after_labels (e->dest);
6964 init_bb = e->dest;
6965 remove_edge (FALLTHRU_EDGE (entry_bb));
6966 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6967 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6968 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6969 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6970 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6971 expand_omp_build_assign (&gsi, n2var, t);
6973 if (i + 2 == fd->collapse && altv)
6975 /* The vectorizer currently punts on loops with non-constant
6976 steps for the main IV (can't compute number of iterations
6977 and gives up because of that). As for OpenMP loops it is
6978 always possible to compute the number of iterations upfront,
6979 use an alternate IV as the loop iterator. */
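/* ALTV counts the innermost iterations from zero; ALTN2 gets the computed
   iteration count, or zero (via the COND_EXPR below) when the loop
   condition does not hold on entry.  */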
6980 expand_omp_build_assign (&gsi, altv,
6981 build_zero_cst (TREE_TYPE (altv)));
6982 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6983 if (POINTER_TYPE_P (itype))
6984 itype = signed_type_for (itype);
6985 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6986 ? -1 : 1));
6987 t = fold_build2 (PLUS_EXPR, itype,
6988 fold_convert (itype, fd->loops[i + 1].step), t);
6989 t = fold_build2 (PLUS_EXPR, itype, t,
6990 fold_convert (itype,
6991 fd->loops[i + 1].m2
6992 ? n2v : fd->loops[i + 1].n2));
6993 t = fold_build2 (MINUS_EXPR, itype, t,
6994 fold_convert (itype, fd->loops[i + 1].v));
6995 tree step = fold_convert (itype, fd->loops[i + 1].step);
6996 if (TYPE_UNSIGNED (itype)
6997 && fd->loops[i + 1].cond_code == GT_EXPR)
6998 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6999 fold_build1 (NEGATE_EXPR, itype, t),
7000 fold_build1 (NEGATE_EXPR, itype, step));
7001 else
7002 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7003 t = fold_convert (TREE_TYPE (altv), t);
7004 expand_omp_build_assign (&gsi, altn2, t);
7005 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7006 fd->loops[i + 1].m2
7007 ? n2v : fd->loops[i + 1].n2);
7008 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7009 true, GSI_SAME_STMT);
7010 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7011 fd->loops[i + 1].v, t2);
7012 gassign *g
7013 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7014 build_zero_cst (TREE_TYPE (altv)));
7015 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7017 n2v = nextn2v;
7019 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7020 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7022 e = find_edge (entry_bb, last_bb);
7023 redirect_edge_succ (e, bb);
7024 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7025 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7028 last_bb = bb;
7031 if (!broken_loop)
7033 class loop *loop = alloc_loop ();
7034 loop->header = l1_bb;
7035 loop->latch = cont_bb;
7036 add_loop (loop, l1_bb->loop_father);
7037 loop->safelen = safelen_int;
7038 if (simduid)
7040 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7041 cfun->has_simduid_loops = true;
7043 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7044 the loop. */
7045 if ((flag_tree_loop_vectorize
7046 || !OPTION_SET_P (flag_tree_loop_vectorize))
7047 && flag_tree_loop_optimize
7048 && loop->safelen > 1)
7050 loop->force_vectorize = true;
7051 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7053 unsigned HOST_WIDE_INT v
7054 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7055 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7056 loop->simdlen = v;
7058 cfun->has_force_vectorize_loops = true;
7060 else if (dont_vectorize)
7061 loop->dont_vectorize = true;
7063 else if (simduid)
7064 cfun->has_simduid_loops = true;
7067 /* Taskloop construct is represented after gimplification with
7068 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7069 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7070 which should just compute all the needed loop temporaries
7071 for GIMPLE_OMP_TASK. */
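/* E.g. a source construct

	#pragma omp taskloop
	for (i = a; i < b; i++)
	  body;

   is at this point conceptually

	GIMPLE_OMP_FOR		<-- expanded by this routine
	  GIMPLE_OMP_TASK
	    GIMPLE_OMP_FOR	<-- expanded by expand_omp_taskloop_for_inner
	      body

   where this routine only materializes the _looptemp_ start/end values that
   the task expansion (ending up as a GOMP_taskloop{,_ull} call) consumes.  */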
7073 static void
7074 expand_omp_taskloop_for_outer (struct omp_region *region,
7075 struct omp_for_data *fd,
7076 gimple *inner_stmt)
7078 tree type, bias = NULL_TREE;
7079 basic_block entry_bb, cont_bb, exit_bb;
7080 gimple_stmt_iterator gsi;
7081 gassign *assign_stmt;
7082 tree *counts = NULL;
7083 int i;
7085 gcc_assert (inner_stmt);
7086 gcc_assert (region->cont);
7087 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7088 && gimple_omp_task_taskloop_p (inner_stmt));
7089 type = TREE_TYPE (fd->loop.v);
7091 /* See if we need to bias by LLONG_MIN. */
7092 if (fd->iter_type == long_long_unsigned_type_node
7093 && TREE_CODE (type) == INTEGER_TYPE
7094 && !TYPE_UNSIGNED (type))
7096 tree n1, n2;
7098 if (fd->loop.cond_code == LT_EXPR)
7100 n1 = fd->loop.n1;
7101 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7103 else
7105 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7106 n2 = fd->loop.n1;
7108 if (TREE_CODE (n1) != INTEGER_CST
7109 || TREE_CODE (n2) != INTEGER_CST
7110 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7111 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
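/* The bias maps the signed iteration space onto the unsigned long long
   space used by GOMP_taskloop_ull while preserving the ordering of the
   bounds; it is only needed when the bounds are not compile-time constants
   of the same sign.  */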
7114 entry_bb = region->entry;
7115 cont_bb = region->cont;
7116 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7117 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7118 exit_bb = region->exit;
7120 gsi = gsi_last_nondebug_bb (entry_bb);
7121 gimple *for_stmt = gsi_stmt (gsi);
7122 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7123 if (fd->collapse > 1)
7125 int first_zero_iter = -1, dummy = -1;
7126 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7128 counts = XALLOCAVEC (tree, fd->collapse);
7129 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7130 zero_iter_bb, first_zero_iter,
7131 dummy_bb, dummy, l2_dom_bb);
7133 if (zero_iter_bb)
7135 /* Some counts[i] vars might be uninitialized if
7136 some loop has zero iterations. But the body shouldn't
7137 be executed in that case, so just avoid uninit warnings. */
7138 for (i = first_zero_iter; i < fd->collapse; i++)
7139 if (SSA_VAR_P (counts[i]))
7140 suppress_warning (counts[i], OPT_Wuninitialized);
7141 gsi_prev (&gsi);
7142 edge e = split_block (entry_bb, gsi_stmt (gsi));
7143 entry_bb = e->dest;
7144 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7145 gsi = gsi_last_bb (entry_bb);
7146 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7147 get_immediate_dominator (CDI_DOMINATORS,
7148 zero_iter_bb));
7152 tree t0, t1;
7153 t1 = fd->loop.n2;
7154 t0 = fd->loop.n1;
7155 if (POINTER_TYPE_P (TREE_TYPE (t0))
7156 && TYPE_PRECISION (TREE_TYPE (t0))
7157 != TYPE_PRECISION (fd->iter_type))
7159 /* Avoid casting pointers to integer of a different size. */
7160 tree itype = signed_type_for (type);
7161 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7162 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7164 else
7166 t1 = fold_convert (fd->iter_type, t1);
7167 t0 = fold_convert (fd->iter_type, t0);
7169 if (bias)
7171 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7172 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7175 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7176 OMP_CLAUSE__LOOPTEMP_);
7177 gcc_assert (innerc);
7178 tree startvar = OMP_CLAUSE_DECL (innerc);
7179 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7180 gcc_assert (innerc);
7181 tree endvar = OMP_CLAUSE_DECL (innerc);
7182 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7184 innerc = find_lastprivate_looptemp (fd, innerc);
7185 if (innerc)
7187 /* If needed (inner taskloop has lastprivate clause), propagate
7188 down the total number of iterations. */
7189 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7190 NULL_TREE, false,
7191 GSI_CONTINUE_LINKING);
7192 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7193 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7197 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7198 GSI_CONTINUE_LINKING);
7199 assign_stmt = gimple_build_assign (startvar, t0);
7200 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7202 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7203 GSI_CONTINUE_LINKING);
7204 assign_stmt = gimple_build_assign (endvar, t1);
7205 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7206 if (fd->collapse > 1)
7207 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7209 /* Remove the GIMPLE_OMP_FOR statement. */
7210 gsi = gsi_for_stmt (for_stmt);
7211 gsi_remove (&gsi, true);
7213 gsi = gsi_last_nondebug_bb (cont_bb);
7214 gsi_remove (&gsi, true);
7216 gsi = gsi_last_nondebug_bb (exit_bb);
7217 gsi_remove (&gsi, true);
7219 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7220 remove_edge (BRANCH_EDGE (entry_bb));
7221 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7222 remove_edge (BRANCH_EDGE (cont_bb));
7223 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7224 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7225 recompute_dominator (CDI_DOMINATORS, region->entry));
7228 /* Taskloop construct is represented after gimplification with
7229 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7230 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7231 GOMP_taskloop{,_ull} function arranges for each task to be given just
7232 a single range of iterations. */
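/* I.e. the _looptemp_ clauses on this GIMPLE_OMP_FOR carry the start and
   end of the single subrange assigned to the current task; the expansion
   below simply runs V from that start to that end with the original step.  */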
7234 static void
7235 expand_omp_taskloop_for_inner (struct omp_region *region,
7236 struct omp_for_data *fd,
7237 gimple *inner_stmt)
7239 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7240 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7241 basic_block fin_bb;
7242 gimple_stmt_iterator gsi;
7243 edge ep;
7244 bool broken_loop = region->cont == NULL;
7245 tree *counts = NULL;
7246 tree n1, n2, step;
7248 itype = type = TREE_TYPE (fd->loop.v);
7249 if (POINTER_TYPE_P (type))
7250 itype = signed_type_for (type);
7252 /* See if we need to bias by LLONG_MIN. */
7253 if (fd->iter_type == long_long_unsigned_type_node
7254 && TREE_CODE (type) == INTEGER_TYPE
7255 && !TYPE_UNSIGNED (type))
7257 tree n1, n2;
7259 if (fd->loop.cond_code == LT_EXPR)
7261 n1 = fd->loop.n1;
7262 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7264 else
7266 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7267 n2 = fd->loop.n1;
7269 if (TREE_CODE (n1) != INTEGER_CST
7270 || TREE_CODE (n2) != INTEGER_CST
7271 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7272 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7275 entry_bb = region->entry;
7276 cont_bb = region->cont;
7277 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7278 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7279 gcc_assert (broken_loop
7280 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7281 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7282 if (!broken_loop)
7284 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7285 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7287 exit_bb = region->exit;
7289 /* Iteration space partitioning goes in ENTRY_BB. */
7290 gsi = gsi_last_nondebug_bb (entry_bb);
7291 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7293 if (fd->collapse > 1)
7295 int first_zero_iter = -1, dummy = -1;
7296 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7298 counts = XALLOCAVEC (tree, fd->collapse);
7299 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7300 fin_bb, first_zero_iter,
7301 dummy_bb, dummy, l2_dom_bb);
7302 t = NULL_TREE;
7304 else
7305 t = integer_one_node;
7307 step = fd->loop.step;
7308 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7309 OMP_CLAUSE__LOOPTEMP_);
7310 gcc_assert (innerc);
7311 n1 = OMP_CLAUSE_DECL (innerc);
7312 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7313 gcc_assert (innerc);
7314 n2 = OMP_CLAUSE_DECL (innerc);
7315 if (bias)
7317 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7318 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7320 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7321 true, NULL_TREE, true, GSI_SAME_STMT);
7322 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7323 true, NULL_TREE, true, GSI_SAME_STMT);
7324 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7325 true, NULL_TREE, true, GSI_SAME_STMT);
7327 tree startvar = fd->loop.v;
7328 tree endvar = NULL_TREE;
7330 if (gimple_omp_for_combined_p (fd->for_stmt))
7332 tree clauses = gimple_omp_for_clauses (inner_stmt);
7333 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7334 gcc_assert (innerc);
7335 startvar = OMP_CLAUSE_DECL (innerc);
7336 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7337 OMP_CLAUSE__LOOPTEMP_);
7338 gcc_assert (innerc);
7339 endvar = OMP_CLAUSE_DECL (innerc);
7341 t = fold_convert (TREE_TYPE (startvar), n1);
7342 t = force_gimple_operand_gsi (&gsi, t,
7343 DECL_P (startvar)
7344 && TREE_ADDRESSABLE (startvar),
7345 NULL_TREE, false, GSI_CONTINUE_LINKING);
7346 gimple *assign_stmt = gimple_build_assign (startvar, t);
7347 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7349 t = fold_convert (TREE_TYPE (startvar), n2);
7350 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7351 false, GSI_CONTINUE_LINKING);
7352 if (endvar)
7354 assign_stmt = gimple_build_assign (endvar, e);
7355 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7356 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7357 assign_stmt = gimple_build_assign (fd->loop.v, e);
7358 else
7359 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7360 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7363 tree *nonrect_bounds = NULL;
7364 if (fd->collapse > 1)
7366 if (fd->non_rect)
7368 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7369 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7371 gcc_assert (gsi_bb (gsi) == entry_bb);
7372 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7373 startvar);
7374 entry_bb = gsi_bb (gsi);
7377 if (!broken_loop)
7379 /* The code controlling the sequential loop replaces the
7380 GIMPLE_OMP_CONTINUE. */
7381 gsi = gsi_last_nondebug_bb (cont_bb);
7382 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7383 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7384 vmain = gimple_omp_continue_control_use (cont_stmt);
7385 vback = gimple_omp_continue_control_def (cont_stmt);
7387 if (!gimple_omp_for_combined_p (fd->for_stmt))
7389 if (POINTER_TYPE_P (type))
7390 t = fold_build_pointer_plus (vmain, step);
7391 else
7392 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7393 t = force_gimple_operand_gsi (&gsi, t,
7394 DECL_P (vback)
7395 && TREE_ADDRESSABLE (vback),
7396 NULL_TREE, true, GSI_SAME_STMT);
7397 assign_stmt = gimple_build_assign (vback, t);
7398 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7400 t = build2 (fd->loop.cond_code, boolean_type_node,
7401 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7402 ? t : vback, e);
7403 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7406 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7407 gsi_remove (&gsi, true);
7409 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7410 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7411 cont_bb, body_bb);
7414 /* Remove the GIMPLE_OMP_FOR statement. */
7415 gsi = gsi_for_stmt (fd->for_stmt);
7416 gsi_remove (&gsi, true);
7418 /* Remove the GIMPLE_OMP_RETURN statement. */
7419 gsi = gsi_last_nondebug_bb (exit_bb);
7420 gsi_remove (&gsi, true);
7422 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7423 if (!broken_loop)
7424 remove_edge (BRANCH_EDGE (entry_bb));
7425 else
7427 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7428 region->outer->cont = NULL;
7431 /* Connect all the blocks. */
7432 if (!broken_loop)
7434 ep = find_edge (cont_bb, body_bb);
7435 if (gimple_omp_for_combined_p (fd->for_stmt))
7437 remove_edge (ep);
7438 ep = NULL;
7440 else if (fd->collapse > 1)
7442 remove_edge (ep);
7443 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7445 else
7446 ep->flags = EDGE_TRUE_VALUE;
7447 find_edge (cont_bb, fin_bb)->flags
7448 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7451 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7452 recompute_dominator (CDI_DOMINATORS, body_bb));
7453 if (!broken_loop)
7454 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7455 recompute_dominator (CDI_DOMINATORS, fin_bb));
7457 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7459 class loop *loop = alloc_loop ();
7460 loop->header = body_bb;
7461 if (collapse_bb == NULL)
7462 loop->latch = cont_bb;
7463 add_loop (loop, body_bb->loop_father);
7467 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7468 partitioned loop. The lowering here is abstracted, in that the
7469 loop parameters are passed through internal functions, which are
7470 further lowered by oacc_device_lower, once we get to the target
7471 compiler. The loop is of the form:
7473 for (V = B; V LTGT E; V += S) {BODY}
7475 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7476 (constant 0 for no chunking) and we will have a GWV partitioning
7477 mask, specifying dimensions over which the loop is to be
7478 partitioned (see note below). We generate code that looks like
7479 (this ignores tiling):
7481 <entry_bb> [incoming FALL->body, BRANCH->exit]
7482 typedef signedintify (typeof (V)) T; // underlying signed integral type
7483 T range = E - B;
7484 T chunk_no = 0;
7485 T DIR = LTGT == '<' ? +1 : -1;
7486 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7487 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7489 <head_bb> [created by splitting end of entry_bb]
7490 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7491 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7492 if (!(offset LTGT bound)) goto bottom_bb;
7494 <body_bb> [incoming]
7495 V = B + offset;
7496 {BODY}
7498 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7499 offset += step;
7500 if (offset LTGT bound) goto body_bb; [*]
7502 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7503 chunk_no++;
7504 if (chunk_no < chunk_max) goto head_bb;
7506 <exit_bb> [incoming]
7507 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7509 [*] Needed if V live at end of loop. */
7511 static void
7512 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7514 bool is_oacc_kernels_parallelized
7515 = (lookup_attribute ("oacc kernels parallelized",
7516 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7518 bool is_oacc_kernels
7519 = (lookup_attribute ("oacc kernels",
7520 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7521 if (is_oacc_kernels_parallelized)
7522 gcc_checking_assert (is_oacc_kernels);
7524 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7525 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7526 for SSA specifics, and some are for 'parloops' OpenACC
7527 'kernels'-parallelized specifics. */
7529 tree v = fd->loop.v;
7530 enum tree_code cond_code = fd->loop.cond_code;
7531 enum tree_code plus_code = PLUS_EXPR;
7533 tree chunk_size = integer_minus_one_node;
7534 tree gwv = integer_zero_node;
7535 tree iter_type = TREE_TYPE (v);
7536 tree diff_type = iter_type;
7537 tree plus_type = iter_type;
7538 struct oacc_collapse *counts = NULL;
7540 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7541 == GF_OMP_FOR_KIND_OACC_LOOP);
7542 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7543 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7545 if (POINTER_TYPE_P (iter_type))
7547 plus_code = POINTER_PLUS_EXPR;
7548 plus_type = sizetype;
7550 for (int ix = fd->collapse; ix--;)
7552 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7553 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7554 diff_type = diff_type2;
7556 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7557 diff_type = signed_type_for (diff_type);
7558 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7559 diff_type = integer_type_node;
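/* DIFF_TYPE is thus a signed type, at least as wide as int and as any of
   the collapsed loops' step types; the range, step and chunking
   computations below are carried out in it.  */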
7561 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7562 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7563 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7564 basic_block bottom_bb = NULL;
7566 /* entry_bb has two successors; the branch edge is to the exit
7567 block, fallthrough edge to body. */
7568 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7569 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7571 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7572 body_bb, or to a block whose only successor is the body_bb. Its
7573 fallthrough successor is the final block (same as the branch
7574 successor of the entry_bb). */
7575 if (cont_bb)
7577 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7578 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7580 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7581 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7583 else
7584 gcc_assert (!gimple_in_ssa_p (cfun));
7586 /* The exit block only has entry_bb and cont_bb as predecessors. */
7587 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7589 tree chunk_no;
7590 tree chunk_max = NULL_TREE;
7591 tree bound, offset;
7592 tree step = create_tmp_var (diff_type, ".step");
7593 bool up = cond_code == LT_EXPR;
7594 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7595 bool chunking = !gimple_in_ssa_p (cfun);
7596 bool negating;
7598 /* Tiling vars. */
7599 tree tile_size = NULL_TREE;
7600 tree element_s = NULL_TREE;
7601 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7602 basic_block elem_body_bb = NULL;
7603 basic_block elem_cont_bb = NULL;
7605 /* SSA instances. */
7606 tree offset_incr = NULL_TREE;
7607 tree offset_init = NULL_TREE;
7609 gimple_stmt_iterator gsi;
7610 gassign *ass;
7611 gcall *call;
7612 gimple *stmt;
7613 tree expr;
7614 location_t loc;
7615 edge split, be, fte;
7617 /* Split the end of entry_bb to create head_bb. */
7618 split = split_block (entry_bb, last_stmt (entry_bb));
7619 basic_block head_bb = split->dest;
7620 entry_bb = split->src;
7622 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7623 gsi = gsi_last_nondebug_bb (entry_bb);
7624 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7625 loc = gimple_location (for_stmt);
7627 if (gimple_in_ssa_p (cfun))
7629 offset_init = gimple_omp_for_index (for_stmt, 0);
7630 gcc_assert (integer_zerop (fd->loop.n1));
7631 /* The SSA parallelizer does gang parallelism. */
7632 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7635 if (fd->collapse > 1 || fd->tiling)
7637 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7638 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7639 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7640 TREE_TYPE (fd->loop.n2), loc);
7642 if (SSA_VAR_P (fd->loop.n2))
7644 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7645 true, GSI_SAME_STMT);
7646 ass = gimple_build_assign (fd->loop.n2, total);
7647 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7651 tree b = fd->loop.n1;
7652 tree e = fd->loop.n2;
7653 tree s = fd->loop.step;
7655 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7656 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7658 /* Convert the step, avoiding possible unsigned->signed overflow. */
7659 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7660 if (negating)
7661 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7662 s = fold_convert (diff_type, s);
7663 if (negating)
7664 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7665 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7667 if (!chunking)
7668 chunk_size = integer_zero_node;
7669 expr = fold_convert (diff_type, chunk_size);
7670 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7671 NULL_TREE, true, GSI_SAME_STMT);
7673 if (fd->tiling)
7675 /* Determine the tile size and element step,
7676 modify the outer loop step size. */
7677 tile_size = create_tmp_var (diff_type, ".tile_size");
7678 expr = build_int_cst (diff_type, 1);
7679 for (int ix = 0; ix < fd->collapse; ix++)
7680 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7681 expr = force_gimple_operand_gsi (&gsi, expr, true,
7682 NULL_TREE, true, GSI_SAME_STMT);
7683 ass = gimple_build_assign (tile_size, expr);
7684 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7686 element_s = create_tmp_var (diff_type, ".element_s");
7687 ass = gimple_build_assign (element_s, s);
7688 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7690 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7691 s = force_gimple_operand_gsi (&gsi, expr, true,
7692 NULL_TREE, true, GSI_SAME_STMT);
7695 /* Determine the range, avoiding possible unsigned->signed overflow. */
7696 negating = !up && TYPE_UNSIGNED (iter_type);
7697 expr = fold_build2 (MINUS_EXPR, plus_type,
7698 fold_convert (plus_type, negating ? b : e),
7699 fold_convert (plus_type, negating ? e : b));
7700 expr = fold_convert (diff_type, expr);
7701 if (negating)
7702 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7703 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7704 NULL_TREE, true, GSI_SAME_STMT);
7706 chunk_no = build_int_cst (diff_type, 0);
7707 if (chunking)
7709 gcc_assert (!gimple_in_ssa_p (cfun));
7711 expr = chunk_no;
7712 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7713 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7715 ass = gimple_build_assign (chunk_no, expr);
7716 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7718 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7719 build_int_cst (integer_type_node,
7720 IFN_GOACC_LOOP_CHUNKS),
7721 dir, range, s, chunk_size, gwv);
7722 gimple_call_set_lhs (call, chunk_max);
7723 gimple_set_location (call, loc);
7724 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7726 else
7727 chunk_size = chunk_no;
7729 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7730 build_int_cst (integer_type_node,
7731 IFN_GOACC_LOOP_STEP),
7732 dir, range, s, chunk_size, gwv);
7733 gimple_call_set_lhs (call, step);
7734 gimple_set_location (call, loc);
7735 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7737 /* Remove the GIMPLE_OMP_FOR. */
7738 gsi_remove (&gsi, true);
7740 /* Fixup edges from head_bb. */
7741 be = BRANCH_EDGE (head_bb);
7742 fte = FALLTHRU_EDGE (head_bb);
7743 be->flags |= EDGE_FALSE_VALUE;
7744 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7746 basic_block body_bb = fte->dest;
7748 if (gimple_in_ssa_p (cfun))
7750 gsi = gsi_last_nondebug_bb (cont_bb);
7751 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7753 offset = gimple_omp_continue_control_use (cont_stmt);
7754 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7756 else
7758 offset = create_tmp_var (diff_type, ".offset");
7759 offset_init = offset_incr = offset;
7761 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7763 /* Loop offset & bound go into head_bb. */
7764 gsi = gsi_start_bb (head_bb);
7766 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7767 build_int_cst (integer_type_node,
7768 IFN_GOACC_LOOP_OFFSET),
7769 dir, range, s,
7770 chunk_size, gwv, chunk_no);
7771 gimple_call_set_lhs (call, offset_init);
7772 gimple_set_location (call, loc);
7773 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7775 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7776 build_int_cst (integer_type_node,
7777 IFN_GOACC_LOOP_BOUND),
7778 dir, range, s,
7779 chunk_size, gwv, offset_init);
7780 gimple_call_set_lhs (call, bound);
7781 gimple_set_location (call, loc);
7782 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7784 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7785 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7786 GSI_CONTINUE_LINKING);
7788 /* V assignment goes into body_bb. */
7789 if (!gimple_in_ssa_p (cfun))
7791 gsi = gsi_start_bb (body_bb);
7793 expr = build2 (plus_code, iter_type, b,
7794 fold_convert (plus_type, offset));
7795 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7796 true, GSI_SAME_STMT);
7797 ass = gimple_build_assign (v, expr);
7798 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7800 if (fd->collapse > 1 || fd->tiling)
7801 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7803 if (fd->tiling)
7805 /* Determine the range of the element loop -- usually simply
7806 the tile_size, but could be smaller if the final
7807 iteration of the outer loop is a partial tile. */
7808 tree e_range = create_tmp_var (diff_type, ".e_range");
7810 expr = build2 (MIN_EXPR, diff_type,
7811 build2 (MINUS_EXPR, diff_type, bound, offset),
7812 build2 (MULT_EXPR, diff_type, tile_size,
7813 element_s));
7814 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7815 true, GSI_SAME_STMT);
7816 ass = gimple_build_assign (e_range, expr);
7817 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7819 /* Determine bound, offset & step of inner loop. */
7820 e_bound = create_tmp_var (diff_type, ".e_bound");
7821 e_offset = create_tmp_var (diff_type, ".e_offset");
7822 e_step = create_tmp_var (diff_type, ".e_step");
7824 /* Mark these as element loops. */
7825 tree t, e_gwv = integer_minus_one_node;
7826 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7828 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7829 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7830 element_s, chunk, e_gwv, chunk);
7831 gimple_call_set_lhs (call, e_offset);
7832 gimple_set_location (call, loc);
7833 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7835 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7836 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7837 element_s, chunk, e_gwv, e_offset);
7838 gimple_call_set_lhs (call, e_bound);
7839 gimple_set_location (call, loc);
7840 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7842 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7843 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7844 element_s, chunk, e_gwv);
7845 gimple_call_set_lhs (call, e_step);
7846 gimple_set_location (call, loc);
7847 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7849 /* Add test and split block. */
7850 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7851 stmt = gimple_build_cond_empty (expr);
7852 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7853 split = split_block (body_bb, stmt);
7854 elem_body_bb = split->dest;
7855 if (cont_bb == body_bb)
7856 cont_bb = elem_body_bb;
7857 body_bb = split->src;
7859 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7861 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7862 if (cont_bb == NULL)
7864 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7865 e->probability = profile_probability::even ();
7866 split->probability = profile_probability::even ();
7869 /* Initialize the user's loop vars. */
7870 gsi = gsi_start_bb (elem_body_bb);
7871 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7872 diff_type);
7876 /* Loop increment goes into cont_bb. If this is not a loop, we
7877 will have spawned threads as if it was, and each one will
7878 execute one iteration. The specification is not explicit about
7879 whether such constructs are ill-formed or not, and they can
7880 occur, especially when noreturn routines are involved. */
7881 if (cont_bb)
7883 gsi = gsi_last_nondebug_bb (cont_bb);
7884 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7885 loc = gimple_location (cont_stmt);
7887 if (fd->tiling)
7889 /* Insert element loop increment and test. */
7890 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7891 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7892 true, GSI_SAME_STMT);
7893 ass = gimple_build_assign (e_offset, expr);
7894 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7895 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7897 stmt = gimple_build_cond_empty (expr);
7898 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7899 split = split_block (cont_bb, stmt);
7900 elem_cont_bb = split->src;
7901 cont_bb = split->dest;
7903 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7904 split->probability = profile_probability::unlikely ().guessed ();
7905 edge latch_edge
7906 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7907 latch_edge->probability = profile_probability::likely ().guessed ();
7909 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7910 skip_edge->probability = profile_probability::unlikely ().guessed ();
7911 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7912 loop_entry_edge->probability
7913 = profile_probability::likely ().guessed ();
7915 gsi = gsi_for_stmt (cont_stmt);
7918 /* Increment offset. */
7919 if (gimple_in_ssa_p (cfun))
7920 expr = build2 (plus_code, iter_type, offset,
7921 fold_convert (plus_type, step));
7922 else
7923 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7924 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7925 true, GSI_SAME_STMT);
7926 ass = gimple_build_assign (offset_incr, expr);
7927 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7928 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7929 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7931 /* Remove the GIMPLE_OMP_CONTINUE. */
7932 gsi_remove (&gsi, true);
7934 /* Fixup edges from cont_bb. */
7935 be = BRANCH_EDGE (cont_bb);
7936 fte = FALLTHRU_EDGE (cont_bb);
7937 be->flags |= EDGE_TRUE_VALUE;
7938 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7940 if (chunking)
7942 /* Split the beginning of exit_bb to make bottom_bb. We
7943 need to insert a nop at the start, because splitting is
7944 after a stmt, not before. */
7945 gsi = gsi_start_bb (exit_bb);
7946 stmt = gimple_build_nop ();
7947 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7948 split = split_block (exit_bb, stmt);
7949 bottom_bb = split->src;
7950 exit_bb = split->dest;
7951 gsi = gsi_last_bb (bottom_bb);
7953 /* Chunk increment and test goes into bottom_bb. */
7954 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7955 build_int_cst (diff_type, 1));
7956 ass = gimple_build_assign (chunk_no, expr);
7957 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7959 /* Chunk test at end of bottom_bb. */
7960 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7961 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7962 GSI_CONTINUE_LINKING);
7964 /* Fixup edges from bottom_bb. */
7965 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7966 split->probability = profile_probability::unlikely ().guessed ();
7967 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7968 latch_edge->probability = profile_probability::likely ().guessed ();
7972 gsi = gsi_last_nondebug_bb (exit_bb);
7973 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7974 loc = gimple_location (gsi_stmt (gsi));
7976 if (!gimple_in_ssa_p (cfun))
7978 /* Insert the final value of V, in case it is live. This is the
7979 value for the only thread that survives past the join. */
7980 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7981 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7982 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7983 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7984 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7985 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7986 true, GSI_SAME_STMT);
7987 ass = gimple_build_assign (v, expr);
7988 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7991 /* Remove the OMP_RETURN. */
7992 gsi_remove (&gsi, true);
7994 if (cont_bb)
7996 /* We now have one, two or three nested loops. Update the loop
7997 structures. */
7998 class loop *parent = entry_bb->loop_father;
7999 class loop *body = body_bb->loop_father;
8001 if (chunking)
8003 class loop *chunk_loop = alloc_loop ();
8004 chunk_loop->header = head_bb;
8005 chunk_loop->latch = bottom_bb;
8006 add_loop (chunk_loop, parent);
8007 parent = chunk_loop;
8009 else if (parent != body)
8011 gcc_assert (body->header == body_bb);
8012 gcc_assert (body->latch == cont_bb
8013 || single_pred (body->latch) == cont_bb);
8014 parent = NULL;
8017 if (parent)
8019 class loop *body_loop = alloc_loop ();
8020 body_loop->header = body_bb;
8021 body_loop->latch = cont_bb;
8022 add_loop (body_loop, parent);
8024 if (fd->tiling)
8026 /* Insert tiling's element loop. */
8027 class loop *inner_loop = alloc_loop ();
8028 inner_loop->header = elem_body_bb;
8029 inner_loop->latch = elem_cont_bb;
8030 add_loop (inner_loop, body_loop);
8036 /* Expand the OMP loop defined by REGION. */
8038 static void
8039 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8041 struct omp_for_data fd;
8042 struct omp_for_data_loop *loops;
8044 loops = XALLOCAVEC (struct omp_for_data_loop,
8045 gimple_omp_for_collapse (last_stmt (region->entry)));
8046 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8047 &fd, loops);
8048 region->sched_kind = fd.sched_kind;
8049 region->sched_modifiers = fd.sched_modifiers;
8050 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
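/* For non-rectangular loops with invariant factors and constant steps,
   diagnose the case where the inner bounds shift by a fraction of the inner
   step from one outer iteration to the next, which OpenMP does not allow.
   E.g. (hypothetical example)

	for (i = 0; i < n; i++)
	  for (j = i; j < 2 * i; j += 2)

   has (m2 - m1) * outer-step = (2 - 1) * 1 = 1, which is not a multiple of
   the inner step 2, and is rejected below.  */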
8051 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8053 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8054 if ((loops[i].m1 || loops[i].m2)
8055 && (loops[i].m1 == NULL_TREE
8056 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8057 && (loops[i].m2 == NULL_TREE
8058 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8059 && TREE_CODE (loops[i].step) == INTEGER_CST
8060 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8062 tree t;
8063 tree itype = TREE_TYPE (loops[i].v);
8064 if (loops[i].m1 && loops[i].m2)
8065 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8066 else if (loops[i].m1)
8067 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8068 else
8069 t = loops[i].m2;
8070 t = fold_build2 (MULT_EXPR, itype, t,
8071 fold_convert (itype,
8072 loops[i - loops[i].outer].step));
8073 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8074 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8075 fold_build1 (NEGATE_EXPR, itype, t),
8076 fold_build1 (NEGATE_EXPR, itype,
8077 fold_convert (itype,
8078 loops[i].step)));
8079 else
8080 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8081 fold_convert (itype, loops[i].step));
8082 if (integer_nonzerop (t))
8083 error_at (gimple_location (fd.for_stmt),
8084 "invalid OpenMP non-rectangular loop step; "
8085 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8086 "step %qE",
8087 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8088 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8089 loops[i - loops[i].outer].step, i + 1,
8090 loops[i].step);
8094 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8095 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8096 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8097 if (region->cont)
8099 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8100 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8101 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8103 else
8104 /* If there isn't a continue then this is a degenerate case where
8105 the introduction of abnormal edges during lowering will prevent
8106 original loops from being detected. Fix that up. */
8107 loops_state_set (LOOPS_NEED_FIXUP);
8109 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8110 expand_omp_simd (region, &fd);
8111 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8113 gcc_assert (!inner_stmt && !fd.non_rect);
8114 expand_oacc_for (region, &fd);
8116 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8118 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8119 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8120 else
8121 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8123 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8124 && !fd.have_ordered)
8126 if (fd.chunk_size == NULL)
8127 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8128 else
8129 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8131 else
8133 int fn_index, start_ix, next_ix;
8134 unsigned HOST_WIDE_INT sched = 0;
8135 tree sched_arg = NULL_TREE;
8137 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8138 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8139 if (fd.chunk_size == NULL
8140 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8141 fd.chunk_size = integer_zero_node;
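/* SCHED encodes the schedule passed (as SCHED_ARG) to the GOMP_loop*_start
   entry points that take an explicit schedule argument, which are only used
   below when reduction or conditional-lastprivate temporaries are present:
   the low bits select the kind (0 runtime, 1 static, 2 dynamic, 3 guided,
   4 nonmonotonic runtime) and bit 31 marks the monotonic variants.  */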
8142 switch (fd.sched_kind)
8144 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8145 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8146 && fd.lastprivate_conditional == 0)
8148 gcc_assert (!fd.have_ordered);
8149 fn_index = 6;
8150 sched = 4;
8152 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8153 && !fd.have_ordered
8154 && fd.lastprivate_conditional == 0)
8155 fn_index = 7;
8156 else
8158 fn_index = 3;
8159 sched = (HOST_WIDE_INT_1U << 31);
8161 break;
8162 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8163 case OMP_CLAUSE_SCHEDULE_GUIDED:
8164 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8165 && !fd.have_ordered
8166 && fd.lastprivate_conditional == 0)
8168 fn_index = 3 + fd.sched_kind;
8169 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8170 break;
8172 fn_index = fd.sched_kind;
8173 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8174 sched += (HOST_WIDE_INT_1U << 31);
8175 break;
8176 case OMP_CLAUSE_SCHEDULE_STATIC:
8177 gcc_assert (fd.have_ordered);
8178 fn_index = 0;
8179 sched = (HOST_WIDE_INT_1U << 31) + 1;
8180 break;
8181 default:
8182 gcc_unreachable ();
8184 if (!fd.ordered)
8185 fn_index += fd.have_ordered * 8;
8186 if (fd.ordered)
8187 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8188 else
8189 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8190 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8191 if (fd.have_reductemp || fd.have_pointer_condtemp)
8193 if (fd.ordered)
8194 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8195 else if (fd.have_ordered)
8196 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8197 else
8198 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8199 sched_arg = build_int_cstu (long_integer_type_node, sched);
8200 if (!fd.chunk_size)
8201 fd.chunk_size = integer_zero_node;
8203 if (fd.iter_type == long_long_unsigned_type_node)
8205 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8206 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8207 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8208 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8210 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8211 (enum built_in_function) next_ix, sched_arg,
8212 inner_stmt);
8215 if (gimple_in_ssa_p (cfun))
8216 update_ssa (TODO_update_ssa_only_virtuals);
8219 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8221 v = GOMP_sections_start (n);
8222 L0:
8223 switch (v)
8224 {
8225 case 0:
8226 goto L2;
8227 case 1:
8228 section 1;
8229 goto L1;
8230 case 2:
8231 ...
8232 case n:
8233 ...
8234 default:
8235 abort ();
8236 }
8237 L1:
8238 v = GOMP_sections_next ();
8239 goto L0;
8240 L2:
8241 reduction;
8243 If this is a combined parallel sections, replace the call to
8244 GOMP_sections_start with call to GOMP_sections_next. */
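/* E.g. a source construct

	#pragma omp sections
	{
	  #pragma omp section
	    stmt1;
	  #pragma omp section
	    stmt2;
	}

   is dispatched through the switch above, with one case per section.  */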
8246 static void
8247 expand_omp_sections (struct omp_region *region)
8249 tree t, u, vin = NULL, vmain, vnext, l2;
8250 unsigned len;
8251 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8252 gimple_stmt_iterator si, switch_si;
8253 gomp_sections *sections_stmt;
8254 gimple *stmt;
8255 gomp_continue *cont;
8256 edge_iterator ei;
8257 edge e;
8258 struct omp_region *inner;
8259 unsigned i, casei;
8260 bool exit_reachable = region->cont != NULL;
8262 gcc_assert (region->exit != NULL);
8263 entry_bb = region->entry;
8264 l0_bb = single_succ (entry_bb);
8265 l1_bb = region->cont;
8266 l2_bb = region->exit;
8267 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8268 l2 = gimple_block_label (l2_bb);
8269 else
8271 /* This can happen if there are reductions. */
8272 len = EDGE_COUNT (l0_bb->succs);
8273 gcc_assert (len > 0);
8274 e = EDGE_SUCC (l0_bb, len - 1);
8275 si = gsi_last_nondebug_bb (e->dest);
8276 l2 = NULL_TREE;
8277 if (gsi_end_p (si)
8278 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8279 l2 = gimple_block_label (e->dest);
8280 else
8281 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8283 si = gsi_last_nondebug_bb (e->dest);
8284 if (gsi_end_p (si)
8285 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8287 l2 = gimple_block_label (e->dest);
8288 break;
8292 if (exit_reachable)
8293 default_bb = create_empty_bb (l1_bb->prev_bb);
8294 else
8295 default_bb = create_empty_bb (l0_bb);
8297 /* We will build a switch() with enough cases for all the
8298 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8299 and a default case to abort if something goes wrong. */
8300 len = EDGE_COUNT (l0_bb->succs);
8302 /* Use vec::quick_push on label_vec throughout, since we know the size
8303 in advance. */
8304 auto_vec<tree> label_vec (len);
8306 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8307 GIMPLE_OMP_SECTIONS statement. */
8308 si = gsi_last_nondebug_bb (entry_bb);
8309 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8310 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8311 vin = gimple_omp_sections_control (sections_stmt);
8312 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8313 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8314 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8315 tree cond_var = NULL_TREE;
8316 if (reductmp || condtmp)
8318 tree reductions = null_pointer_node, mem = null_pointer_node;
8319 tree memv = NULL_TREE, condtemp = NULL_TREE;
8320 gimple_stmt_iterator gsi = gsi_none ();
8321 gimple *g = NULL;
8322 if (reductmp)
8324 reductions = OMP_CLAUSE_DECL (reductmp);
8325 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8326 g = SSA_NAME_DEF_STMT (reductions);
8327 reductions = gimple_assign_rhs1 (g);
8328 OMP_CLAUSE_DECL (reductmp) = reductions;
8329 gsi = gsi_for_stmt (g);
8331 else
8332 gsi = si;
8333 if (condtmp)
8335 condtemp = OMP_CLAUSE_DECL (condtmp);
8336 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8337 OMP_CLAUSE__CONDTEMP_);
8338 cond_var = OMP_CLAUSE_DECL (c);
8339 tree type = TREE_TYPE (condtemp);
8340 memv = create_tmp_var (type);
8341 TREE_ADDRESSABLE (memv) = 1;
8342 unsigned cnt = 0;
8343 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8344 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8345 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8346 ++cnt;
8347 unsigned HOST_WIDE_INT sz
8348 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8349 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8350 false);
8351 mem = build_fold_addr_expr (memv);
8353 t = build_int_cst (unsigned_type_node, len - 1);
8354 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8355 stmt = gimple_build_call (u, 3, t, reductions, mem);
8356 gimple_call_set_lhs (stmt, vin);
8357 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8358 if (condtmp)
8360 expand_omp_build_assign (&gsi, condtemp, memv, false);
8361 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8362 vin, build_one_cst (TREE_TYPE (cond_var)));
8363 expand_omp_build_assign (&gsi, cond_var, t, false);
8365 if (reductmp)
8367 gsi_remove (&gsi, true);
8368 release_ssa_name (gimple_assign_lhs (g));
8371 else if (!is_combined_parallel (region))
8373 /* If we are not inside a combined parallel+sections region,
8374 call GOMP_sections_start. */
8375 t = build_int_cst (unsigned_type_node, len - 1);
8376 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8377 stmt = gimple_build_call (u, 1, t);
8379 else
8381 /* Otherwise, call GOMP_sections_next. */
8382 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8383 stmt = gimple_build_call (u, 0);
8385 if (!reductmp && !condtmp)
8387 gimple_call_set_lhs (stmt, vin);
8388 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8390 gsi_remove (&si, true);
8392 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8393 L0_BB. */
8394 switch_si = gsi_last_nondebug_bb (l0_bb);
8395 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8396 if (exit_reachable)
8398 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8399 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8400 vmain = gimple_omp_continue_control_use (cont);
8401 vnext = gimple_omp_continue_control_def (cont);
8403 else
8405 vmain = vin;
8406 vnext = NULL_TREE;
8409 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8410 label_vec.quick_push (t);
8411 i = 1;
8413 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8414 for (inner = region->inner, casei = 1;
8415 inner;
8416 inner = inner->next, i++, casei++)
8418 basic_block s_entry_bb, s_exit_bb;
8420 /* Skip optional reduction region. */
8421 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8423 --i;
8424 --casei;
8425 continue;
8428 s_entry_bb = inner->entry;
8429 s_exit_bb = inner->exit;
8431 t = gimple_block_label (s_entry_bb);
8432 u = build_int_cst (unsigned_type_node, casei);
8433 u = build_case_label (u, NULL, t);
8434 label_vec.quick_push (u);
8436 si = gsi_last_nondebug_bb (s_entry_bb);
8437 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8438 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8439 gsi_remove (&si, true);
8440 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8442 if (s_exit_bb == NULL)
8443 continue;
8445 si = gsi_last_nondebug_bb (s_exit_bb);
8446 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8447 gsi_remove (&si, true);
8449 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8452 /* Error handling code goes in DEFAULT_BB. */
8453 t = gimple_block_label (default_bb);
8454 u = build_case_label (NULL, NULL, t);
8455 make_edge (l0_bb, default_bb, 0);
8456 add_bb_to_loop (default_bb, current_loops->tree_root);
8458 stmt = gimple_build_switch (vmain, u, label_vec);
8459 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8460 gsi_remove (&switch_si, true);
8462 si = gsi_start_bb (default_bb);
8463 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8464 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8466 if (exit_reachable)
8468 tree bfn_decl;
8470 /* Code to get the next section goes in L1_BB. */
8471 si = gsi_last_nondebug_bb (l1_bb);
8472 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8474 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8475 stmt = gimple_build_call (bfn_decl, 0);
8476 gimple_call_set_lhs (stmt, vnext);
8477 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8478 if (cond_var)
8480 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8481 vnext, build_one_cst (TREE_TYPE (cond_var)));
8482 expand_omp_build_assign (&si, cond_var, t, false);
8484 gsi_remove (&si, true);
8486 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8489 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8490 si = gsi_last_nondebug_bb (l2_bb);
8491 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8492 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8493 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8494 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8495 else
8496 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8497 stmt = gimple_build_call (t, 0);
8498 if (gimple_omp_return_lhs (gsi_stmt (si)))
8499 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8500 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8501 gsi_remove (&si, true);
8503 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8506 /* Expand code for an OpenMP single or scope directive. We've already expanded
8507 much of the code; here we simply place the GOMP_barrier call. */
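/* For illustration only (a sketch; "work" is a placeholder): for

     #pragma omp single
     work ();

   the worksharing itself was expanded earlier; all that happens here is that
   the GIMPLE_OMP_SINGLE/GIMPLE_OMP_RETURN markers are removed and, unless
   nowait was given, the implicit GOMP_barrier call is placed at the exit.  */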
8509 static void
8510 expand_omp_single (struct omp_region *region)
8512 basic_block entry_bb, exit_bb;
8513 gimple_stmt_iterator si;
8515 entry_bb = region->entry;
8516 exit_bb = region->exit;
8518 si = gsi_last_nondebug_bb (entry_bb);
8519 enum gimple_code code = gimple_code (gsi_stmt (si));
8520 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8521 gsi_remove (&si, true);
8522 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8524 if (exit_bb == NULL)
8526 gcc_assert (code == GIMPLE_OMP_SCOPE);
8527 return;
8530 si = gsi_last_nondebug_bb (exit_bb);
8531 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8533 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8534 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8536 gsi_remove (&si, true);
8537 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8540 /* Generic expansion for OpenMP synchronization directives: master,
8541 ordered and critical. All we need to do here is remove the entry
8542 and exit markers for REGION. */
8544 static void
8545 expand_omp_synch (struct omp_region *region)
8547 basic_block entry_bb, exit_bb;
8548 gimple_stmt_iterator si;
8550 entry_bb = region->entry;
8551 exit_bb = region->exit;
8553 si = gsi_last_nondebug_bb (entry_bb);
8554 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8555 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8556 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8557 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8558 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8559 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8560 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8561 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8562 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8564 expand_omp_taskreg (region);
8565 return;
8567 gsi_remove (&si, true);
8568 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8570 if (exit_bb)
8572 si = gsi_last_nondebug_bb (exit_bb);
8573 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8574 gsi_remove (&si, true);
8575 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8579 /* Translate enum omp_memory_order to enum memmodel for the fail
8580 clause embedded in it. */
8582 static enum memmodel
8583 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8585 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8587 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8588 switch (mo & OMP_MEMORY_ORDER_MASK)
8590 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8591 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8592 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8593 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8594 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8595 default: break;
8597 gcc_unreachable ();
8598 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8599 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8600 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8601 default: gcc_unreachable ();
8605 /* Translate enum omp_memory_order to enum memmodel. The two enums
8606 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8607 is 0 and omp_memory_order has the fail mode encoded in it too. */
8609 static enum memmodel
8610 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8612 enum memmodel ret, fail_ret;
8613 switch (mo & OMP_MEMORY_ORDER_MASK)
8615 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8616 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8617 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8618 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8619 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8620 default: gcc_unreachable ();
8622 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8623 we can just return ret here unconditionally. Otherwise, work around
8624 it here and make sure fail memmodel is not stronger. */
8625 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8626 return ret;
8627 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8628 if (fail_ret > ret)
8629 return fail_ret;
8630 return ret;
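/* For illustration only (a sketch of how the two helpers above combine): for

     #pragma omp atomic compare capture seq_cst fail(relaxed)

   the front end records OMP_MEMORY_ORDER_SEQ_CST together with
   OMP_FAIL_MEMORY_ORDER_RELAXED, so omp_memory_order_to_memmodel returns
   MEMMODEL_SEQ_CST for the operation itself and
   omp_memory_order_to_fail_memmodel returns MEMMODEL_RELAXED for the failure
   path of the compare-exchange.  */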
8633 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8634 operation as a normal volatile load. */
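/* For illustration only (a sketch, not the emitted GIMPLE verbatim): an
   atomic read such as

     #pragma omp atomic read
     v = x;

   is expanded into a call to the sized __atomic_load builtin, roughly

     v = __atomic_load_4 (&x, mo);        (for a 4-byte x)

   where mo is the memmodel derived from the directive's memory-order clause
   by omp_memory_order_to_memmodel above.  */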
8636 static bool
8637 expand_omp_atomic_load (basic_block load_bb, tree addr,
8638 tree loaded_val, int index)
8640 enum built_in_function tmpbase;
8641 gimple_stmt_iterator gsi;
8642 basic_block store_bb;
8643 location_t loc;
8644 gimple *stmt;
8645 tree decl, call, type, itype;
8647 gsi = gsi_last_nondebug_bb (load_bb);
8648 stmt = gsi_stmt (gsi);
8649 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8650 loc = gimple_location (stmt);
8652 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8653 is smaller than word size, then expand_atomic_load assumes that the load
8654 is atomic. We could avoid the builtin entirely in this case. */
8656 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8657 decl = builtin_decl_explicit (tmpbase);
8658 if (decl == NULL_TREE)
8659 return false;
8661 type = TREE_TYPE (loaded_val);
8662 itype = TREE_TYPE (TREE_TYPE (decl));
8664 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8665 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8666 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8667 if (!useless_type_conversion_p (type, itype))
8668 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8669 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8671 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8672 gsi_remove (&gsi, true);
8674 store_bb = single_succ (load_bb);
8675 gsi = gsi_last_nondebug_bb (store_bb);
8676 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8677 gsi_remove (&gsi, true);
8679 if (gimple_in_ssa_p (cfun))
8680 update_ssa (TODO_update_ssa_no_phi);
8682 return true;
8685 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8686 operation as a normal volatile store. */
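/* For illustration only (a sketch): an atomic write such as

     #pragma omp atomic write
     x = expr;

   becomes roughly

     __atomic_store_4 (&x, expr, mo);     (for a 4-byte x)

   while a capture form that also reads the old value (v = x; x = expr;)
   needs the exchange variant and uses __atomic_exchange_4 instead.  */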
8688 static bool
8689 expand_omp_atomic_store (basic_block load_bb, tree addr,
8690 tree loaded_val, tree stored_val, int index)
8692 enum built_in_function tmpbase;
8693 gimple_stmt_iterator gsi;
8694 basic_block store_bb = single_succ (load_bb);
8695 location_t loc;
8696 gimple *stmt;
8697 tree decl, call, type, itype;
8698 machine_mode imode;
8699 bool exchange;
8701 gsi = gsi_last_nondebug_bb (load_bb);
8702 stmt = gsi_stmt (gsi);
8703 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8705 /* If the load value is needed, then this isn't a store but an exchange. */
8706 exchange = gimple_omp_atomic_need_value_p (stmt);
8708 gsi = gsi_last_nondebug_bb (store_bb);
8709 stmt = gsi_stmt (gsi);
8710 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8711 loc = gimple_location (stmt);
8713 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8714 is smaller than word size, then expand_atomic_store assumes that the store
8715 is atomic. We could avoid the builtin entirely in this case. */
8717 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8718 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8719 decl = builtin_decl_explicit (tmpbase);
8720 if (decl == NULL_TREE)
8721 return false;
8723 type = TREE_TYPE (stored_val);
8725 /* Dig out the type of the function's second argument. */
8726 itype = TREE_TYPE (decl);
8727 itype = TYPE_ARG_TYPES (itype);
8728 itype = TREE_CHAIN (itype);
8729 itype = TREE_VALUE (itype);
8730 imode = TYPE_MODE (itype);
8732 if (exchange && !can_atomic_exchange_p (imode, true))
8733 return false;
8735 if (!useless_type_conversion_p (itype, type))
8736 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8737 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8738 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8739 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8740 if (exchange)
8742 if (!useless_type_conversion_p (type, itype))
8743 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8744 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8747 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8748 gsi_remove (&gsi, true);
8750 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8751 gsi = gsi_last_nondebug_bb (load_bb);
8752 gsi_remove (&gsi, true);
8754 if (gimple_in_ssa_p (cfun))
8755 update_ssa (TODO_update_ssa_no_phi);
8757 return true;
8760 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8761 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8762 size of the data type, and thus usable to find the index of the builtin
8763 decl. Returns false if the expression is not of the proper form. */
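/* For illustration only (a sketch): an update such as

     #pragma omp atomic
     x += 5;

   matches the PLUS_EXPR case below and becomes roughly

     __atomic_fetch_add_4 (&x, 5, mo);    (for a 4-byte x)

   whereas a capture form that needs the updated value selects the
   __atomic_add_fetch_4 flavour via newbase instead of oldbase.  */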
8765 static bool
8766 expand_omp_atomic_fetch_op (basic_block load_bb,
8767 tree addr, tree loaded_val,
8768 tree stored_val, int index)
8770 enum built_in_function oldbase, newbase, tmpbase;
8771 tree decl, itype, call;
8772 tree lhs, rhs;
8773 basic_block store_bb = single_succ (load_bb);
8774 gimple_stmt_iterator gsi;
8775 gimple *stmt;
8776 location_t loc;
8777 enum tree_code code;
8778 bool need_old, need_new;
8779 machine_mode imode;
8781 /* We expect to find the following sequences:
8783 load_bb:
8784 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8786 store_bb:
8787 val = tmp OP something; (or: something OP tmp)
8788 GIMPLE_OMP_ATOMIC_STORE (val)
8790 ???FIXME: Allow a more flexible sequence.
8791 Perhaps use data flow to pick the statements.
8795 gsi = gsi_after_labels (store_bb);
8796 stmt = gsi_stmt (gsi);
8797 if (is_gimple_debug (stmt))
8799 gsi_next_nondebug (&gsi);
8800 if (gsi_end_p (gsi))
8801 return false;
8802 stmt = gsi_stmt (gsi);
8804 loc = gimple_location (stmt);
8805 if (!is_gimple_assign (stmt))
8806 return false;
8807 gsi_next_nondebug (&gsi);
8808 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8809 return false;
8810 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8811 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8812 enum omp_memory_order omo
8813 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8814 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8815 gcc_checking_assert (!need_old || !need_new);
8817 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8818 return false;
8820 /* Check for one of the supported fetch-op operations. */
8821 code = gimple_assign_rhs_code (stmt);
8822 switch (code)
8824 case PLUS_EXPR:
8825 case POINTER_PLUS_EXPR:
8826 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8827 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8828 break;
8829 case MINUS_EXPR:
8830 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8831 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8832 break;
8833 case BIT_AND_EXPR:
8834 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8835 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8836 break;
8837 case BIT_IOR_EXPR:
8838 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8839 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8840 break;
8841 case BIT_XOR_EXPR:
8842 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8843 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8844 break;
8845 default:
8846 return false;
8849 /* Make sure the expression is of the proper form. */
8850 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8851 rhs = gimple_assign_rhs2 (stmt);
8852 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8853 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8854 rhs = gimple_assign_rhs1 (stmt);
8855 else
8856 return false;
8858 tmpbase = ((enum built_in_function)
8859 ((need_new ? newbase : oldbase) + index + 1));
8860 decl = builtin_decl_explicit (tmpbase);
8861 if (decl == NULL_TREE)
8862 return false;
8863 itype = TREE_TYPE (TREE_TYPE (decl));
8864 imode = TYPE_MODE (itype);
8866 /* We could test all of the various optabs involved, but the fact of the
8867 matter is that (with the exception of i486 vs i586 and xadd) all targets
8868 that support any atomic operation optab also implement compare-and-swap.
8869 Let optabs.c take care of expanding any compare-and-swap loop. */
8870 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8871 return false;
8873 gsi = gsi_last_nondebug_bb (load_bb);
8874 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8876 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8877 It only requires that the operation happen atomically, so we pass along
8878 the memory model requested on the directive. */
8879 call = build_call_expr_loc (loc, decl, 3, addr,
8880 fold_convert_loc (loc, itype, rhs),
8881 build_int_cst (NULL, mo));
8883 if (need_old || need_new)
8885 lhs = need_old ? loaded_val : stored_val;
8886 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8887 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8889 else
8890 call = fold_convert_loc (loc, void_type_node, call);
8891 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8892 gsi_remove (&gsi, true);
8894 gsi = gsi_last_nondebug_bb (store_bb);
8895 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8896 gsi_remove (&gsi, true);
8897 gsi = gsi_last_nondebug_bb (store_bb);
8898 stmt = gsi_stmt (gsi);
8899 gsi_remove (&gsi, true);
8901 if (gimple_in_ssa_p (cfun))
8903 release_defs (stmt);
8904 update_ssa (TODO_update_ssa_no_phi);
8907 return true;
8910 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8911 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8912 Returns false if the expression is not of the proper form. */
8914 static bool
8915 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8916 tree loaded_val, tree stored_val, int index)
8918 /* We expect to find the following sequences:
8920 load_bb:
8921 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8923 store_bb:
8924 val = tmp == e ? d : tmp;
8925 GIMPLE_OMP_ATOMIC_STORE (val)
8927 or in store_bb instead:
8928 tmp2 = tmp == e;
8929 val = tmp2 ? d : tmp;
8930 GIMPLE_OMP_ATOMIC_STORE (val)
8933 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8934 val = e == tmp3 ? d : tmp;
8935 GIMPLE_OMP_ATOMIC_STORE (val)
8937 etc. */
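/* For illustration only (a sketch): the source-level form recognized here is
   OpenMP 5.1 atomic compare, e.g.

     #pragma omp atomic compare
     if (x == e) { x = d; }

   When the GIMPLE matches one of the shapes above, it is rewritten into the
   internal call

     .ATOMIC_COMPARE_EXCHANGE (&x, e, d, flag, mo, fmo)

   whose complex result carries the old value in its real part and the
   success flag in its imaginary part, which the code below unpacks.  */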
8940 basic_block store_bb = single_succ (load_bb);
8941 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8942 gimple *store_stmt = gsi_stmt (gsi);
8943 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8944 return false;
8945 gsi_prev_nondebug (&gsi);
8946 if (gsi_end_p (gsi))
8947 return false;
8948 gimple *condexpr_stmt = gsi_stmt (gsi);
8949 if (!is_gimple_assign (condexpr_stmt)
8950 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8951 return false;
8952 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8953 return false;
8954 gimple *cond_stmt = NULL;
8955 gimple *vce_stmt = NULL;
8956 gsi_prev_nondebug (&gsi);
8957 if (!gsi_end_p (gsi))
8959 cond_stmt = gsi_stmt (gsi);
8960 if (!is_gimple_assign (cond_stmt))
8961 return false;
8962 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8964 gsi_prev_nondebug (&gsi);
8965 if (!gsi_end_p (gsi))
8967 vce_stmt = gsi_stmt (gsi);
8968 if (!is_gimple_assign (vce_stmt)
8969 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8970 return false;
8973 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8974 std::swap (vce_stmt, cond_stmt);
8975 else
8976 return false;
8977 if (vce_stmt)
8979 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8980 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8981 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8982 return false;
8983 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8984 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8985 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8986 TYPE_SIZE (TREE_TYPE (loaded_val))))
8987 return false;
8988 gsi_prev_nondebug (&gsi);
8989 if (!gsi_end_p (gsi))
8990 return false;
8993 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8994 tree cond_op1, cond_op2;
8995 if (cond_stmt)
8997 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8998 return false;
8999 cond_op1 = gimple_assign_rhs1 (cond_stmt);
9000 cond_op2 = gimple_assign_rhs2 (cond_stmt);
9002 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9003 return false;
9004 else
9006 cond_op1 = TREE_OPERAND (cond, 0);
9007 cond_op2 = TREE_OPERAND (cond, 1);
9009 tree d;
9010 if (TREE_CODE (cond) == NE_EXPR)
9012 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9013 return false;
9014 d = gimple_assign_rhs3 (condexpr_stmt);
9016 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9017 return false;
9018 else
9019 d = gimple_assign_rhs2 (condexpr_stmt);
9020 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9021 if (operand_equal_p (e, cond_op1))
9022 e = cond_op2;
9023 else if (operand_equal_p (e, cond_op2))
9024 e = cond_op1;
9025 else
9026 return false;
9028 location_t loc = gimple_location (store_stmt);
9029 gimple *load_stmt = last_stmt (load_bb);
9030 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9031 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9032 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9033 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9034 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9035 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9036 gcc_checking_assert (!need_old || !need_new);
9038 enum built_in_function fncode
9039 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9040 + index + 1);
9041 tree cmpxchg = builtin_decl_explicit (fncode);
9042 if (cmpxchg == NULL_TREE)
9043 return false;
9044 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9046 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9047 || !can_atomic_load_p (TYPE_MODE (itype)))
9048 return false;
9050 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9051 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9052 return false;
9054 gsi = gsi_for_stmt (store_stmt);
9055 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9057 tree ne = create_tmp_reg (itype);
9058 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9059 gimple_set_location (g, loc);
9060 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9061 e = ne;
9063 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9065 tree nd = create_tmp_reg (itype);
9066 enum tree_code code;
9067 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9069 code = VIEW_CONVERT_EXPR;
9070 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9072 else
9073 code = NOP_EXPR;
9074 gimple *g = gimple_build_assign (nd, code, d);
9075 gimple_set_location (g, loc);
9076 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9077 d = nd;
9080 tree ctype = build_complex_type (itype);
9081 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9082 gimple *g
9083 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9084 build_int_cst (integer_type_node, flag),
9085 mo, fmo);
9086 tree cres = create_tmp_reg (ctype);
9087 gimple_call_set_lhs (g, cres);
9088 gimple_set_location (g, loc);
9089 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9091 if (cond_stmt || need_old || need_new)
9093 tree im = create_tmp_reg (itype);
9094 g = gimple_build_assign (im, IMAGPART_EXPR,
9095 build1 (IMAGPART_EXPR, itype, cres));
9096 gimple_set_location (g, loc);
9097 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9099 tree re = NULL_TREE;
9100 if (need_old || need_new)
9102 re = create_tmp_reg (itype);
9103 g = gimple_build_assign (re, REALPART_EXPR,
9104 build1 (REALPART_EXPR, itype, cres));
9105 gimple_set_location (g, loc);
9106 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9109 if (cond_stmt)
9111 g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9112 NOP_EXPR, im);
9113 gimple_set_location (g, loc);
9114 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9116 else if (need_new)
9118 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9119 build2 (NE_EXPR, boolean_type_node,
9120 im, build_zero_cst (itype)),
9121 d, re);
9122 gimple_set_location (g, loc);
9123 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9124 re = gimple_assign_lhs (g);
9127 if (need_old || need_new)
9129 tree v = need_old ? loaded_val : stored_val;
9130 enum tree_code code;
9131 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9133 code = VIEW_CONVERT_EXPR;
9134 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9136 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9137 code = NOP_EXPR;
9138 else
9139 code = TREE_CODE (re);
9140 g = gimple_build_assign (v, code, re);
9141 gimple_set_location (g, loc);
9142 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9146 gsi_remove (&gsi, true);
9147 gsi = gsi_for_stmt (load_stmt);
9148 gsi_remove (&gsi, true);
9149 gsi = gsi_for_stmt (condexpr_stmt);
9150 gsi_remove (&gsi, true);
9151 if (cond_stmt)
9153 gsi = gsi_for_stmt (cond_stmt);
9154 gsi_remove (&gsi, true);
9156 if (vce_stmt)
9158 gsi = gsi_for_stmt (vce_stmt);
9159 gsi_remove (&gsi, true);
9162 return true;
9165 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9167 oldval = *addr;
9168 repeat:
9169 newval = rhs; // with oldval replacing *addr in rhs
9170 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9171 if (oldval != newval)
9172 goto repeat;
9174 INDEX is log2 of the size of the data type, and thus usable to find the
9175 index of the builtin decl. */
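/* For illustration only (a sketch): for an update with no fetch-op builtin,
   e.g.

     #pragma omp atomic
     f *= 2.0f;                           (float f)

   the loop built below view-converts the float to a same-sized integer,
   issues .ATOMIC_COMPARE_EXCHANGE on that integer, and branches back to the
   loop header until the swap succeeds; doing the comparison on the integer
   representation keeps NaNs and -0.0 from causing spurious retries.  */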
9177 static bool
9178 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9179 tree addr, tree loaded_val, tree stored_val,
9180 int index)
9182 tree loadedi, storedi, initial, new_storedi, old_vali;
9183 tree type, itype, cmpxchg, iaddr, atype;
9184 gimple_stmt_iterator si;
9185 basic_block loop_header = single_succ (load_bb);
9186 gimple *phi, *stmt;
9187 edge e;
9188 enum built_in_function fncode;
9190 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9191 + index + 1);
9192 cmpxchg = builtin_decl_explicit (fncode);
9193 if (cmpxchg == NULL_TREE)
9194 return false;
9195 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9196 atype = type;
9197 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9199 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9200 || !can_atomic_load_p (TYPE_MODE (itype)))
9201 return false;
9203 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9204 si = gsi_last_nondebug_bb (load_bb);
9205 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9206 location_t loc = gimple_location (gsi_stmt (si));
9207 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9208 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9209 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9211 /* For floating-point values, we'll need to view-convert them to integers
9212 so that we can perform the atomic compare and swap. Simplify the
9213 following code by always setting up the "i"ntegral variables. */
9214 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9216 tree iaddr_val;
9218 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9219 true));
9220 atype = itype;
9221 iaddr_val
9222 = force_gimple_operand_gsi (&si,
9223 fold_convert (TREE_TYPE (iaddr), addr),
9224 false, NULL_TREE, true, GSI_SAME_STMT);
9225 stmt = gimple_build_assign (iaddr, iaddr_val);
9226 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9227 loadedi = create_tmp_var (itype);
9228 if (gimple_in_ssa_p (cfun))
9229 loadedi = make_ssa_name (loadedi);
9231 else
9233 iaddr = addr;
9234 loadedi = loaded_val;
9237 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9238 tree loaddecl = builtin_decl_explicit (fncode);
9239 if (loaddecl)
9240 initial
9241 = fold_convert (atype,
9242 build_call_expr (loaddecl, 2, iaddr,
9243 build_int_cst (NULL_TREE,
9244 MEMMODEL_RELAXED)));
9245 else
9247 tree off
9248 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9249 true), 0);
9250 initial = build2 (MEM_REF, atype, iaddr, off);
9253 initial
9254 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9255 GSI_SAME_STMT);
9257 /* Move the value to the LOADEDI temporary. */
9258 if (gimple_in_ssa_p (cfun))
9260 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9261 phi = create_phi_node (loadedi, loop_header);
9262 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9263 initial);
9265 else
9266 gsi_insert_before (&si,
9267 gimple_build_assign (loadedi, initial),
9268 GSI_SAME_STMT);
9269 if (loadedi != loaded_val)
9271 gimple_stmt_iterator gsi2;
9272 tree x;
9274 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9275 gsi2 = gsi_start_bb (loop_header);
9276 if (gimple_in_ssa_p (cfun))
9278 gassign *stmt;
9279 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9280 true, GSI_SAME_STMT);
9281 stmt = gimple_build_assign (loaded_val, x);
9282 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9284 else
9286 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9287 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9288 true, GSI_SAME_STMT);
9291 gsi_remove (&si, true);
9293 si = gsi_last_nondebug_bb (store_bb);
9294 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9296 if (iaddr == addr)
9297 storedi = stored_val;
9298 else
9299 storedi
9300 = force_gimple_operand_gsi (&si,
9301 build1 (VIEW_CONVERT_EXPR, itype,
9302 stored_val), true, NULL_TREE, true,
9303 GSI_SAME_STMT);
9305 /* Build the compare&swap statement. */
9306 tree ctype = build_complex_type (itype);
9307 int flag = int_size_in_bytes (itype);
9308 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9309 ctype, 6, iaddr, loadedi,
9310 storedi,
9311 build_int_cst (integer_type_node,
9312 flag),
9313 mo, fmo);
9314 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9315 new_storedi = force_gimple_operand_gsi (&si,
9316 fold_convert (TREE_TYPE (loadedi),
9317 new_storedi),
9318 true, NULL_TREE,
9319 true, GSI_SAME_STMT);
9321 if (gimple_in_ssa_p (cfun))
9322 old_vali = loadedi;
9323 else
9325 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9326 stmt = gimple_build_assign (old_vali, loadedi);
9327 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9329 stmt = gimple_build_assign (loadedi, new_storedi);
9330 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9333 /* Note that we always perform the comparison as an integer, even for
9334 floating point. This allows the atomic operation to properly
9335 succeed even with NaNs and -0.0. */
9336 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9337 stmt = gimple_build_cond_empty (ne);
9338 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9340 /* Update cfg. */
9341 e = single_succ_edge (store_bb);
9342 e->flags &= ~EDGE_FALLTHRU;
9343 e->flags |= EDGE_FALSE_VALUE;
9344 /* Expect no looping. */
9345 e->probability = profile_probability::guessed_always ();
9347 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9348 e->probability = profile_probability::guessed_never ();
9350 /* Copy the new value to loadedi (we already did that before the condition
9351 if we are not in SSA). */
9352 if (gimple_in_ssa_p (cfun))
9354 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9355 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9358 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9359 gsi_remove (&si, true);
9361 class loop *loop = alloc_loop ();
9362 loop->header = loop_header;
9363 loop->latch = store_bb;
9364 add_loop (loop, loop_header->loop_father);
9366 if (gimple_in_ssa_p (cfun))
9367 update_ssa (TODO_update_ssa_no_phi);
9369 return true;
9372 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9374 GOMP_atomic_start ();
9375 *addr = rhs;
9376 GOMP_atomic_end ();
9378 The result is not globally atomic, but works so long as all parallel
9379 references are within #pragma omp atomic directives. According to
9380 responses received from omp@openmp.org, this appears to be within spec,
9381 which makes sense, since that's how several other compilers handle
9382 this situation as well.
9383 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9384 expanding. STORED_VAL is the operand of the matching
9385 GIMPLE_OMP_ATOMIC_STORE.
9387 We replace
9388 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9389 loaded_val = *addr;
9391 and replace
9392 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9393 *addr = stored_val;
9396 static bool
9397 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9398 tree addr, tree loaded_val, tree stored_val)
9400 gimple_stmt_iterator si;
9401 gassign *stmt;
9402 tree t;
9404 si = gsi_last_nondebug_bb (load_bb);
9405 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9407 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9408 t = build_call_expr (t, 0);
9409 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9411 tree mem = build_simple_mem_ref (addr);
9412 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9413 TREE_OPERAND (mem, 1)
9414 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9415 true),
9416 TREE_OPERAND (mem, 1));
9417 stmt = gimple_build_assign (loaded_val, mem);
9418 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9419 gsi_remove (&si, true);
9421 si = gsi_last_nondebug_bb (store_bb);
9422 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9424 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9425 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9427 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9428 t = build_call_expr (t, 0);
9429 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9430 gsi_remove (&si, true);
9432 if (gimple_in_ssa_p (cfun))
9433 update_ssa (TODO_update_ssa_no_phi);
9434 return true;
9437 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it
9438 using expand_omp_atomic_fetch_op. If that fails, we try to
9439 call expand_omp_atomic_pipeline, and if that fails too, the
9440 ultimate fallback is wrapping the operation in a mutex
9441 (expand_omp_atomic_mutex). REGION is the atomic region built
9442 by build_omp_regions_1(). */
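/* For illustration only (a sketch of the dispatch order): on a typical
   64-bit target an "#pragma omp atomic" increment of an int is handled by
   expand_omp_atomic_fetch_op, a plain read or write maps to the load/store
   helpers, an update that fits none of those falls back to the
   compare-and-swap loop, and an unsupported size or alignment ends up
   bracketed by GOMP_atomic_start/GOMP_atomic_end in
   expand_omp_atomic_mutex.  */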
9444 static void
9445 expand_omp_atomic (struct omp_region *region)
9447 basic_block load_bb = region->entry, store_bb = region->exit;
9448 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9449 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9450 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9451 tree addr = gimple_omp_atomic_load_rhs (load);
9452 tree stored_val = gimple_omp_atomic_store_val (store);
9453 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9454 HOST_WIDE_INT index;
9456 /* Make sure the type is one of the supported sizes. */
9457 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9458 index = exact_log2 (index);
9459 if (index >= 0 && index <= 4)
9461 unsigned int align = TYPE_ALIGN_UNIT (type);
9463 /* __sync builtins require strict data alignment. */
9464 if (exact_log2 (align) >= index)
9466 /* Atomic load. */
9467 scalar_mode smode;
9468 if (loaded_val == stored_val
9469 && (is_int_mode (TYPE_MODE (type), &smode)
9470 || is_float_mode (TYPE_MODE (type), &smode))
9471 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9472 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9473 return;
9475 /* Atomic store. */
9476 if ((is_int_mode (TYPE_MODE (type), &smode)
9477 || is_float_mode (TYPE_MODE (type), &smode))
9478 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9479 && store_bb == single_succ (load_bb)
9480 && first_stmt (store_bb) == store
9481 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9482 stored_val, index))
9483 return;
9485 /* When possible, use specialized atomic update functions. */
9486 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9487 && store_bb == single_succ (load_bb)
9488 && expand_omp_atomic_fetch_op (load_bb, addr,
9489 loaded_val, stored_val, index))
9490 return;
9492 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9493 if (store_bb == single_succ (load_bb)
9494 && !gimple_in_ssa_p (cfun)
9495 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9496 index))
9497 return;
9499 /* If we don't have specialized __sync builtins, try and implement
9500 as a compare and swap loop. */
9501 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9502 loaded_val, stored_val, index))
9503 return;
9507 /* The ultimate fallback is wrapping the operation in a mutex. */
9508 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9511 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9512 at REGION_EXIT. */
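/* For illustration only (a sketch): in

     #pragma acc kernels
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         a[i][j] = 0;

   the region contains exactly one outermost loop nest, so both loops get
   in_oacc_kernels_region set; a kernels region with two sibling outer loops
   is left unmarked and thus not considered for parallelization.  */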
9514 static void
9515 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9516 basic_block region_exit)
9518 class loop *outer = region_entry->loop_father;
9519 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9521 /* Don't parallelize the kernels region if it contains more than one outer
9522 loop. */
9523 unsigned int nr_outer_loops = 0;
9524 class loop *single_outer = NULL;
9525 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9527 gcc_assert (loop_outer (loop) == outer);
9529 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9530 continue;
9532 if (region_exit != NULL
9533 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9534 continue;
9536 nr_outer_loops++;
9537 single_outer = loop;
9539 if (nr_outer_loops != 1)
9540 return;
9542 for (class loop *loop = single_outer->inner;
9543 loop != NULL;
9544 loop = loop->inner)
9545 if (loop->next)
9546 return;
9548 /* Mark the loops in the region. */
9549 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9550 loop->in_oacc_kernels_region = true;
9553 /* Build target argument identifier from the DEVICE identifier, value
9554 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9556 static tree
9557 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9559 tree t = build_int_cst (integer_type_node, device);
9560 if (subsequent_param)
9561 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9562 build_int_cst (integer_type_node,
9563 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9564 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9565 build_int_cst (integer_type_node, id));
9566 return t;
9569 /* Like above, but return it in a type that can be directly stored as an element
9570 of the argument array. */
9572 static tree
9573 get_target_argument_identifier (int device, bool subsequent_param, int id)
9575 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9576 return fold_convert (ptr_type_node, t);
9579 /* Return a target argument consisting of DEVICE identifier, value identifier
9580 ID, and the actual VALUE. */
9582 static tree
9583 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9584 tree value)
9586 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9587 fold_convert (integer_type_node, value),
9588 build_int_cst (unsigned_type_node,
9589 GOMP_TARGET_ARG_VALUE_SHIFT));
9590 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9591 get_target_argument_identifier_1 (device, false, id));
9592 t = fold_convert (ptr_type_node, t);
9593 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9596 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9597 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9598 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9599 separate arguments. */
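/* For illustration only (a sketch): a compile-time constant such as the 4 in
   num_teams (4) fits the +/-2^15 range checked below and is pushed as one
   encoded element, roughly

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   while a value only known at run time is pushed as an identifier carrying
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM, followed by the value itself as the next
   element.  */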
9601 static void
9602 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9603 int id, tree value, vec <tree> *args)
9605 if (tree_fits_shwi_p (value)
9606 && tree_to_shwi (value) > -(1 << 15)
9607 && tree_to_shwi (value) < (1 << 15))
9608 args->quick_push (get_target_argument_value (gsi, device, id, value));
9609 else
9611 args->quick_push (get_target_argument_identifier (device, true, id));
9612 value = fold_convert (ptr_type_node, value);
9613 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9614 GSI_SAME_STMT);
9615 args->quick_push (value);
9619 /* Create an array of arguments that is then passed to GOMP_target. */
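/* For illustration only (a sketch): for

     #pragma omp target teams num_teams (4) thread_limit (8)

   the array built below holds the encoded NUM_TEAMS entry, the encoded
   THREAD_LIMIT entry and a terminating null pointer; its address becomes the
   trailing arguments pointer of the target launch call emitted later.  */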
9621 static tree
9622 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9624 auto_vec <tree, 6> args;
9625 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9626 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9627 if (c)
9628 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9629 else
9630 t = integer_minus_one_node;
9631 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9632 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9634 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9635 if (c)
9636 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9637 else
9638 t = integer_minus_one_node;
9639 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9640 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9641 &args);
9643 /* Produce more, perhaps device specific, arguments here. */
9645 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9646 args.length () + 1),
9647 ".omp_target_args");
9648 for (unsigned i = 0; i < args.length (); i++)
9650 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9651 build_int_cst (integer_type_node, i),
9652 NULL_TREE, NULL_TREE);
9653 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9654 GSI_SAME_STMT);
9656 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9657 build_int_cst (integer_type_node, args.length ()),
9658 NULL_TREE, NULL_TREE);
9659 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9660 GSI_SAME_STMT);
9661 TREE_ADDRESSABLE (argarray) = 1;
9662 return build_fold_addr_expr (argarray);
9665 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9667 static void
9668 expand_omp_target (struct omp_region *region)
9670 basic_block entry_bb, exit_bb, new_bb;
9671 struct function *child_cfun;
9672 tree child_fn, block, t;
9673 gimple_stmt_iterator gsi;
9674 gomp_target *entry_stmt;
9675 gimple *stmt;
9676 edge e;
9677 bool offloaded;
9678 int target_kind;
9680 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9681 target_kind = gimple_omp_target_kind (entry_stmt);
9682 new_bb = region->entry;
9684 offloaded = is_gimple_omp_offloaded (entry_stmt);
9685 switch (target_kind)
9687 case GF_OMP_TARGET_KIND_REGION:
9688 case GF_OMP_TARGET_KIND_UPDATE:
9689 case GF_OMP_TARGET_KIND_ENTER_DATA:
9690 case GF_OMP_TARGET_KIND_EXIT_DATA:
9691 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9692 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9693 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9694 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9695 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9696 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9697 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9698 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9699 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9700 case GF_OMP_TARGET_KIND_DATA:
9701 case GF_OMP_TARGET_KIND_OACC_DATA:
9702 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9703 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9704 break;
9705 default:
9706 gcc_unreachable ();
9709 child_fn = NULL_TREE;
9710 child_cfun = NULL;
9711 if (offloaded)
9713 child_fn = gimple_omp_target_child_fn (entry_stmt);
9714 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9717 /* Supported by expand_omp_taskreg, but not here. */
9718 if (child_cfun != NULL)
9719 gcc_checking_assert (!child_cfun->cfg);
9720 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9722 entry_bb = region->entry;
9723 exit_bb = region->exit;
9725 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9726 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9728 /* Going on, all OpenACC compute constructs are mapped to
9729 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9730 To distinguish between them, we attach attributes. */
9731 switch (target_kind)
9733 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9734 DECL_ATTRIBUTES (child_fn)
9735 = tree_cons (get_identifier ("oacc parallel"),
9736 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9737 break;
9738 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9739 DECL_ATTRIBUTES (child_fn)
9740 = tree_cons (get_identifier ("oacc kernels"),
9741 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9742 break;
9743 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9744 DECL_ATTRIBUTES (child_fn)
9745 = tree_cons (get_identifier ("oacc serial"),
9746 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9747 break;
9748 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9749 DECL_ATTRIBUTES (child_fn)
9750 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9751 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9752 break;
9753 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9754 DECL_ATTRIBUTES (child_fn)
9755 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9756 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9757 break;
9758 default:
9759 /* Make sure we don't miss any. */
9760 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9761 && is_gimple_omp_offloaded (entry_stmt)));
9762 break;
9765 if (offloaded)
9767 unsigned srcidx, dstidx, num;
9769 /* If the offloading region needs data sent from the parent
9770 function, then the very first statement (except possible
9771 tree profile counter updates) of the offloading body
9772 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9773 &.OMP_DATA_O is passed as an argument to the child function,
9774 we need to replace it with the argument as seen by the child
9775 function.
9777 In most cases, this will end up being the identity assignment
9778 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9779 a function call that has been inlined, the original PARM_DECL
9780 .OMP_DATA_I may have been converted into a different local
9781 variable, in which case we need to keep the assignment. */
9782 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9783 if (data_arg)
9785 basic_block entry_succ_bb = single_succ (entry_bb);
9786 gimple_stmt_iterator gsi;
9787 tree arg;
9788 gimple *tgtcopy_stmt = NULL;
9789 tree sender = TREE_VEC_ELT (data_arg, 0);
9791 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9793 gcc_assert (!gsi_end_p (gsi));
9794 stmt = gsi_stmt (gsi);
9795 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9796 continue;
9798 if (gimple_num_ops (stmt) == 2)
9800 tree arg = gimple_assign_rhs1 (stmt);
9802 /* We're ignoring the subcode because we're
9803 effectively doing a STRIP_NOPS. */
9805 if (TREE_CODE (arg) == ADDR_EXPR
9806 && TREE_OPERAND (arg, 0) == sender)
9808 tgtcopy_stmt = stmt;
9809 break;
9814 gcc_assert (tgtcopy_stmt != NULL);
9815 arg = DECL_ARGUMENTS (child_fn);
9817 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9818 gsi_remove (&gsi, true);
9821 /* Declare local variables needed in CHILD_CFUN. */
9822 block = DECL_INITIAL (child_fn);
9823 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9824 /* The gimplifier could record temporaries in the offloading block
9825 rather than in containing function's local_decls chain,
9826 which would mean cgraph missed finalizing them. Do it now. */
9827 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9828 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9829 varpool_node::finalize_decl (t);
9830 DECL_SAVED_TREE (child_fn) = NULL;
9831 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9832 gimple_set_body (child_fn, NULL);
9833 TREE_USED (block) = 1;
9835 /* Reset DECL_CONTEXT on function arguments. */
9836 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9837 DECL_CONTEXT (t) = child_fn;
9839 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9840 so that it can be moved to the child function. */
9841 gsi = gsi_last_nondebug_bb (entry_bb);
9842 stmt = gsi_stmt (gsi);
9843 gcc_assert (stmt
9844 && gimple_code (stmt) == gimple_code (entry_stmt));
9845 e = split_block (entry_bb, stmt);
9846 gsi_remove (&gsi, true);
9847 entry_bb = e->dest;
9848 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9850 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9851 if (exit_bb)
9853 gsi = gsi_last_nondebug_bb (exit_bb);
9854 gcc_assert (!gsi_end_p (gsi)
9855 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9856 stmt = gimple_build_return (NULL);
9857 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9858 gsi_remove (&gsi, true);
9861 /* Move the offloading region into CHILD_CFUN. */
9863 block = gimple_block (entry_stmt);
9865 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9866 if (exit_bb)
9867 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9868 /* When the OMP expansion process cannot guarantee an up-to-date
9869 loop tree, arrange for the child function to fix up loops. */
9870 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9871 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9873 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9874 num = vec_safe_length (child_cfun->local_decls);
9875 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9877 t = (*child_cfun->local_decls)[srcidx];
9878 if (DECL_CONTEXT (t) == cfun->decl)
9879 continue;
9880 if (srcidx != dstidx)
9881 (*child_cfun->local_decls)[dstidx] = t;
9882 dstidx++;
9884 if (dstidx != num)
9885 vec_safe_truncate (child_cfun->local_decls, dstidx);
9887 /* Inform the callgraph about the new function. */
9888 child_cfun->curr_properties = cfun->curr_properties;
9889 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9890 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9891 cgraph_node *node = cgraph_node::get_create (child_fn);
9892 node->parallelized_function = 1;
9893 cgraph_node::add_new_function (child_fn, true);
9895 /* Add the new function to the offload table. */
9896 if (ENABLE_OFFLOADING)
9898 if (in_lto_p)
9899 DECL_PRESERVE_P (child_fn) = 1;
9900 vec_safe_push (offload_funcs, child_fn);
9903 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9904 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9906 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9907 fixed in a following pass. */
9908 push_cfun (child_cfun);
9909 if (need_asm)
9910 assign_assembler_name_if_needed (child_fn);
9911 cgraph_edge::rebuild_edges ();
9913 /* Some EH regions might become dead, see PR34608. If
9914 pass_cleanup_cfg isn't the first pass to happen with the
9915 new child, these dead EH edges might cause problems.
9916 Clean them up now. */
9917 if (flag_exceptions)
9919 basic_block bb;
9920 bool changed = false;
9922 FOR_EACH_BB_FN (bb, cfun)
9923 changed |= gimple_purge_dead_eh_edges (bb);
9924 if (changed)
9925 cleanup_tree_cfg ();
9927 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9928 verify_loop_structure ();
9929 pop_cfun ();
9931 if (dump_file && !gimple_in_ssa_p (cfun))
9933 omp_any_child_fn_dumped = true;
9934 dump_function_header (dump_file, child_fn, dump_flags);
9935 dump_function_to_file (child_fn, dump_file, dump_flags);
9938 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9941 /* Emit a library call to launch the offloading region, or do data
9942 transfers. */
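/* For illustration only (a sketch of the OpenMP launch call built below,
   with argument names invented for readability):

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);

   where mapnum/hostaddrs/sizes/kinds come from the data arguments t1-t4
   computed further down; OpenACC constructs call the corresponding GOACC_*
   entry points instead.  */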
9943 tree t1, t2, t3, t4, depend, c, clauses;
9944 enum built_in_function start_ix;
9945 unsigned int flags_i = 0;
9947 switch (gimple_omp_target_kind (entry_stmt))
9949 case GF_OMP_TARGET_KIND_REGION:
9950 start_ix = BUILT_IN_GOMP_TARGET;
9951 break;
9952 case GF_OMP_TARGET_KIND_DATA:
9953 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9954 break;
9955 case GF_OMP_TARGET_KIND_UPDATE:
9956 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9957 break;
9958 case GF_OMP_TARGET_KIND_ENTER_DATA:
9959 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9960 break;
9961 case GF_OMP_TARGET_KIND_EXIT_DATA:
9962 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9963 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9964 break;
9965 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9966 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9967 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9968 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9969 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9970 start_ix = BUILT_IN_GOACC_PARALLEL;
9971 break;
9972 case GF_OMP_TARGET_KIND_OACC_DATA:
9973 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9974 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9975 start_ix = BUILT_IN_GOACC_DATA_START;
9976 break;
9977 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9978 start_ix = BUILT_IN_GOACC_UPDATE;
9979 break;
9980 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9981 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9982 break;
9983 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9984 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9985 break;
9986 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9987 start_ix = BUILT_IN_GOACC_DECLARE;
9988 break;
9989 default:
9990 gcc_unreachable ();
9993 clauses = gimple_omp_target_clauses (entry_stmt);
9995 tree device = NULL_TREE;
9996 location_t device_loc = UNKNOWN_LOCATION;
9997 tree goacc_flags = NULL_TREE;
9998 if (is_gimple_omp_oacc (entry_stmt))
10000 /* By default, no GOACC_FLAGs are set. */
10001 goacc_flags = integer_zero_node;
10003 else
10005 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10006 if (c)
10008 device = OMP_CLAUSE_DEVICE_ID (c);
10009 device_loc = OMP_CLAUSE_LOCATION (c);
10010 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10011 sorry_at (device_loc, "%<ancestor%> not yet supported");
10013 else
10015 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
10016 library choose). */
10017 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10018 device_loc = gimple_location (entry_stmt);
10021 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10022 /* FIXME: in_reduction(...) with nowait is not implemented yet; pretend
10023 nowait doesn't appear. */
10024 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10025 c = NULL;
10026 if (c)
10027 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10030 /* By default, there is no conditional. */
10031 tree cond = NULL_TREE;
10032 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10033 if (c)
10034 cond = OMP_CLAUSE_IF_EXPR (c);
10035 /* If we found the clause 'if (cond)', build:
10036 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
10037 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
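/* For illustration only (a sketch): for

     #pragma omp target if (n > 64)

   the basic blocks created below compute
   device = (n > 64 ? device : GOMP_DEVICE_HOST_FALLBACK), so the launch call
   emitted later falls back to host execution whenever the condition is
   false.  */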
10038 if (cond)
10040 tree *tp;
10041 if (is_gimple_omp_oacc (entry_stmt))
10042 tp = &goacc_flags;
10043 else
10045 /* Ensure 'device' is of the correct type. */
10046 device = fold_convert_loc (device_loc, integer_type_node, device);
10048 tp = &device;
10051 cond = gimple_boolify (cond);
10053 basic_block cond_bb, then_bb, else_bb;
10054 edge e;
10055 tree tmp_var;
10057 tmp_var = create_tmp_var (TREE_TYPE (*tp));
10058 if (offloaded)
10059 e = split_block_after_labels (new_bb);
10060 else
10062 gsi = gsi_last_nondebug_bb (new_bb);
10063 gsi_prev (&gsi);
10064 e = split_block (new_bb, gsi_stmt (gsi));
10066 cond_bb = e->src;
10067 new_bb = e->dest;
10068 remove_edge (e);
10070 then_bb = create_empty_bb (cond_bb);
10071 else_bb = create_empty_bb (then_bb);
10072 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10073 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10075 stmt = gimple_build_cond_empty (cond);
10076 gsi = gsi_last_bb (cond_bb);
10077 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10079 gsi = gsi_start_bb (then_bb);
10080 stmt = gimple_build_assign (tmp_var, *tp);
10081 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10083 gsi = gsi_start_bb (else_bb);
10084 if (is_gimple_omp_oacc (entry_stmt))
10085 stmt = gimple_build_assign (tmp_var,
10086 BIT_IOR_EXPR,
10087 *tp,
10088 build_int_cst (integer_type_node,
10089 GOACC_FLAG_HOST_FALLBACK));
10090 else
10091 stmt = gimple_build_assign (tmp_var,
10092 build_int_cst (integer_type_node,
10093 GOMP_DEVICE_HOST_FALLBACK));
10094 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10096 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10097 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10098 add_bb_to_loop (then_bb, cond_bb->loop_father);
10099 add_bb_to_loop (else_bb, cond_bb->loop_father);
10100 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10101 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10103 *tp = tmp_var;
10105 gsi = gsi_last_nondebug_bb (new_bb);
10107 else
10109 gsi = gsi_last_nondebug_bb (new_bb);
10111 if (device != NULL_TREE)
10112 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10113 true, GSI_SAME_STMT);
10116 t = gimple_omp_target_data_arg (entry_stmt);
10117 if (t == NULL)
10119 t1 = size_zero_node;
10120 t2 = build_zero_cst (ptr_type_node);
10121 t3 = t2;
10122 t4 = t2;
10124 else
10126 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10127 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10128 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10129 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10130 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10133 gimple *g;
10134 bool tagging = false;
10135 /* The maximum number of arguments used by any start_ix, without varargs. */
10136 auto_vec<tree, 11> args;
10137 if (is_gimple_omp_oacc (entry_stmt))
10139 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10140 TREE_TYPE (goacc_flags), goacc_flags);
10141 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10142 NULL_TREE, true,
10143 GSI_SAME_STMT);
10144 args.quick_push (goacc_flags_m);
10146 else
10147 args.quick_push (device);
10148 if (offloaded)
10149 args.quick_push (build_fold_addr_expr (child_fn));
10150 args.quick_push (t1);
10151 args.quick_push (t2);
10152 args.quick_push (t3);
10153 args.quick_push (t4);
10154 switch (start_ix)
10156 case BUILT_IN_GOACC_DATA_START:
10157 case BUILT_IN_GOACC_DECLARE:
10158 case BUILT_IN_GOMP_TARGET_DATA:
10159 break;
10160 case BUILT_IN_GOMP_TARGET:
10161 case BUILT_IN_GOMP_TARGET_UPDATE:
10162 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10163 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10164 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10165 if (c)
10166 depend = OMP_CLAUSE_DECL (c);
10167 else
10168 depend = build_int_cst (ptr_type_node, 0);
10169 args.quick_push (depend);
10170 if (start_ix == BUILT_IN_GOMP_TARGET)
10171 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10172 break;
10173 case BUILT_IN_GOACC_PARALLEL:
10174 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10176 tree dims = NULL_TREE;
10177 unsigned int ix;
10179 /* For serial constructs we set all dimensions to 1. */
10180 for (ix = GOMP_DIM_MAX; ix--;)
10181 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10182 oacc_replace_fn_attrib (child_fn, dims);
10184 else
10185 oacc_set_fn_attrib (child_fn, clauses, &args);
10186 tagging = true;
10187 /* FALLTHRU */
10188 case BUILT_IN_GOACC_ENTER_DATA:
10189 case BUILT_IN_GOACC_EXIT_DATA:
10190 case BUILT_IN_GOACC_UPDATE:
10192 tree t_async = NULL_TREE;
10194 /* If present, use the value specified by the respective
10195 clause, making sure that is of the correct type. */
10196 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10197 if (c)
10198 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10199 integer_type_node,
10200 OMP_CLAUSE_ASYNC_EXPR (c));
10201 else if (!tagging)
10202 /* Default value for t_async. */
10203 t_async = fold_convert_loc (gimple_location (entry_stmt),
10204 integer_type_node,
10205 build_int_cst (integer_type_node,
10206 GOMP_ASYNC_SYNC));
10207 if (tagging && t_async)
10209 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10211 if (TREE_CODE (t_async) == INTEGER_CST)
10213 /* See if we can pack the async arg into the tag's
10214 operand. */
10215 i_async = TREE_INT_CST_LOW (t_async);
10216 if (i_async < GOMP_LAUNCH_OP_MAX)
10217 t_async = NULL_TREE;
10218 else
10219 i_async = GOMP_LAUNCH_OP_MAX;
10221 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10222 i_async));
10224 if (t_async)
10225 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10226 NULL_TREE, true,
10227 GSI_SAME_STMT));
10229 /* Save the argument index, and ... */
10230 unsigned t_wait_idx = args.length ();
10231 unsigned num_waits = 0;
10232 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10233 if (!tagging || c)
10234 /* ... push a placeholder. */
10235 args.safe_push (integer_zero_node);
10237 for (; c; c = OMP_CLAUSE_CHAIN (c))
10238 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10240 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10241 integer_type_node,
10242 OMP_CLAUSE_WAIT_EXPR (c));
10243 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10244 GSI_SAME_STMT);
10245 args.safe_push (arg);
10246 num_waits++;
10249 if (!tagging || num_waits)
10251 tree len;
10253 /* Now that we know the number, update the placeholder. */
10254 if (tagging)
10255 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10256 else
10257 len = build_int_cst (integer_type_node, num_waits);
10258 len = fold_convert_loc (gimple_location (entry_stmt),
10259 unsigned_type_node, len);
10260 args[t_wait_idx] = len;
10263 break;
10264 default:
10265 gcc_unreachable ();
10267 if (tagging)
10268 /* Push terminal marker - zero. */
10269 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10271 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10272 gimple_set_location (g, gimple_location (entry_stmt));
10273 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10274 if (!offloaded)
10276 g = gsi_stmt (gsi);
10277 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10278 gsi_remove (&gsi, true);
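/* For reference, when START_IX is BUILT_IN_GOMP_TARGET the call built
   from ARGS above ends up as a call to the function behind that builtin
   (GOMP_target_ext in current libgomp) with the shape (a sketch; the
   operand names are illustrative, not the libgomp prototype):

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, target_args);

   i.e. the device id (possibly GOMP_DEVICE_HOST_FALLBACK), the address
   of the outlined child function, the T1..T4 map descriptors, and the
   flags, depend and get_target_arguments operands pushed above.  */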
10282 /* Expand the parallel region tree rooted at REGION. Expansion
10283 proceeds in depth-first order. Innermost regions are expanded
10284 first. This way, parallel regions that require a new function to
10285 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10286 internal dependencies in their body. */
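/* For example, for

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       ...

   the GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL region, so when the parallel region is outlined
   into its child function its body contains no unexpanded OMP
   directives.  */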
10288 static void
10289 expand_omp (struct omp_region *region)
10291 omp_any_child_fn_dumped = false;
10292 while (region)
10294 location_t saved_location;
10295 gimple *inner_stmt = NULL;
10297 /* First, determine whether this is a combined parallel+workshare
10298 region. */
10299 if (region->type == GIMPLE_OMP_PARALLEL)
10300 determine_parallel_type (region);
10302 if (region->type == GIMPLE_OMP_FOR
10303 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10304 inner_stmt = last_stmt (region->inner->entry);
10306 if (region->inner)
10307 expand_omp (region->inner);
10309 saved_location = input_location;
10310 if (gimple_has_location (last_stmt (region->entry)))
10311 input_location = gimple_location (last_stmt (region->entry));
10313 switch (region->type)
10315 case GIMPLE_OMP_PARALLEL:
10316 case GIMPLE_OMP_TASK:
10317 expand_omp_taskreg (region);
10318 break;
10320 case GIMPLE_OMP_FOR:
10321 expand_omp_for (region, inner_stmt);
10322 break;
10324 case GIMPLE_OMP_SECTIONS:
10325 expand_omp_sections (region);
10326 break;
10328 case GIMPLE_OMP_SECTION:
10329 /* Individual omp sections are handled together with their
10330 parent GIMPLE_OMP_SECTIONS region. */
10331 break;
10333 case GIMPLE_OMP_SINGLE:
10334 case GIMPLE_OMP_SCOPE:
10335 expand_omp_single (region);
10336 break;
10338 case GIMPLE_OMP_ORDERED:
10340 gomp_ordered *ord_stmt
10341 = as_a <gomp_ordered *> (last_stmt (region->entry));
10342 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10343 OMP_CLAUSE_DEPEND))
10345 /* We'll expand these when expanding the corresponding
10346 worksharing region with an ordered(n) clause. */
10347 gcc_assert (region->outer
10348 && region->outer->type == GIMPLE_OMP_FOR);
10349 region->ord_stmt = ord_stmt;
10350 break;
10353 /* FALLTHRU */
10354 case GIMPLE_OMP_MASTER:
10355 case GIMPLE_OMP_MASKED:
10356 case GIMPLE_OMP_TASKGROUP:
10357 case GIMPLE_OMP_CRITICAL:
10358 case GIMPLE_OMP_TEAMS:
10359 expand_omp_synch (region);
10360 break;
10362 case GIMPLE_OMP_ATOMIC_LOAD:
10363 expand_omp_atomic (region);
10364 break;
10366 case GIMPLE_OMP_TARGET:
10367 expand_omp_target (region);
10368 break;
10370 default:
10371 gcc_unreachable ();
10374 input_location = saved_location;
10375 region = region->next;
10377 if (omp_any_child_fn_dumped)
10379 if (dump_file)
10380 dump_function_header (dump_file, current_function_decl, dump_flags);
10381 omp_any_child_fn_dumped = false;
10385 /* Helper for build_omp_regions. Scan the dominator tree starting at
10386 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10387 true, the function ends once a single tree is built (otherwise, the whole
10388 forest of OMP constructs may be built). */
10390 static void
10391 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10392 bool single_tree)
10394 gimple_stmt_iterator gsi;
10395 gimple *stmt;
10396 basic_block son;
10398 gsi = gsi_last_nondebug_bb (bb);
10399 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10401 struct omp_region *region;
10402 enum gimple_code code;
10404 stmt = gsi_stmt (gsi);
10405 code = gimple_code (stmt);
10406 if (code == GIMPLE_OMP_RETURN)
10408 /* STMT is the return point out of region PARENT. Mark it
10409 as the exit point and make PARENT the immediately
10410 enclosing region. */
10411 gcc_assert (parent);
10412 region = parent;
10413 region->exit = bb;
10414 parent = parent->outer;
10416 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10418 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10419 GIMPLE_OMP_RETURN, but matches with
10420 GIMPLE_OMP_ATOMIC_LOAD. */
10421 gcc_assert (parent);
10422 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10423 region = parent;
10424 region->exit = bb;
10425 parent = parent->outer;
10427 else if (code == GIMPLE_OMP_CONTINUE)
10429 gcc_assert (parent);
10430 parent->cont = bb;
10432 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10434 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10435 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10437 else
10439 region = new_omp_region (bb, code, parent);
10440 /* Otherwise... */
10441 if (code == GIMPLE_OMP_TARGET)
10443 switch (gimple_omp_target_kind (stmt))
10445 case GF_OMP_TARGET_KIND_REGION:
10446 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10447 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10448 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10449 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10450 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10451 break;
10452 case GF_OMP_TARGET_KIND_UPDATE:
10453 case GF_OMP_TARGET_KIND_ENTER_DATA:
10454 case GF_OMP_TARGET_KIND_EXIT_DATA:
10455 case GF_OMP_TARGET_KIND_DATA:
10456 case GF_OMP_TARGET_KIND_OACC_DATA:
10457 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10458 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10459 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10460 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10461 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10462 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10463 /* ..., other than for those stand-alone directives... */
10464 region = NULL;
10465 break;
10466 default:
10467 gcc_unreachable ();
10470 else if (code == GIMPLE_OMP_ORDERED
10471 && omp_find_clause (gimple_omp_ordered_clauses
10472 (as_a <gomp_ordered *> (stmt)),
10473 OMP_CLAUSE_DEPEND))
10474 /* #pragma omp ordered depend is also just a stand-alone
10475 directive. */
10476 region = NULL;
10477 else if (code == GIMPLE_OMP_TASK
10478 && gimple_omp_task_taskwait_p (stmt))
10479 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10480 region = NULL;
10481 /* ..., this directive becomes the parent for a new region. */
10482 if (region)
10483 parent = region;
10487 if (single_tree && !parent)
10488 return;
10490 for (son = first_dom_son (CDI_DOMINATORS, bb);
10491 son;
10492 son = next_dom_son (CDI_DOMINATORS, son))
10493 build_omp_regions_1 (son, parent, single_tree);
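/* As an illustration, for

     #pragma omp parallel
       {
         #pragma omp for
         for (...)
           ...
       }

   this builds a GIMPLE_OMP_PARALLEL region whose inner child is the
   GIMPLE_OMP_FOR region, with the blocks ending in GIMPLE_OMP_CONTINUE
   and GIMPLE_OMP_RETURN recorded as that region's cont and exit blocks;
   expand_omp later walks exactly this tree.  */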
10496 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10497 root_omp_region. */
10499 static void
10500 build_omp_regions_root (basic_block root)
10502 gcc_assert (root_omp_region == NULL);
10503 build_omp_regions_1 (root, NULL, true);
10504 gcc_assert (root_omp_region != NULL);
10507 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
10509 void
10510 omp_expand_local (basic_block head)
10512 build_omp_regions_root (head);
10513 if (dump_file && (dump_flags & TDF_DETAILS))
10515 fprintf (dump_file, "\nOMP region tree\n\n");
10516 dump_omp_region (dump_file, root_omp_region, 0);
10517 fprintf (dump_file, "\n");
10520 remove_exit_barriers (root_omp_region);
10521 expand_omp (root_omp_region);
10523 omp_free_regions ();
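/* A minimal usage sketch, assuming the caller has already emitted the
   GIMPLE_OMP_* statements and fixed up the CFG (HEAD names the block
   ending in the directive, e.g. a GIMPLE_OMP_PARALLEL):

     omp_expand_local (head);

   This is the entry point used when OMP regions are created after the
   main ompexp pass has run, for instance by the loop auto-parallelizer
   in tree-parloops.c.  */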
10526 /* Scan the CFG and build a tree of OMP regions, storing the root of
10527 the OMP region tree in root_omp_region. */
10529 static void
10530 build_omp_regions (void)
10532 gcc_assert (root_omp_region == NULL);
10533 calculate_dominance_info (CDI_DOMINATORS);
10534 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10537 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10539 static unsigned int
10540 execute_expand_omp (void)
10542 build_omp_regions ();
10544 if (!root_omp_region)
10545 return 0;
10547 if (dump_file)
10549 fprintf (dump_file, "\nOMP region tree\n\n");
10550 dump_omp_region (dump_file, root_omp_region, 0);
10551 fprintf (dump_file, "\n");
10554 remove_exit_barriers (root_omp_region);
10556 expand_omp (root_omp_region);
10558 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10559 verify_loop_structure ();
10560 cleanup_tree_cfg ();
10562 omp_free_regions ();
10564 return 0;
10567 /* OMP expansion -- the default pass, run before creation of SSA form. */
10569 namespace {
10571 const pass_data pass_data_expand_omp =
10573 GIMPLE_PASS, /* type */
10574 "ompexp", /* name */
10575 OPTGROUP_OMP, /* optinfo_flags */
10576 TV_NONE, /* tv_id */
10577 PROP_gimple_any, /* properties_required */
10578 PROP_gimple_eomp, /* properties_provided */
10579 0, /* properties_destroyed */
10580 0, /* todo_flags_start */
10581 0, /* todo_flags_finish */
10584 class pass_expand_omp : public gimple_opt_pass
10586 public:
10587 pass_expand_omp (gcc::context *ctxt)
10588 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10591 /* opt_pass methods: */
10592 virtual unsigned int execute (function *)
10594 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10595 || flag_openmp_simd != 0)
10596 && !seen_error ());
10598 /* This pass always runs, to provide PROP_gimple_eomp.
10599 But often, there is nothing to do. */
10600 if (!gate)
10601 return 0;
10603 return execute_expand_omp ();
10606 }; // class pass_expand_omp
10608 } // anon namespace
10610 gimple_opt_pass *
10611 make_pass_expand_omp (gcc::context *ctxt)
10613 return new pass_expand_omp (ctxt);
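/* For reference, the pass manager instantiates this pass through its
   entry in passes.def, which is simply

     NEXT_PASS (pass_expand_omp);

   the ompexpssa variant below is registered the same way and only runs
   for functions that reach SSA form without PROP_gimple_eomp set.  */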
10616 namespace {
10618 const pass_data pass_data_expand_omp_ssa =
10620 GIMPLE_PASS, /* type */
10621 "ompexpssa", /* name */
10622 OPTGROUP_OMP, /* optinfo_flags */
10623 TV_NONE, /* tv_id */
10624 PROP_cfg | PROP_ssa, /* properties_required */
10625 PROP_gimple_eomp, /* properties_provided */
10626 0, /* properties_destroyed */
10627 0, /* todo_flags_start */
10628 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10631 class pass_expand_omp_ssa : public gimple_opt_pass
10633 public:
10634 pass_expand_omp_ssa (gcc::context *ctxt)
10635 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10638 /* opt_pass methods: */
10639 virtual bool gate (function *fun)
10641 return !(fun->curr_properties & PROP_gimple_eomp);
10643 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10644 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10646 }; // class pass_expand_omp_ssa
10648 } // anon namespace
10650 gimple_opt_pass *
10651 make_pass_expand_omp_ssa (gcc::context *ctxt)
10653 return new pass_expand_omp_ssa (ctxt);
10656 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10657 GIMPLE_* codes. */
10659 bool
10660 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10661 int *region_idx)
10663 gimple *last = last_stmt (bb);
10664 enum gimple_code code = gimple_code (last);
10665 struct omp_region *cur_region = *region;
10666 bool fallthru = false;
10668 switch (code)
10670 case GIMPLE_OMP_PARALLEL:
10671 case GIMPLE_OMP_FOR:
10672 case GIMPLE_OMP_SINGLE:
10673 case GIMPLE_OMP_TEAMS:
10674 case GIMPLE_OMP_MASTER:
10675 case GIMPLE_OMP_MASKED:
10676 case GIMPLE_OMP_SCOPE:
10677 case GIMPLE_OMP_TASKGROUP:
10678 case GIMPLE_OMP_CRITICAL:
10679 case GIMPLE_OMP_SECTION:
10680 cur_region = new_omp_region (bb, code, cur_region);
10681 fallthru = true;
10682 break;
10684 case GIMPLE_OMP_TASK:
10685 cur_region = new_omp_region (bb, code, cur_region);
10686 fallthru = true;
10687 if (gimple_omp_task_taskwait_p (last))
10688 cur_region = cur_region->outer;
10689 break;
10691 case GIMPLE_OMP_ORDERED:
10692 cur_region = new_omp_region (bb, code, cur_region);
10693 fallthru = true;
10694 if (omp_find_clause (gimple_omp_ordered_clauses
10695 (as_a <gomp_ordered *> (last)),
10696 OMP_CLAUSE_DEPEND))
10697 cur_region = cur_region->outer;
10698 break;
10700 case GIMPLE_OMP_TARGET:
10701 cur_region = new_omp_region (bb, code, cur_region);
10702 fallthru = true;
10703 switch (gimple_omp_target_kind (last))
10705 case GF_OMP_TARGET_KIND_REGION:
10706 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10707 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10708 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10709 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10710 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10711 break;
10712 case GF_OMP_TARGET_KIND_UPDATE:
10713 case GF_OMP_TARGET_KIND_ENTER_DATA:
10714 case GF_OMP_TARGET_KIND_EXIT_DATA:
10715 case GF_OMP_TARGET_KIND_DATA:
10716 case GF_OMP_TARGET_KIND_OACC_DATA:
10717 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10718 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10719 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10720 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10721 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10722 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10723 cur_region = cur_region->outer;
10724 break;
10725 default:
10726 gcc_unreachable ();
10728 break;
10730 case GIMPLE_OMP_SECTIONS:
10731 cur_region = new_omp_region (bb, code, cur_region);
10732 fallthru = true;
10733 break;
10735 case GIMPLE_OMP_SECTIONS_SWITCH:
10736 fallthru = false;
10737 break;
10739 case GIMPLE_OMP_ATOMIC_LOAD:
10740 case GIMPLE_OMP_ATOMIC_STORE:
10741 fallthru = true;
10742 break;
10744 case GIMPLE_OMP_RETURN:
10745 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10746 somewhere other than the next block. That edge will be
10747 created later. */
10748 cur_region->exit = bb;
10749 if (cur_region->type == GIMPLE_OMP_TASK)
10750 /* Add an edge corresponding to not scheduling the task
10751 immediately. */
10752 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10753 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10754 cur_region = cur_region->outer;
10755 break;
10757 case GIMPLE_OMP_CONTINUE:
10758 cur_region->cont = bb;
10759 switch (cur_region->type)
10761 case GIMPLE_OMP_FOR:
10762 /* Mark the successor edges of the GIMPLE_OMP_FOR and
10763 GIMPLE_OMP_CONTINUE blocks as abnormal to prevent
10764 them from being split. */
10765 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10766 /* Make the loopback edge. */
10767 make_edge (bb, single_succ (cur_region->entry),
10768 EDGE_ABNORMAL);
10770 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10771 corresponds to the case that the body of the loop
10772 is not executed at all. */
10773 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10774 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10775 fallthru = false;
10776 break;
10778 case GIMPLE_OMP_SECTIONS:
10779 /* Wire up the edges into and out of the nested sections. */
10781 basic_block switch_bb = single_succ (cur_region->entry);
10783 struct omp_region *i;
10784 for (i = cur_region->inner; i ; i = i->next)
10786 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10787 make_edge (switch_bb, i->entry, 0);
10788 make_edge (i->exit, bb, EDGE_FALLTHRU);
10791 /* Make the loopback edge to the block with
10792 GIMPLE_OMP_SECTIONS_SWITCH. */
10793 make_edge (bb, switch_bb, 0);
10795 /* Make the edge from the switch to exit. */
10796 make_edge (switch_bb, bb->next_bb, 0);
10797 fallthru = false;
10799 break;
10801 case GIMPLE_OMP_TASK:
10802 fallthru = true;
10803 break;
10805 default:
10806 gcc_unreachable ();
10808 break;
10810 default:
10811 gcc_unreachable ();
10814 if (*region != cur_region)
10816 *region = cur_region;
10817 if (cur_region)
10818 *region_idx = cur_region->entry->index;
10819 else
10820 *region_idx = 0;
10823 return fallthru;
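/* As a sketch, for a GIMPLE_OMP_FOR region the edges created above are

     entry --> body    (existing single-successor edge, marked EDGE_ABNORMAL)
     entry --> exit    (EDGE_ABNORMAL, zero-iteration case)
     cont  --> body    (EDGE_ABNORMAL, loopback edge)
     cont  --> exit    (EDGE_FALLTHRU | EDGE_ABNORMAL)

   where entry ends in GIMPLE_OMP_FOR, cont in GIMPLE_OMP_CONTINUE, and
   exit is the block following cont; the abnormal flags keep these edges
   from being split before the region is expanded.  */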