/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
33 #include "tree-pass.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
42 #include "internal-fn.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
48 #include "tree-into-ssa.h"
50 #include "splay-tree.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
68 /* The enclosing region. */
69 struct omp_region
*outer
;
71 /* First child region. */
72 struct omp_region
*inner
;
74 /* Next peer region. */
75 struct omp_region
*next
;
77 /* Block containing the omp directive as its last stmt. */
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
89 vec
<tree
, va_gc
> *ws_args
;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type
;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind
;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers
;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel
;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 gomp_ordered
*ord_stmt
;
108 static struct omp_region
*root_omp_region
;
109 static bool omp_any_child_fn_dumped
;
111 static void expand_omp_build_assign (gimple_stmt_iterator
*, tree
, tree
,
113 static gphi
*find_phi_with_arg_on_edge (tree
, edge
);
114 static void expand_omp (struct omp_region
*region
);
116 /* Return true if REGION is a combined parallel+workshare region. */
119 is_combined_parallel (struct omp_region
*region
)
121 return region
->is_combined_parallel
;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
140 # BLOCK 2 (PAR_ENTRY_BB)
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
167 workshare_safe_to_combine_p (basic_block ws_entry_bb
)
169 struct omp_for_data fd
;
170 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
172 if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
175 gcc_assert (gimple_code (ws_stmt
) == GIMPLE_OMP_FOR
);
177 omp_extract_for_data (as_a
<gomp_for
*> (ws_stmt
), &fd
, NULL
);
179 if (fd
.collapse
> 1 && TREE_CODE (fd
.loop
.n2
) != INTEGER_CST
)
181 if (fd
.iter_type
!= long_integer_type_node
)
184 /* FIXME. We give up too easily here. If any of these arguments
185 are not constants, they will likely involve variables that have
186 been mapped into fields of .omp_data_s for sharing with the child
187 function. With appropriate data flow, it would be possible to
189 if (!is_gimple_min_invariant (fd
.loop
.n1
)
190 || !is_gimple_min_invariant (fd
.loop
.n2
)
191 || !is_gimple_min_invariant (fd
.loop
.step
)
192 || (fd
.chunk_size
&& !is_gimple_min_invariant (fd
.chunk_size
)))
198 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
199 presence (SIMD_SCHEDULE). */
202 omp_adjust_chunk_size (tree chunk_size
, bool simd_schedule
)
207 poly_uint64 vf
= omp_max_vf ();
208 if (known_eq (vf
, 1U))
211 tree type
= TREE_TYPE (chunk_size
);
212 chunk_size
= fold_build2 (PLUS_EXPR
, type
, chunk_size
,
213 build_int_cst (type
, vf
- 1));
214 return fold_build2 (BIT_AND_EXPR
, type
, chunk_size
,
215 build_int_cst (type
, -vf
));
218 /* Collect additional arguments needed to emit a combined
219 parallel+workshare call. WS_STMT is the workshare directive being
222 static vec
<tree
, va_gc
> *
223 get_ws_args_for (gimple
*par_stmt
, gimple
*ws_stmt
)
226 location_t loc
= gimple_location (ws_stmt
);
227 vec
<tree
, va_gc
> *ws_args
;
229 if (gomp_for
*for_stmt
= dyn_cast
<gomp_for
*> (ws_stmt
))
231 struct omp_for_data fd
;
234 omp_extract_for_data (for_stmt
, &fd
, NULL
);
238 if (gimple_omp_for_combined_into_p (for_stmt
))
241 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt
),
242 OMP_CLAUSE__LOOPTEMP_
);
244 n1
= OMP_CLAUSE_DECL (innerc
);
245 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
246 OMP_CLAUSE__LOOPTEMP_
);
248 n2
= OMP_CLAUSE_DECL (innerc
);
251 vec_alloc (ws_args
, 3 + (fd
.chunk_size
!= 0));
253 t
= fold_convert_loc (loc
, long_integer_type_node
, n1
);
254 ws_args
->quick_push (t
);
256 t
= fold_convert_loc (loc
, long_integer_type_node
, n2
);
257 ws_args
->quick_push (t
);
259 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.loop
.step
);
260 ws_args
->quick_push (t
);
264 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.chunk_size
);
265 t
= omp_adjust_chunk_size (t
, fd
.simd_schedule
);
266 ws_args
->quick_push (t
);
271 else if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
273 /* Number of sections is equal to the number of edges from the
274 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275 the exit of the sections region. */
276 basic_block bb
= single_succ (gimple_bb (ws_stmt
));
277 t
= build_int_cst (unsigned_type_node
, EDGE_COUNT (bb
->succs
) - 1);
278 vec_alloc (ws_args
, 1);
279 ws_args
->quick_push (t
);
286 /* Discover whether REGION is a combined parallel+workshare region. */
289 determine_parallel_type (struct omp_region
*region
)
291 basic_block par_entry_bb
, par_exit_bb
;
292 basic_block ws_entry_bb
, ws_exit_bb
;
294 if (region
== NULL
|| region
->inner
== NULL
295 || region
->exit
== NULL
|| region
->inner
->exit
== NULL
296 || region
->inner
->cont
== NULL
)
299 /* We only support parallel+for and parallel+sections. */
300 if (region
->type
!= GIMPLE_OMP_PARALLEL
301 || (region
->inner
->type
!= GIMPLE_OMP_FOR
302 && region
->inner
->type
!= GIMPLE_OMP_SECTIONS
))
305 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306 WS_EXIT_BB -> PAR_EXIT_BB. */
307 par_entry_bb
= region
->entry
;
308 par_exit_bb
= region
->exit
;
309 ws_entry_bb
= region
->inner
->entry
;
310 ws_exit_bb
= region
->inner
->exit
;
312 if (single_succ (par_entry_bb
) == ws_entry_bb
313 && single_succ (ws_exit_bb
) == par_exit_bb
314 && workshare_safe_to_combine_p (ws_entry_bb
)
315 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb
))
316 || (last_and_only_stmt (ws_entry_bb
)
317 && last_and_only_stmt (par_exit_bb
))))
319 gimple
*par_stmt
= last_stmt (par_entry_bb
);
320 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
322 if (region
->inner
->type
== GIMPLE_OMP_FOR
)
324 /* If this is a combined parallel loop, we need to determine
325 whether or not to use the combined library calls. There
326 are two cases where we do not apply the transformation:
327 static loops and any kind of ordered loop. In the first
328 case, we already open code the loop so there is no need
329 to do anything else. In the latter case, the combined
330 parallel loop call would still need extra synchronization
331 to implement ordered semantics, so there would not be any
332 gain in using the combined call. */
333 tree clauses
= gimple_omp_for_clauses (ws_stmt
);
334 tree c
= omp_find_clause (clauses
, OMP_CLAUSE_SCHEDULE
);
336 || ((OMP_CLAUSE_SCHEDULE_KIND (c
) & OMP_CLAUSE_SCHEDULE_MASK
)
337 == OMP_CLAUSE_SCHEDULE_STATIC
)
338 || omp_find_clause (clauses
, OMP_CLAUSE_ORDERED
))
340 region
->is_combined_parallel
= false;
341 region
->inner
->is_combined_parallel
= false;
346 region
->is_combined_parallel
= true;
347 region
->inner
->is_combined_parallel
= true;
348 region
->ws_args
= get_ws_args_for (par_stmt
, ws_stmt
);
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);
357 /* Dump the parallel region tree rooted at REGION. */
360 dump_omp_region (FILE *file
, struct omp_region
*region
, int indent
)
362 fprintf (file
, "%*sbb %d: %s\n", indent
, "", region
->entry
->index
,
363 gimple_code_name
[region
->type
]);
366 dump_omp_region (file
, region
->inner
, indent
+ 4);
370 fprintf (file
, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent
, "",
371 region
->cont
->index
);
375 fprintf (file
, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent
, "",
376 region
->exit
->index
);
378 fprintf (file
, "%*s[no exit marker]\n", indent
, "");
381 dump_omp_region (file
, region
->next
, indent
);
385 debug_omp_region (struct omp_region
*region
)
387 dump_omp_region (stderr
, region
, 0);
391 debug_all_omp_regions (void)
393 dump_omp_region (stderr
, root_omp_region
, 0);
396 /* Create a new parallel region starting at STMT inside region PARENT. */
398 static struct omp_region
*
399 new_omp_region (basic_block bb
, enum gimple_code type
,
400 struct omp_region
*parent
)
402 struct omp_region
*region
= XCNEW (struct omp_region
);
404 region
->outer
= parent
;
410 /* This is a nested region. Add it to the list of inner
411 regions in PARENT. */
412 region
->next
= parent
->inner
;
413 parent
->inner
= region
;
417 /* This is a toplevel region. Add it to the list of toplevel
418 regions in ROOT_OMP_REGION. */
419 region
->next
= root_omp_region
;
420 root_omp_region
= region
;
426 /* Release the memory associated with the region tree rooted at REGION. */
429 free_omp_region_1 (struct omp_region
*region
)
431 struct omp_region
*i
, *n
;
433 for (i
= region
->inner
; i
; i
= n
)
436 free_omp_region_1 (i
);
442 /* Release the memory for the entire omp region tree. */
445 omp_free_regions (void)
447 struct omp_region
*r
, *n
;
448 for (r
= root_omp_region
; r
; r
= n
)
451 free_omp_region_1 (r
);
453 root_omp_region
= NULL
;
456 /* A convenience function to build an empty GIMPLE_COND with just the
460 gimple_build_cond_empty (tree cond
)
462 enum tree_code pred_code
;
465 gimple_cond_get_ops_from_tree (cond
, &pred_code
, &lhs
, &rhs
);
466 return gimple_build_cond (pred_code
, lhs
, rhs
, NULL_TREE
, NULL_TREE
);
469 /* Return true if a parallel REGION is within a declare target function or
470 within a target region and is not a part of a gridified target. */
473 parallel_needs_hsa_kernel_p (struct omp_region
*region
)
475 bool indirect
= false;
476 for (region
= region
->outer
; region
; region
= region
->outer
)
478 if (region
->type
== GIMPLE_OMP_PARALLEL
)
480 else if (region
->type
== GIMPLE_OMP_TARGET
)
482 gomp_target
*tgt_stmt
483 = as_a
<gomp_target
*> (last_stmt (region
->entry
));
485 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
486 OMP_CLAUSE__GRIDDIM_
))
493 if (lookup_attribute ("omp declare target",
494 DECL_ATTRIBUTES (current_function_decl
)))
500 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
501 Add CHILD_FNDECL to decl chain of the supercontext of the block
502 ENTRY_BLOCK - this is the block which originally contained the
503 code from which CHILD_FNDECL was created.
505 Together, these actions ensure that the debug info for the outlined
506 function will be emitted with the correct lexical scope. */
509 adjust_context_and_scope (struct omp_region
*region
, tree entry_block
,
512 tree parent_fndecl
= NULL_TREE
;
514 /* OMP expansion expands inner regions before outer ones, so if
515 we e.g. have explicit task region nested in parallel region, when
516 expanding the task region current_function_decl will be the original
517 source function, but we actually want to use as context the child
518 function of the parallel. */
519 for (region
= region
->outer
;
520 region
&& parent_fndecl
== NULL_TREE
; region
= region
->outer
)
521 switch (region
->type
)
523 case GIMPLE_OMP_PARALLEL
:
524 case GIMPLE_OMP_TASK
:
525 entry_stmt
= last_stmt (region
->entry
);
526 parent_fndecl
= gimple_omp_taskreg_child_fn (entry_stmt
);
528 case GIMPLE_OMP_TARGET
:
529 entry_stmt
= last_stmt (region
->entry
);
531 = gimple_omp_target_child_fn (as_a
<gomp_target
*> (entry_stmt
));
537 if (parent_fndecl
== NULL_TREE
)
538 parent_fndecl
= current_function_decl
;
539 DECL_CONTEXT (child_fndecl
) = parent_fndecl
;
541 if (entry_block
!= NULL_TREE
&& TREE_CODE (entry_block
) == BLOCK
)
543 tree b
= BLOCK_SUPERCONTEXT (entry_block
);
544 if (TREE_CODE (b
) == BLOCK
)
546 DECL_CHAIN (child_fndecl
) = BLOCK_VARS (b
);
547 BLOCK_VARS (b
) = child_fndecl
;
552 /* Build the function calls to GOMP_parallel_start etc to actually
553 generate the parallel operation. REGION is the parallel region
554 being expanded. BB is the block where to insert the code. WS_ARGS
555 will be set if this is a call to a combined parallel+workshare
556 construct, it contains the list of additional arguments needed by
557 the workshare construct. */
560 expand_parallel_call (struct omp_region
*region
, basic_block bb
,
561 gomp_parallel
*entry_stmt
,
562 vec
<tree
, va_gc
> *ws_args
)
564 tree t
, t1
, t2
, val
, cond
, c
, clauses
, flags
;
565 gimple_stmt_iterator gsi
;
567 enum built_in_function start_ix
;
569 location_t clause_loc
;
570 vec
<tree
, va_gc
> *args
;
572 clauses
= gimple_omp_parallel_clauses (entry_stmt
);
574 /* Determine what flavor of GOMP_parallel we will be
576 start_ix
= BUILT_IN_GOMP_PARALLEL
;
577 if (is_combined_parallel (region
))
579 switch (region
->inner
->type
)
582 gcc_assert (region
->inner
->sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
583 switch (region
->inner
->sched_kind
)
585 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
588 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
589 case OMP_CLAUSE_SCHEDULE_GUIDED
:
590 if (region
->inner
->sched_modifiers
591 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC
)
593 start_ix2
= 3 + region
->inner
->sched_kind
;
598 start_ix2
= region
->inner
->sched_kind
;
601 start_ix2
+= (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
;
602 start_ix
= (enum built_in_function
) start_ix2
;
604 case GIMPLE_OMP_SECTIONS
:
605 start_ix
= BUILT_IN_GOMP_PARALLEL_SECTIONS
;
612 /* By default, the value of NUM_THREADS is zero (selected at run time)
613 and there is no conditional. */
615 val
= build_int_cst (unsigned_type_node
, 0);
616 flags
= build_int_cst (unsigned_type_node
, 0);
618 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
620 cond
= OMP_CLAUSE_IF_EXPR (c
);
622 c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_THREADS
);
625 val
= OMP_CLAUSE_NUM_THREADS_EXPR (c
);
626 clause_loc
= OMP_CLAUSE_LOCATION (c
);
629 clause_loc
= gimple_location (entry_stmt
);
631 c
= omp_find_clause (clauses
, OMP_CLAUSE_PROC_BIND
);
633 flags
= build_int_cst (unsigned_type_node
, OMP_CLAUSE_PROC_BIND_KIND (c
));
635 /* Ensure 'val' is of the correct type. */
636 val
= fold_convert_loc (clause_loc
, unsigned_type_node
, val
);
638 /* If we found the clause 'if (cond)', build either
639 (cond != 0) or (cond ? val : 1u). */
642 cond
= gimple_boolify (cond
);
644 if (integer_zerop (val
))
645 val
= fold_build2_loc (clause_loc
,
646 EQ_EXPR
, unsigned_type_node
, cond
,
647 build_int_cst (TREE_TYPE (cond
), 0));
650 basic_block cond_bb
, then_bb
, else_bb
;
651 edge e
, e_then
, e_else
;
652 tree tmp_then
, tmp_else
, tmp_join
, tmp_var
;
654 tmp_var
= create_tmp_var (TREE_TYPE (val
));
655 if (gimple_in_ssa_p (cfun
))
657 tmp_then
= make_ssa_name (tmp_var
);
658 tmp_else
= make_ssa_name (tmp_var
);
659 tmp_join
= make_ssa_name (tmp_var
);
668 e
= split_block_after_labels (bb
);
673 then_bb
= create_empty_bb (cond_bb
);
674 else_bb
= create_empty_bb (then_bb
);
675 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
676 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
678 stmt
= gimple_build_cond_empty (cond
);
679 gsi
= gsi_start_bb (cond_bb
);
680 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
682 gsi
= gsi_start_bb (then_bb
);
683 expand_omp_build_assign (&gsi
, tmp_then
, val
, true);
685 gsi
= gsi_start_bb (else_bb
);
686 expand_omp_build_assign (&gsi
, tmp_else
,
687 build_int_cst (unsigned_type_node
, 1),
690 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
691 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
692 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
693 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
694 e_then
= make_edge (then_bb
, bb
, EDGE_FALLTHRU
);
695 e_else
= make_edge (else_bb
, bb
, EDGE_FALLTHRU
);
697 if (gimple_in_ssa_p (cfun
))
699 gphi
*phi
= create_phi_node (tmp_join
, bb
);
700 add_phi_arg (phi
, tmp_then
, e_then
, UNKNOWN_LOCATION
);
701 add_phi_arg (phi
, tmp_else
, e_else
, UNKNOWN_LOCATION
);
707 gsi
= gsi_start_bb (bb
);
708 val
= force_gimple_operand_gsi (&gsi
, val
, true, NULL_TREE
,
709 false, GSI_CONTINUE_LINKING
);
712 gsi
= gsi_last_nondebug_bb (bb
);
713 t
= gimple_omp_parallel_data_arg (entry_stmt
);
715 t1
= null_pointer_node
;
717 t1
= build_fold_addr_expr (t
);
718 tree child_fndecl
= gimple_omp_parallel_child_fn (entry_stmt
);
719 t2
= build_fold_addr_expr (child_fndecl
);
721 vec_alloc (args
, 4 + vec_safe_length (ws_args
));
722 args
->quick_push (t2
);
723 args
->quick_push (t1
);
724 args
->quick_push (val
);
726 args
->splice (*ws_args
);
727 args
->quick_push (flags
);
729 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
730 builtin_decl_explicit (start_ix
), args
);
732 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
733 false, GSI_CONTINUE_LINKING
);
735 if (hsa_gen_requested_p ()
736 && parallel_needs_hsa_kernel_p (region
))
738 cgraph_node
*child_cnode
= cgraph_node::get (child_fndecl
);
739 hsa_register_kernel (child_cnode
);
743 /* Build the function call to GOMP_task to actually
744 generate the task operation. BB is the block where to insert the code. */
747 expand_task_call (struct omp_region
*region
, basic_block bb
,
748 gomp_task
*entry_stmt
)
751 gimple_stmt_iterator gsi
;
752 location_t loc
= gimple_location (entry_stmt
);
754 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
756 tree ifc
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
757 tree untied
= omp_find_clause (clauses
, OMP_CLAUSE_UNTIED
);
758 tree mergeable
= omp_find_clause (clauses
, OMP_CLAUSE_MERGEABLE
);
759 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
760 tree finalc
= omp_find_clause (clauses
, OMP_CLAUSE_FINAL
);
761 tree priority
= omp_find_clause (clauses
, OMP_CLAUSE_PRIORITY
);
764 = (untied
? GOMP_TASK_FLAG_UNTIED
: 0)
765 | (mergeable
? GOMP_TASK_FLAG_MERGEABLE
: 0)
766 | (depend
? GOMP_TASK_FLAG_DEPEND
: 0);
768 bool taskloop_p
= gimple_omp_task_taskloop_p (entry_stmt
);
769 tree startvar
= NULL_TREE
, endvar
= NULL_TREE
, step
= NULL_TREE
;
770 tree num_tasks
= NULL_TREE
;
774 gimple
*g
= last_stmt (region
->outer
->entry
);
775 gcc_assert (gimple_code (g
) == GIMPLE_OMP_FOR
776 && gimple_omp_for_kind (g
) == GF_OMP_FOR_KIND_TASKLOOP
);
777 struct omp_for_data fd
;
778 omp_extract_for_data (as_a
<gomp_for
*> (g
), &fd
, NULL
);
779 startvar
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
780 endvar
= omp_find_clause (OMP_CLAUSE_CHAIN (startvar
),
781 OMP_CLAUSE__LOOPTEMP_
);
782 startvar
= OMP_CLAUSE_DECL (startvar
);
783 endvar
= OMP_CLAUSE_DECL (endvar
);
784 step
= fold_convert_loc (loc
, fd
.iter_type
, fd
.loop
.step
);
785 if (fd
.loop
.cond_code
== LT_EXPR
)
786 iflags
|= GOMP_TASK_FLAG_UP
;
787 tree tclauses
= gimple_omp_for_clauses (g
);
788 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_NUM_TASKS
);
790 num_tasks
= OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks
);
793 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_GRAINSIZE
);
796 iflags
|= GOMP_TASK_FLAG_GRAINSIZE
;
797 num_tasks
= OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks
);
800 num_tasks
= integer_zero_node
;
802 num_tasks
= fold_convert_loc (loc
, long_integer_type_node
, num_tasks
);
803 if (ifc
== NULL_TREE
)
804 iflags
|= GOMP_TASK_FLAG_IF
;
805 if (omp_find_clause (tclauses
, OMP_CLAUSE_NOGROUP
))
806 iflags
|= GOMP_TASK_FLAG_NOGROUP
;
807 ull
= fd
.iter_type
== long_long_unsigned_type_node
;
810 iflags
|= GOMP_TASK_FLAG_PRIORITY
;
812 tree flags
= build_int_cst (unsigned_type_node
, iflags
);
814 tree cond
= boolean_true_node
;
819 tree t
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
820 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
821 build_int_cst (unsigned_type_node
,
823 build_int_cst (unsigned_type_node
, 0));
824 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
,
828 cond
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
833 tree t
= gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc
));
834 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
835 build_int_cst (unsigned_type_node
,
836 GOMP_TASK_FLAG_FINAL
),
837 build_int_cst (unsigned_type_node
, 0));
838 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
, flags
, t
);
841 depend
= OMP_CLAUSE_DECL (depend
);
843 depend
= build_int_cst (ptr_type_node
, 0);
845 priority
= fold_convert (integer_type_node
,
846 OMP_CLAUSE_PRIORITY_EXPR (priority
));
848 priority
= integer_zero_node
;
850 gsi
= gsi_last_nondebug_bb (bb
);
851 tree t
= gimple_omp_task_data_arg (entry_stmt
);
853 t2
= null_pointer_node
;
855 t2
= build_fold_addr_expr_loc (loc
, t
);
856 t1
= build_fold_addr_expr_loc (loc
, gimple_omp_task_child_fn (entry_stmt
));
857 t
= gimple_omp_task_copy_fn (entry_stmt
);
859 t3
= null_pointer_node
;
861 t3
= build_fold_addr_expr_loc (loc
, t
);
864 t
= build_call_expr (ull
865 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL
)
866 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP
),
868 gimple_omp_task_arg_size (entry_stmt
),
869 gimple_omp_task_arg_align (entry_stmt
), flags
,
870 num_tasks
, priority
, startvar
, endvar
, step
);
872 t
= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK
),
874 gimple_omp_task_arg_size (entry_stmt
),
875 gimple_omp_task_arg_align (entry_stmt
), cond
, flags
,
878 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
879 false, GSI_CONTINUE_LINKING
);
882 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
885 vec2chain (vec
<tree
, va_gc
> *v
)
887 tree chain
= NULL_TREE
, t
;
890 FOR_EACH_VEC_SAFE_ELT_REVERSE (v
, ix
, t
)
892 DECL_CHAIN (t
) = chain
;
899 /* Remove barriers in REGION->EXIT's block. Note that this is only
900 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
901 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
902 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
906 remove_exit_barrier (struct omp_region
*region
)
908 gimple_stmt_iterator gsi
;
913 int any_addressable_vars
= -1;
915 exit_bb
= region
->exit
;
917 /* If the parallel region doesn't return, we don't have REGION->EXIT
922 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
923 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
924 statements that can appear in between are extremely limited -- no
925 memory operations at all. Here, we allow nothing at all, so the
926 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
927 gsi
= gsi_last_nondebug_bb (exit_bb
);
928 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
929 gsi_prev_nondebug (&gsi
);
930 if (!gsi_end_p (gsi
) && gimple_code (gsi_stmt (gsi
)) != GIMPLE_LABEL
)
933 FOR_EACH_EDGE (e
, ei
, exit_bb
->preds
)
935 gsi
= gsi_last_nondebug_bb (e
->src
);
938 stmt
= gsi_stmt (gsi
);
939 if (gimple_code (stmt
) == GIMPLE_OMP_RETURN
940 && !gimple_omp_return_nowait_p (stmt
))
942 /* OpenMP 3.0 tasks unfortunately prevent this optimization
943 in many cases. If there could be tasks queued, the barrier
944 might be needed to let the tasks run before some local
945 variable of the parallel that the task uses as shared
946 runs out of scope. The task can be spawned either
947 from within current function (this would be easy to check)
948 or from some function it calls and gets passed an address
949 of such a variable. */
950 if (any_addressable_vars
< 0)
952 gomp_parallel
*parallel_stmt
953 = as_a
<gomp_parallel
*> (last_stmt (region
->entry
));
954 tree child_fun
= gimple_omp_parallel_child_fn (parallel_stmt
);
955 tree local_decls
, block
, decl
;
958 any_addressable_vars
= 0;
959 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun
), ix
, decl
)
960 if (TREE_ADDRESSABLE (decl
))
962 any_addressable_vars
= 1;
965 for (block
= gimple_block (stmt
);
966 !any_addressable_vars
968 && TREE_CODE (block
) == BLOCK
;
969 block
= BLOCK_SUPERCONTEXT (block
))
971 for (local_decls
= BLOCK_VARS (block
);
973 local_decls
= DECL_CHAIN (local_decls
))
974 if (TREE_ADDRESSABLE (local_decls
))
976 any_addressable_vars
= 1;
979 if (block
== gimple_block (parallel_stmt
))
983 if (!any_addressable_vars
)
984 gimple_omp_return_set_nowait (stmt
);
990 remove_exit_barriers (struct omp_region
*region
)
992 if (region
->type
== GIMPLE_OMP_PARALLEL
)
993 remove_exit_barrier (region
);
997 region
= region
->inner
;
998 remove_exit_barriers (region
);
1001 region
= region
->next
;
1002 remove_exit_barriers (region
);
1007 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1008 calls. These can't be declared as const functions, but
1009 within one parallel body they are constant, so they can be
1010 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1011 which are declared const. Similarly for task body, except
1012 that in untied task omp_get_thread_num () can change at any task
1013 scheduling point. */
1016 optimize_omp_library_calls (gimple
*entry_stmt
)
1019 gimple_stmt_iterator gsi
;
1020 tree thr_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1021 tree thr_num_id
= DECL_ASSEMBLER_NAME (thr_num_tree
);
1022 tree num_thr_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1023 tree num_thr_id
= DECL_ASSEMBLER_NAME (num_thr_tree
);
1024 bool untied_task
= (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1025 && omp_find_clause (gimple_omp_task_clauses (entry_stmt
),
1026 OMP_CLAUSE_UNTIED
) != NULL
);
1028 FOR_EACH_BB_FN (bb
, cfun
)
1029 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1031 gimple
*call
= gsi_stmt (gsi
);
1034 if (is_gimple_call (call
)
1035 && (decl
= gimple_call_fndecl (call
))
1036 && DECL_EXTERNAL (decl
)
1037 && TREE_PUBLIC (decl
)
1038 && DECL_INITIAL (decl
) == NULL
)
1042 if (DECL_NAME (decl
) == thr_num_id
)
1044 /* In #pragma omp task untied omp_get_thread_num () can change
1045 during the execution of the task region. */
1048 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1050 else if (DECL_NAME (decl
) == num_thr_id
)
1051 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1055 if (DECL_ASSEMBLER_NAME (decl
) != DECL_ASSEMBLER_NAME (built_in
)
1056 || gimple_call_num_args (call
) != 0)
1059 if (flag_exceptions
&& !TREE_NOTHROW (decl
))
1062 if (TREE_CODE (TREE_TYPE (decl
)) != FUNCTION_TYPE
1063 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl
)),
1064 TREE_TYPE (TREE_TYPE (built_in
))))
1067 gimple_call_set_fndecl (call
, built_in
);
1072 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1076 expand_omp_regimplify_p (tree
*tp
, int *walk_subtrees
, void *)
1080 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1081 if (VAR_P (t
) && DECL_HAS_VALUE_EXPR_P (t
))
1084 if (TREE_CODE (t
) == ADDR_EXPR
)
1085 recompute_tree_invariant_for_addr_expr (t
);
1087 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
1091 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1094 expand_omp_build_assign (gimple_stmt_iterator
*gsi_p
, tree to
, tree from
,
1097 bool simple_p
= DECL_P (to
) && TREE_ADDRESSABLE (to
);
1098 from
= force_gimple_operand_gsi (gsi_p
, from
, simple_p
, NULL_TREE
,
1099 !after
, after
? GSI_CONTINUE_LINKING
1101 gimple
*stmt
= gimple_build_assign (to
, from
);
1103 gsi_insert_after (gsi_p
, stmt
, GSI_CONTINUE_LINKING
);
1105 gsi_insert_before (gsi_p
, stmt
, GSI_SAME_STMT
);
1106 if (walk_tree (&from
, expand_omp_regimplify_p
, NULL
, NULL
)
1107 || walk_tree (&to
, expand_omp_regimplify_p
, NULL
, NULL
))
1109 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1110 gimple_regimplify_operands (stmt
, &gsi
);
1114 /* Expand the OpenMP parallel or task directive starting at REGION. */
1117 expand_omp_taskreg (struct omp_region
*region
)
1119 basic_block entry_bb
, exit_bb
, new_bb
;
1120 struct function
*child_cfun
;
1121 tree child_fn
, block
, t
;
1122 gimple_stmt_iterator gsi
;
1123 gimple
*entry_stmt
, *stmt
;
1125 vec
<tree
, va_gc
> *ws_args
;
1127 entry_stmt
= last_stmt (region
->entry
);
1128 child_fn
= gimple_omp_taskreg_child_fn (entry_stmt
);
1129 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
1131 entry_bb
= region
->entry
;
1132 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
)
1133 exit_bb
= region
->cont
;
1135 exit_bb
= region
->exit
;
1137 if (is_combined_parallel (region
))
1138 ws_args
= region
->ws_args
;
1142 if (child_cfun
->cfg
)
1144 /* Due to inlining, it may happen that we have already outlined
1145 the region, in which case all we need to do is make the
1146 sub-graph unreachable and emit the parallel call. */
1147 edge entry_succ_e
, exit_succ_e
;
1149 entry_succ_e
= single_succ_edge (entry_bb
);
1151 gsi
= gsi_last_nondebug_bb (entry_bb
);
1152 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_PARALLEL
1153 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
);
1154 gsi_remove (&gsi
, true);
1159 exit_succ_e
= single_succ_edge (exit_bb
);
1160 make_edge (new_bb
, exit_succ_e
->dest
, EDGE_FALLTHRU
);
1162 remove_edge_and_dominated_blocks (entry_succ_e
);
1166 unsigned srcidx
, dstidx
, num
;
1168 /* If the parallel region needs data sent from the parent
1169 function, then the very first statement (except possible
1170 tree profile counter updates) of the parallel body
1171 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1172 &.OMP_DATA_O is passed as an argument to the child function,
1173 we need to replace it with the argument as seen by the child
1176 In most cases, this will end up being the identity assignment
1177 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1178 a function call that has been inlined, the original PARM_DECL
1179 .OMP_DATA_I may have been converted into a different local
1180 variable. In which case, we need to keep the assignment. */
1181 if (gimple_omp_taskreg_data_arg (entry_stmt
))
1183 basic_block entry_succ_bb
1184 = single_succ_p (entry_bb
) ? single_succ (entry_bb
)
1185 : FALLTHRU_EDGE (entry_bb
)->dest
;
1187 gimple
*parcopy_stmt
= NULL
;
1189 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
1193 gcc_assert (!gsi_end_p (gsi
));
1194 stmt
= gsi_stmt (gsi
);
1195 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1198 if (gimple_num_ops (stmt
) == 2)
1200 tree arg
= gimple_assign_rhs1 (stmt
);
1202 /* We're ignore the subcode because we're
1203 effectively doing a STRIP_NOPS. */
1205 if (TREE_CODE (arg
) == ADDR_EXPR
1206 && TREE_OPERAND (arg
, 0)
1207 == gimple_omp_taskreg_data_arg (entry_stmt
))
1209 parcopy_stmt
= stmt
;
1215 gcc_assert (parcopy_stmt
!= NULL
);
1216 arg
= DECL_ARGUMENTS (child_fn
);
1218 if (!gimple_in_ssa_p (cfun
))
1220 if (gimple_assign_lhs (parcopy_stmt
) == arg
)
1221 gsi_remove (&gsi
, true);
1224 /* ?? Is setting the subcode really necessary ?? */
1225 gimple_omp_set_subcode (parcopy_stmt
, TREE_CODE (arg
));
1226 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1231 tree lhs
= gimple_assign_lhs (parcopy_stmt
);
1232 gcc_assert (SSA_NAME_VAR (lhs
) == arg
);
1233 /* We'd like to set the rhs to the default def in the child_fn,
1234 but it's too early to create ssa names in the child_fn.
1235 Instead, we set the rhs to the parm. In
1236 move_sese_region_to_fn, we introduce a default def for the
1237 parm, map the parm to it's default def, and once we encounter
1238 this stmt, replace the parm with the default def. */
1239 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1240 update_stmt (parcopy_stmt
);
1244 /* Declare local variables needed in CHILD_CFUN. */
1245 block
= DECL_INITIAL (child_fn
);
1246 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
1247 /* The gimplifier could record temporaries in parallel/task block
1248 rather than in containing function's local_decls chain,
1249 which would mean cgraph missed finalizing them. Do it now. */
1250 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
1251 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
1252 varpool_node::finalize_decl (t
);
1253 DECL_SAVED_TREE (child_fn
) = NULL
;
1254 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1255 gimple_set_body (child_fn
, NULL
);
1256 TREE_USED (block
) = 1;
1258 /* Reset DECL_CONTEXT on function arguments. */
1259 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
1260 DECL_CONTEXT (t
) = child_fn
;
1262 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263 so that it can be moved to the child function. */
1264 gsi
= gsi_last_nondebug_bb (entry_bb
);
1265 stmt
= gsi_stmt (gsi
);
1266 gcc_assert (stmt
&& (gimple_code (stmt
) == GIMPLE_OMP_PARALLEL
1267 || gimple_code (stmt
) == GIMPLE_OMP_TASK
));
1268 e
= split_block (entry_bb
, stmt
);
1269 gsi_remove (&gsi
, true);
1272 if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1273 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
1276 e2
= make_edge (e
->src
, BRANCH_EDGE (entry_bb
)->dest
, EDGE_ABNORMAL
);
1277 gcc_assert (e2
->dest
== region
->exit
);
1278 remove_edge (BRANCH_EDGE (entry_bb
));
1279 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e
->src
);
1280 gsi
= gsi_last_nondebug_bb (region
->exit
);
1281 gcc_assert (!gsi_end_p (gsi
)
1282 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1283 gsi_remove (&gsi
, true);
1286 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1289 gsi
= gsi_last_nondebug_bb (exit_bb
);
1290 gcc_assert (!gsi_end_p (gsi
)
1291 && (gimple_code (gsi_stmt (gsi
))
1292 == (e2
? GIMPLE_OMP_CONTINUE
: GIMPLE_OMP_RETURN
)));
1293 stmt
= gimple_build_return (NULL
);
1294 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1295 gsi_remove (&gsi
, true);
1298 /* Move the parallel region into CHILD_CFUN. */
1300 if (gimple_in_ssa_p (cfun
))
1302 init_tree_ssa (child_cfun
);
1303 init_ssa_operands (child_cfun
);
1304 child_cfun
->gimple_df
->in_ssa_p
= true;
1308 block
= gimple_block (entry_stmt
);
1310 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
1312 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
1315 basic_block dest_bb
= e2
->dest
;
1317 make_edge (new_bb
, dest_bb
, EDGE_FALLTHRU
);
1319 set_immediate_dominator (CDI_DOMINATORS
, dest_bb
, new_bb
);
1321 /* When the OMP expansion process cannot guarantee an up-to-date
1322 loop tree arrange for the child function to fixup loops. */
1323 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1324 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
1326 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1327 num
= vec_safe_length (child_cfun
->local_decls
);
1328 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
1330 t
= (*child_cfun
->local_decls
)[srcidx
];
1331 if (DECL_CONTEXT (t
) == cfun
->decl
)
1333 if (srcidx
!= dstidx
)
1334 (*child_cfun
->local_decls
)[dstidx
] = t
;
1338 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
1340 /* Inform the callgraph about the new function. */
1341 child_cfun
->curr_properties
= cfun
->curr_properties
;
1342 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
1343 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
1344 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
1345 node
->parallelized_function
= 1;
1346 cgraph_node::add_new_function (child_fn
, true);
1348 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
1349 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
1351 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1352 fixed in a following pass. */
1353 push_cfun (child_cfun
);
1355 assign_assembler_name_if_needed (child_fn
);
1358 optimize_omp_library_calls (entry_stmt
);
1359 update_max_bb_count ();
1360 cgraph_edge::rebuild_edges ();
1362 /* Some EH regions might become dead, see PR34608. If
1363 pass_cleanup_cfg isn't the first pass to happen with the
1364 new child, these dead EH edges might cause problems.
1365 Clean them up now. */
1366 if (flag_exceptions
)
1369 bool changed
= false;
1371 FOR_EACH_BB_FN (bb
, cfun
)
1372 changed
|= gimple_purge_dead_eh_edges (bb
);
1374 cleanup_tree_cfg ();
1376 if (gimple_in_ssa_p (cfun
))
1377 update_ssa (TODO_update_ssa
);
1378 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1379 verify_loop_structure ();
1382 if (dump_file
&& !gimple_in_ssa_p (cfun
))
1384 omp_any_child_fn_dumped
= true;
1385 dump_function_header (dump_file
, child_fn
, dump_flags
);
1386 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
1390 adjust_context_and_scope (region
, gimple_block (entry_stmt
), child_fn
);
1392 if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1393 expand_parallel_call (region
, new_bb
,
1394 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1396 expand_task_call (region
, new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1397 if (gimple_in_ssa_p (cfun
))
1398 update_ssa (TODO_update_ssa_only_virtuals
);
1401 /* Information about members of an OpenACC collapsed loop nest. */
1403 struct oacc_collapse
1405 tree base
; /* Base value. */
1406 tree iters
; /* Number of steps. */
1407 tree step
; /* Step size. */
1408 tree tile
; /* Tile increment (if tiled). */
1409 tree outer
; /* Tile iterator var. */
1412 /* Helper for expand_oacc_for. Determine collapsed loop information.
1413 Fill in COUNTS array. Emit any initialization code before GSI.
1414 Return the calculated outer loop bound of BOUND_TYPE. */
1417 expand_oacc_collapse_init (const struct omp_for_data
*fd
,
1418 gimple_stmt_iterator
*gsi
,
1419 oacc_collapse
*counts
, tree bound_type
,
1422 tree tiling
= fd
->tiling
;
1423 tree total
= build_int_cst (bound_type
, 1);
1426 gcc_assert (integer_onep (fd
->loop
.step
));
1427 gcc_assert (integer_zerop (fd
->loop
.n1
));
1429 /* When tiling, the first operand of the tile clause applies to the
1430 innermost loop, and we work outwards from there. Seems
1431 backwards, but whatever. */
1432 for (ix
= fd
->collapse
; ix
--;)
1434 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1436 tree iter_type
= TREE_TYPE (loop
->v
);
1437 tree diff_type
= iter_type
;
1438 tree plus_type
= iter_type
;
1440 gcc_assert (loop
->cond_code
== fd
->loop
.cond_code
);
1442 if (POINTER_TYPE_P (iter_type
))
1443 plus_type
= sizetype
;
1444 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
1445 diff_type
= signed_type_for (diff_type
);
1446 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
1447 diff_type
= integer_type_node
;
1451 tree num
= build_int_cst (integer_type_node
, fd
->collapse
);
1452 tree loop_no
= build_int_cst (integer_type_node
, ix
);
1453 tree tile
= TREE_VALUE (tiling
);
1455 = gimple_build_call_internal (IFN_GOACC_TILE
, 5, num
, loop_no
, tile
,
1456 /* gwv-outer=*/integer_zero_node
,
1457 /* gwv-inner=*/integer_zero_node
);
1459 counts
[ix
].outer
= create_tmp_var (iter_type
, ".outer");
1460 counts
[ix
].tile
= create_tmp_var (diff_type
, ".tile");
1461 gimple_call_set_lhs (call
, counts
[ix
].tile
);
1462 gimple_set_location (call
, loc
);
1463 gsi_insert_before (gsi
, call
, GSI_SAME_STMT
);
1465 tiling
= TREE_CHAIN (tiling
);
1469 counts
[ix
].tile
= NULL
;
1470 counts
[ix
].outer
= loop
->v
;
1475 tree s
= loop
->step
;
1476 bool up
= loop
->cond_code
== LT_EXPR
;
1477 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
1481 b
= force_gimple_operand_gsi (gsi
, b
, true, NULL_TREE
,
1482 true, GSI_SAME_STMT
);
1483 e
= force_gimple_operand_gsi (gsi
, e
, true, NULL_TREE
,
1484 true, GSI_SAME_STMT
);
1486 /* Convert the step, avoiding possible unsigned->signed overflow. */
1487 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
1489 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
1490 s
= fold_convert (diff_type
, s
);
1492 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
1493 s
= force_gimple_operand_gsi (gsi
, s
, true, NULL_TREE
,
1494 true, GSI_SAME_STMT
);
1496 /* Determine the range, avoiding possible unsigned->signed overflow. */
1497 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
1498 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
1499 fold_convert (plus_type
, negating
? b
: e
),
1500 fold_convert (plus_type
, negating
? e
: b
));
1501 expr
= fold_convert (diff_type
, expr
);
1503 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
1504 tree range
= force_gimple_operand_gsi
1505 (gsi
, expr
, true, NULL_TREE
, true, GSI_SAME_STMT
);
1507 /* Determine number of iterations. */
1508 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
1509 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
1510 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
1512 tree iters
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
,
1513 true, GSI_SAME_STMT
);
1515 counts
[ix
].base
= b
;
1516 counts
[ix
].iters
= iters
;
1517 counts
[ix
].step
= s
;
1519 total
= fold_build2 (MULT_EXPR
, bound_type
, total
,
1520 fold_convert (bound_type
, iters
));
1526 /* Emit initializers for collapsed loop members. INNER is true if
1527 this is for the element loop of a TILE. IVAR is the outer
1528 loop iteration variable, from which collapsed loop iteration values
1529 are calculated. COUNTS array has been initialized by
1530 expand_oacc_collapse_inits. */
1533 expand_oacc_collapse_vars (const struct omp_for_data
*fd
, bool inner
,
1534 gimple_stmt_iterator
*gsi
,
1535 const oacc_collapse
*counts
, tree ivar
)
1537 tree ivar_type
= TREE_TYPE (ivar
);
1539 /* The most rapidly changing iteration variable is the innermost
1541 for (int ix
= fd
->collapse
; ix
--;)
1543 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1544 const oacc_collapse
*collapse
= &counts
[ix
];
1545 tree v
= inner
? loop
->v
: collapse
->outer
;
1546 tree iter_type
= TREE_TYPE (v
);
1547 tree diff_type
= TREE_TYPE (collapse
->step
);
1548 tree plus_type
= iter_type
;
1549 enum tree_code plus_code
= PLUS_EXPR
;
1552 if (POINTER_TYPE_P (iter_type
))
1554 plus_code
= POINTER_PLUS_EXPR
;
1555 plus_type
= sizetype
;
1561 tree mod
= fold_convert (ivar_type
, collapse
->iters
);
1562 ivar
= fold_build2 (TRUNC_DIV_EXPR
, ivar_type
, expr
, mod
);
1563 expr
= fold_build2 (TRUNC_MOD_EXPR
, ivar_type
, expr
, mod
);
1564 ivar
= force_gimple_operand_gsi (gsi
, ivar
, true, NULL_TREE
,
1565 true, GSI_SAME_STMT
);
1568 expr
= fold_build2 (MULT_EXPR
, diff_type
, fold_convert (diff_type
, expr
),
1570 expr
= fold_build2 (plus_code
, iter_type
,
1571 inner
? collapse
->outer
: collapse
->base
,
1572 fold_convert (plus_type
, expr
));
1573 expr
= force_gimple_operand_gsi (gsi
, expr
, false, NULL_TREE
,
1574 true, GSI_SAME_STMT
);
1575 gassign
*ass
= gimple_build_assign (v
, expr
);
1576 gsi_insert_before (gsi
, ass
, GSI_SAME_STMT
);
1580 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1581 of the combined collapse > 1 loop constructs, generate code like:
1582 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1587 count3 = (adj + N32 - N31) / STEP3;
1588 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1593 count2 = (adj + N22 - N21) / STEP2;
1594 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1599 count1 = (adj + N12 - N11) / STEP1;
1600 count = count1 * count2 * count3;
1601 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1603 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1604 of the combined loop constructs, just initialize COUNTS array
1605 from the _looptemp_ clauses. */
1607 /* NOTE: It *could* be better to moosh all of the BBs together,
1608 creating one larger BB with all the computation and the unexpected
1609 jump at the end. I.e.
1611 bool zero3, zero2, zero1, zero;
1614 count3 = (N32 - N31) /[cl] STEP3;
1616 count2 = (N22 - N21) /[cl] STEP2;
1618 count1 = (N12 - N11) /[cl] STEP1;
1619 zero = zero3 || zero2 || zero1;
1620 count = count1 * count2 * count3;
1621 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1623 After all, we expect the zero=false, and thus we expect to have to
1624 evaluate all of the comparison expressions, so short-circuiting
1625 oughtn't be a win. Since the condition isn't protecting a
1626 denominator, we're not concerned about divide-by-zero, so we can
1627 fully evaluate count even if a numerator turned out to be wrong.
1629 It seems like putting this all together would create much better
1630 scheduling opportunities, and less pressure on the chip's branch
1634 expand_omp_for_init_counts (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1635 basic_block
&entry_bb
, tree
*counts
,
1636 basic_block
&zero_iter1_bb
, int &first_zero_iter1
,
1637 basic_block
&zero_iter2_bb
, int &first_zero_iter2
,
1638 basic_block
&l2_dom_bb
)
1640 tree t
, type
= TREE_TYPE (fd
->loop
.v
);
1644 /* Collapsed loops need work for expansion into SSA form. */
1645 gcc_assert (!gimple_in_ssa_p (cfun
));
1647 if (gimple_omp_for_combined_into_p (fd
->for_stmt
)
1648 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
1650 gcc_assert (fd
->ordered
== 0);
1651 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1652 isn't supposed to be handled, as the inner loop doesn't
1654 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
1655 OMP_CLAUSE__LOOPTEMP_
);
1656 gcc_assert (innerc
);
1657 for (i
= 0; i
< fd
->collapse
; i
++)
1659 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1660 OMP_CLAUSE__LOOPTEMP_
);
1661 gcc_assert (innerc
);
1663 counts
[i
] = OMP_CLAUSE_DECL (innerc
);
1665 counts
[0] = NULL_TREE
;
1670 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1672 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1673 counts
[i
] = NULL_TREE
;
1674 t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1675 fold_convert (itype
, fd
->loops
[i
].n1
),
1676 fold_convert (itype
, fd
->loops
[i
].n2
));
1677 if (t
&& integer_zerop (t
))
1679 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1680 counts
[i
] = build_int_cst (type
, 0);
1684 for (i
= 0; i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
1686 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1688 if (i
>= fd
->collapse
&& counts
[i
])
1690 if ((SSA_VAR_P (fd
->loop
.n2
) || i
>= fd
->collapse
)
1691 && ((t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1692 fold_convert (itype
, fd
->loops
[i
].n1
),
1693 fold_convert (itype
, fd
->loops
[i
].n2
)))
1694 == NULL_TREE
|| !integer_onep (t
)))
1698 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
1699 n1
= force_gimple_operand_gsi (gsi
, n1
, true, NULL_TREE
,
1700 true, GSI_SAME_STMT
);
1701 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
1702 n2
= force_gimple_operand_gsi (gsi
, n2
, true, NULL_TREE
,
1703 true, GSI_SAME_STMT
);
1704 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
1705 NULL_TREE
, NULL_TREE
);
1706 gsi_insert_before (gsi
, cond_stmt
, GSI_SAME_STMT
);
1707 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
1708 expand_omp_regimplify_p
, NULL
, NULL
)
1709 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
1710 expand_omp_regimplify_p
, NULL
, NULL
))
1712 *gsi
= gsi_for_stmt (cond_stmt
);
1713 gimple_regimplify_operands (cond_stmt
, gsi
);
1715 e
= split_block (entry_bb
, cond_stmt
);
1716 basic_block
&zero_iter_bb
1717 = i
< fd
->collapse
? zero_iter1_bb
: zero_iter2_bb
;
1718 int &first_zero_iter
1719 = i
< fd
->collapse
? first_zero_iter1
: first_zero_iter2
;
1720 if (zero_iter_bb
== NULL
)
1722 gassign
*assign_stmt
;
1723 first_zero_iter
= i
;
1724 zero_iter_bb
= create_empty_bb (entry_bb
);
1725 add_bb_to_loop (zero_iter_bb
, entry_bb
->loop_father
);
1726 *gsi
= gsi_after_labels (zero_iter_bb
);
1727 if (i
< fd
->collapse
)
1728 assign_stmt
= gimple_build_assign (fd
->loop
.n2
,
1729 build_zero_cst (type
));
1732 counts
[i
] = create_tmp_reg (type
, ".count");
1734 = gimple_build_assign (counts
[i
], build_zero_cst (type
));
1736 gsi_insert_before (gsi
, assign_stmt
, GSI_SAME_STMT
);
1737 set_immediate_dominator (CDI_DOMINATORS
, zero_iter_bb
,
1740 ne
= make_edge (entry_bb
, zero_iter_bb
, EDGE_FALSE_VALUE
);
1741 ne
->probability
= profile_probability::very_unlikely ();
1742 e
->flags
= EDGE_TRUE_VALUE
;
1743 e
->probability
= ne
->probability
.invert ();
1744 if (l2_dom_bb
== NULL
)
1745 l2_dom_bb
= entry_bb
;
1747 *gsi
= gsi_last_nondebug_bb (entry_bb
);
1750 if (POINTER_TYPE_P (itype
))
1751 itype
= signed_type_for (itype
);
1752 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
1754 t
= fold_build2 (PLUS_EXPR
, itype
,
1755 fold_convert (itype
, fd
->loops
[i
].step
), t
);
1756 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
1757 fold_convert (itype
, fd
->loops
[i
].n2
));
1758 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
1759 fold_convert (itype
, fd
->loops
[i
].n1
));
1760 /* ?? We could probably use CEIL_DIV_EXPR instead of
1761 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1762 generate the same code in the end because generically we
1763 don't know that the values involved must be negative for
1765 if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
1766 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1767 fold_build1 (NEGATE_EXPR
, itype
, t
),
1768 fold_build1 (NEGATE_EXPR
, itype
,
1769 fold_convert (itype
,
1770 fd
->loops
[i
].step
)));
1772 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
1773 fold_convert (itype
, fd
->loops
[i
].step
));
1774 t
= fold_convert (type
, t
);
1775 if (TREE_CODE (t
) == INTEGER_CST
)
1779 if (i
< fd
->collapse
|| i
!= first_zero_iter2
)
1780 counts
[i
] = create_tmp_reg (type
, ".count");
1781 expand_omp_build_assign (gsi
, counts
[i
], t
);
1783 if (SSA_VAR_P (fd
->loop
.n2
) && i
< fd
->collapse
)
1788 t
= fold_build2 (MULT_EXPR
, type
, fd
->loop
.n2
, counts
[i
]);
1789 expand_omp_build_assign (gsi
, fd
->loop
.n2
, t
);
1794 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1796 V3 = N31 + (T % count3) * STEP3;
1798 V2 = N21 + (T % count2) * STEP2;
1800 V1 = N11 + T * STEP1;
1801 if this loop doesn't have an inner loop construct combined with it.
1802 If it does have an inner loop construct combined with it and the
1803 iteration count isn't known constant, store values from counts array
1804 into its _looptemp_ temporaries instead. */
1807 expand_omp_for_init_vars (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1808 tree
*counts
, gimple
*inner_stmt
, tree startvar
)
1811 if (gimple_omp_for_combined_p (fd
->for_stmt
))
1813 /* If fd->loop.n2 is constant, then no propagation of the counts
1814 is needed, they are constant. */
1815 if (TREE_CODE (fd
->loop
.n2
) == INTEGER_CST
)
1818 tree clauses
= gimple_code (inner_stmt
) != GIMPLE_OMP_FOR
1819 ? gimple_omp_taskreg_clauses (inner_stmt
)
1820 : gimple_omp_for_clauses (inner_stmt
);
1821 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1822 isn't supposed to be handled, as the inner loop doesn't
1824 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
1825 gcc_assert (innerc
);
1826 for (i
= 0; i
< fd
->collapse
; i
++)
1828 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1829 OMP_CLAUSE__LOOPTEMP_
);
1830 gcc_assert (innerc
);
1833 tree tem
= OMP_CLAUSE_DECL (innerc
);
1834 tree t
= fold_convert (TREE_TYPE (tem
), counts
[i
]);
1835 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1836 false, GSI_CONTINUE_LINKING
);
1837 gassign
*stmt
= gimple_build_assign (tem
, t
);
1838 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1844 tree type
= TREE_TYPE (fd
->loop
.v
);
1845 tree tem
= create_tmp_reg (type
, ".tem");
1846 gassign
*stmt
= gimple_build_assign (tem
, startvar
);
1847 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1849 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1851 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
), itype
, t
;
1853 if (POINTER_TYPE_P (vtype
))
1854 itype
= signed_type_for (vtype
);
1856 t
= fold_build2 (TRUNC_MOD_EXPR
, type
, tem
, counts
[i
]);
1859 t
= fold_convert (itype
, t
);
1860 t
= fold_build2 (MULT_EXPR
, itype
, t
,
1861 fold_convert (itype
, fd
->loops
[i
].step
));
1862 if (POINTER_TYPE_P (vtype
))
1863 t
= fold_build_pointer_plus (fd
->loops
[i
].n1
, t
);
1865 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
].n1
, t
);
1866 t
= force_gimple_operand_gsi (gsi
, t
,
1867 DECL_P (fd
->loops
[i
].v
)
1868 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1870 GSI_CONTINUE_LINKING
);
1871 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1872 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1875 t
= fold_build2 (TRUNC_DIV_EXPR
, type
, tem
, counts
[i
]);
1876 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1877 false, GSI_CONTINUE_LINKING
);
1878 stmt
= gimple_build_assign (tem
, t
);
1879 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1884 /* Helper function for expand_omp_for_*. Generate code like:
1887 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1891 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1898 extract_omp_for_update_vars (struct omp_for_data
*fd
, basic_block cont_bb
,
1899 basic_block body_bb
)
1901 basic_block last_bb
, bb
, collapse_bb
= NULL
;
1903 gimple_stmt_iterator gsi
;
1909 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1911 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
);
1913 bb
= create_empty_bb (last_bb
);
1914 add_bb_to_loop (bb
, last_bb
->loop_father
);
1915 gsi
= gsi_start_bb (bb
);
1917 if (i
< fd
->collapse
- 1)
1919 e
= make_edge (last_bb
, bb
, EDGE_FALSE_VALUE
);
1920 e
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
1922 t
= fd
->loops
[i
+ 1].n1
;
1923 t
= force_gimple_operand_gsi (&gsi
, t
,
1924 DECL_P (fd
->loops
[i
+ 1].v
)
1925 && TREE_ADDRESSABLE (fd
->loops
[i
1928 GSI_CONTINUE_LINKING
);
1929 stmt
= gimple_build_assign (fd
->loops
[i
+ 1].v
, t
);
1930 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1935 set_immediate_dominator (CDI_DOMINATORS
, bb
, last_bb
);
1937 if (POINTER_TYPE_P (vtype
))
1938 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, fd
->loops
[i
].step
);
1940 t
= fold_build2 (PLUS_EXPR
, vtype
, fd
->loops
[i
].v
, fd
->loops
[i
].step
);
1941 t
= force_gimple_operand_gsi (&gsi
, t
,
1942 DECL_P (fd
->loops
[i
].v
)
1943 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1944 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
1945 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1946 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1950 t
= fd
->loops
[i
].n2
;
1951 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
1952 false, GSI_CONTINUE_LINKING
);
1953 tree v
= fd
->loops
[i
].v
;
1954 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
1955 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
1956 false, GSI_CONTINUE_LINKING
);
1957 t
= fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
1958 stmt
= gimple_build_cond_empty (t
);
1959 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1960 if (walk_tree (gimple_cond_lhs_ptr (as_a
<gcond
*> (stmt
)),
1961 expand_omp_regimplify_p
, NULL
, NULL
)
1962 || walk_tree (gimple_cond_rhs_ptr (as_a
<gcond
*> (stmt
)),
1963 expand_omp_regimplify_p
, NULL
, NULL
))
1964 gimple_regimplify_operands (stmt
, &gsi
);
1965 e
= make_edge (bb
, body_bb
, EDGE_TRUE_VALUE
);
1966 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
1969 make_edge (bb
, body_bb
, EDGE_FALLTHRU
);
1976 /* Expand #pragma omp ordered depend(source). */
1979 expand_omp_ordered_source (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
1980 tree
*counts
, location_t loc
)
1982 enum built_in_function source_ix
1983 = fd
->iter_type
== long_integer_type_node
1984 ? BUILT_IN_GOMP_DOACROSS_POST
: BUILT_IN_GOMP_DOACROSS_ULL_POST
;
1986 = gimple_build_call (builtin_decl_explicit (source_ix
), 1,
1987 build_fold_addr_expr (counts
[fd
->ordered
]));
1988 gimple_set_location (g
, loc
);
1989 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1992 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1995 expand_omp_ordered_sink (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
1996 tree
*counts
, tree c
, location_t loc
)
1998 auto_vec
<tree
, 10> args
;
1999 enum built_in_function sink_ix
2000 = fd
->iter_type
== long_integer_type_node
2001 ? BUILT_IN_GOMP_DOACROSS_WAIT
: BUILT_IN_GOMP_DOACROSS_ULL_WAIT
;
2002 tree t
, off
, coff
= NULL_TREE
, deps
= OMP_CLAUSE_DECL (c
), cond
= NULL_TREE
;
2004 gimple_stmt_iterator gsi2
= *gsi
;
2005 bool warned_step
= false;
2007 for (i
= 0; i
< fd
->ordered
; i
++)
2009 tree step
= NULL_TREE
;
2010 off
= TREE_PURPOSE (deps
);
2011 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
2013 step
= TREE_OPERAND (off
, 1);
2014 off
= TREE_OPERAND (off
, 0);
2016 if (!integer_zerop (off
))
2018 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
2019 || fd
->loops
[i
].cond_code
== GT_EXPR
);
2020 bool forward
= fd
->loops
[i
].cond_code
== LT_EXPR
;
2023 /* Non-simple Fortran DO loops. If step is variable,
2024 we don't know at compile even the direction, so can't
2026 if (TREE_CODE (step
) != INTEGER_CST
)
2028 forward
= tree_int_cst_sgn (step
) != -1;
2030 if (forward
^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2031 warning_at (loc
, 0, "%<depend(sink)%> clause waiting for "
2032 "lexically later iteration");
2035 deps
= TREE_CHAIN (deps
);
2037 /* If all offsets corresponding to the collapsed loops are zero,
2038 this depend clause can be ignored. FIXME: but there is still a
2039 flush needed. We need to emit one __sync_synchronize () for it
2040 though (perhaps conditionally)? Solve this together with the
2041 conservative dependence folding optimization.
2042 if (i >= fd->collapse)
2045 deps
= OMP_CLAUSE_DECL (c
);
2047 edge e1
= split_block (gsi_bb (gsi2
), gsi_stmt (gsi2
));
2048 edge e2
= split_block_after_labels (e1
->dest
);
2050 gsi2
= gsi_after_labels (e1
->dest
);
2051 *gsi
= gsi_last_bb (e1
->src
);
2052 for (i
= 0; i
< fd
->ordered
; i
++)
2054 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
2055 tree step
= NULL_TREE
;
2056 tree orig_off
= NULL_TREE
;
2057 if (POINTER_TYPE_P (itype
))
2060 deps
= TREE_CHAIN (deps
);
2061 off
= TREE_PURPOSE (deps
);
2062 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
2064 step
= TREE_OPERAND (off
, 1);
2065 off
= TREE_OPERAND (off
, 0);
2066 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
2067 && integer_onep (fd
->loops
[i
].step
)
2068 && !POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)));
2070 tree s
= fold_convert_loc (loc
, itype
, step
? step
: fd
->loops
[i
].step
);
2073 off
= fold_convert_loc (loc
, itype
, off
);
2075 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
2078 if (integer_zerop (off
))
2079 t
= boolean_true_node
;
2083 tree co
= fold_convert_loc (loc
, itype
, off
);
2084 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
2086 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2087 co
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, co
);
2088 a
= fold_build2_loc (loc
, POINTER_PLUS_EXPR
,
2089 TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].v
,
2092 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2093 a
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2094 fd
->loops
[i
].v
, co
);
2096 a
= fold_build2_loc (loc
, PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2097 fd
->loops
[i
].v
, co
);
2101 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2102 t1
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2105 t1
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2107 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2108 t2
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2111 t2
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2113 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
,
2114 step
, build_int_cst (TREE_TYPE (step
), 0));
2115 if (TREE_CODE (step
) != INTEGER_CST
)
2117 t1
= unshare_expr (t1
);
2118 t1
= force_gimple_operand_gsi (gsi
, t1
, true, NULL_TREE
,
2119 false, GSI_CONTINUE_LINKING
);
2120 t2
= unshare_expr (t2
);
2121 t2
= force_gimple_operand_gsi (gsi
, t2
, true, NULL_TREE
,
2122 false, GSI_CONTINUE_LINKING
);
2124 t
= fold_build3_loc (loc
, COND_EXPR
, boolean_type_node
,
2127 else if (fd
->loops
[i
].cond_code
== LT_EXPR
)
2129 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2130 t
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2133 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2136 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2137 t
= fold_build2_loc (loc
, GT_EXPR
, boolean_type_node
, a
,
2140 t
= fold_build2_loc (loc
, LE_EXPR
, boolean_type_node
, a
,
2144 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
, cond
, t
);
2148 off
= fold_convert_loc (loc
, itype
, off
);
2151 || (fd
->loops
[i
].cond_code
== LT_EXPR
2152 ? !integer_onep (fd
->loops
[i
].step
)
2153 : !integer_minus_onep (fd
->loops
[i
].step
)))
2155 if (step
== NULL_TREE
2156 && TYPE_UNSIGNED (itype
)
2157 && fd
->loops
[i
].cond_code
== GT_EXPR
)
2158 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
, off
,
2159 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
2162 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
,
2163 orig_off
? orig_off
: off
, s
);
2164 t
= fold_build2_loc (loc
, EQ_EXPR
, boolean_type_node
, t
,
2165 build_int_cst (itype
, 0));
2166 if (integer_zerop (t
) && !warned_step
)
2168 warning_at (loc
, 0, "%<depend(sink)%> refers to iteration never "
2169 "in the iteration space");
2172 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
,
2176 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
2182 t
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2183 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
2184 t
= fold_convert_loc (loc
, fd
->iter_type
, t
);
2187 /* We have divided off by step already earlier. */;
2188 else if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
2189 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
,
2190 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
2193 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
2194 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2195 off
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, off
);
2196 off
= fold_convert_loc (loc
, fd
->iter_type
, off
);
2197 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
2200 off
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, coff
,
2202 if (i
< fd
->collapse
- 1)
2204 coff
= fold_build2_loc (loc
, MULT_EXPR
, fd
->iter_type
, off
,
2209 off
= unshare_expr (off
);
2210 t
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, t
, off
);
2211 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2212 true, GSI_SAME_STMT
);
2215 gimple
*g
= gimple_build_call_vec (builtin_decl_explicit (sink_ix
), args
);
2216 gimple_set_location (g
, loc
);
2217 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
2219 cond
= unshare_expr (cond
);
2220 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
, false,
2221 GSI_CONTINUE_LINKING
);
2222 gsi_insert_after (gsi
, gimple_build_cond_empty (cond
), GSI_NEW_STMT
);
2223 edge e3
= make_edge (e1
->src
, e2
->dest
, EDGE_FALSE_VALUE
);
2224 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2225 e1
->probability
= e3
->probability
.invert ();
2226 e1
->flags
= EDGE_TRUE_VALUE
;
2227 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e1
->src
);
2229 *gsi
= gsi_after_labels (e2
->dest
);
2232 /* Expand all #pragma omp ordered depend(source) and
2233 #pragma omp ordered depend(sink:...) constructs in the current
2234 #pragma omp for ordered(n) region. */
2237 expand_omp_ordered_source_sink (struct omp_region
*region
,
2238 struct omp_for_data
*fd
, tree
*counts
,
2239 basic_block cont_bb
)
2241 struct omp_region
*inner
;
2243 for (i
= fd
->collapse
- 1; i
< fd
->ordered
; i
++)
2244 if (i
== fd
->collapse
- 1 && fd
->collapse
> 1)
2245 counts
[i
] = NULL_TREE
;
2246 else if (i
>= fd
->collapse
&& !cont_bb
)
2247 counts
[i
] = build_zero_cst (fd
->iter_type
);
2248 else if (!POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
))
2249 && integer_onep (fd
->loops
[i
].step
))
2250 counts
[i
] = NULL_TREE
;
2252 counts
[i
] = create_tmp_var (fd
->iter_type
, ".orditer");
2254 = build_array_type_nelts (fd
->iter_type
, fd
->ordered
- fd
->collapse
+ 1);
2255 counts
[fd
->ordered
] = create_tmp_var (atype
, ".orditera");
2256 TREE_ADDRESSABLE (counts
[fd
->ordered
]) = 1;
2258 for (inner
= region
->inner
; inner
; inner
= inner
->next
)
2259 if (inner
->type
== GIMPLE_OMP_ORDERED
)
2261 gomp_ordered
*ord_stmt
= inner
->ord_stmt
;
2262 gimple_stmt_iterator gsi
= gsi_for_stmt (ord_stmt
);
2263 location_t loc
= gimple_location (ord_stmt
);
2265 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
2266 c
; c
= OMP_CLAUSE_CHAIN (c
))
2267 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SOURCE
)
2270 expand_omp_ordered_source (&gsi
, fd
, counts
, loc
);
2271 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
2272 c
; c
= OMP_CLAUSE_CHAIN (c
))
2273 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SINK
)
2274 expand_omp_ordered_sink (&gsi
, fd
, counts
, c
, loc
);
2275 gsi_remove (&gsi
, true);
2279 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2283 expand_omp_for_ordered_loops (struct omp_for_data
*fd
, tree
*counts
,
2284 basic_block cont_bb
, basic_block body_bb
,
2285 bool ordered_lastprivate
)
2287 if (fd
->ordered
== fd
->collapse
)
2292 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2293 for (int i
= fd
->collapse
; i
< fd
->ordered
; i
++)
2295 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2296 tree n1
= fold_convert (type
, fd
->loops
[i
].n1
);
2297 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, n1
);
2298 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2299 size_int (i
- fd
->collapse
+ 1),
2300 NULL_TREE
, NULL_TREE
);
2301 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
2306 for (int i
= fd
->ordered
- 1; i
>= fd
->collapse
; i
--)
2308 tree t
, type
= TREE_TYPE (fd
->loops
[i
].v
);
2309 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2310 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
2311 fold_convert (type
, fd
->loops
[i
].n1
));
2313 expand_omp_build_assign (&gsi
, counts
[i
],
2314 build_zero_cst (fd
->iter_type
));
2315 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2316 size_int (i
- fd
->collapse
+ 1),
2317 NULL_TREE
, NULL_TREE
);
2318 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
2319 if (!gsi_end_p (gsi
))
2322 gsi
= gsi_last_bb (body_bb
);
2323 edge e1
= split_block (body_bb
, gsi_stmt (gsi
));
2324 basic_block new_body
= e1
->dest
;
2325 if (body_bb
== cont_bb
)
2328 basic_block new_header
;
2329 if (EDGE_COUNT (cont_bb
->preds
) > 0)
2331 gsi
= gsi_last_bb (cont_bb
);
2332 if (POINTER_TYPE_P (type
))
2333 t
= fold_build_pointer_plus (fd
->loops
[i
].v
,
2334 fold_convert (sizetype
,
2335 fd
->loops
[i
].step
));
2337 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loops
[i
].v
,
2338 fold_convert (type
, fd
->loops
[i
].step
));
2339 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
2342 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[i
],
2343 build_int_cst (fd
->iter_type
, 1));
2344 expand_omp_build_assign (&gsi
, counts
[i
], t
);
2349 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2350 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
2351 t
= fold_convert (fd
->iter_type
, t
);
2352 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2353 true, GSI_SAME_STMT
);
2355 aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2356 size_int (i
- fd
->collapse
+ 1),
2357 NULL_TREE
, NULL_TREE
);
2358 expand_omp_build_assign (&gsi
, aref
, t
);
2360 e2
= split_block (cont_bb
, gsi_stmt (gsi
));
2361 new_header
= e2
->dest
;
2364 new_header
= cont_bb
;
2365 gsi
= gsi_after_labels (new_header
);
2366 tree v
= force_gimple_operand_gsi (&gsi
, fd
->loops
[i
].v
, true, NULL_TREE
,
2367 true, GSI_SAME_STMT
);
2369 = force_gimple_operand_gsi (&gsi
, fold_convert (type
, fd
->loops
[i
].n2
),
2370 true, NULL_TREE
, true, GSI_SAME_STMT
);
2371 t
= build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, n2
);
2372 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_NEW_STMT
);
2373 edge e3
= split_block (new_header
, gsi_stmt (gsi
));
2376 make_edge (body_bb
, new_header
, EDGE_FALLTHRU
);
2377 e3
->flags
= EDGE_FALSE_VALUE
;
2378 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2379 e1
= make_edge (new_header
, new_body
, EDGE_TRUE_VALUE
);
2380 e1
->probability
= e3
->probability
.invert ();
2382 set_immediate_dominator (CDI_DOMINATORS
, new_header
, body_bb
);
2383 set_immediate_dominator (CDI_DOMINATORS
, new_body
, new_header
);
2387 struct loop
*loop
= alloc_loop ();
2388 loop
->header
= new_header
;
2389 loop
->latch
= e2
->src
;
2390 add_loop (loop
, body_bb
->loop_father
);
2394 /* If there are any lastprivate clauses and it is possible some loops
2395 might have zero iterations, ensure all the decls are initialized,
2396 otherwise we could crash evaluating C++ class iterators with lastprivate
2398 bool need_inits
= false;
2399 for (int i
= fd
->collapse
; ordered_lastprivate
&& i
< fd
->ordered
; i
++)
2402 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2403 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2404 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
2405 fold_convert (type
, fd
->loops
[i
].n1
));
2409 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2410 tree this_cond
= fold_build2 (fd
->loops
[i
].cond_code
,
2412 fold_convert (type
, fd
->loops
[i
].n1
),
2413 fold_convert (type
, fd
->loops
[i
].n2
));
2414 if (!integer_onep (this_cond
))
2421 /* A subroutine of expand_omp_for. Generate code for a parallel
2422 loop with any schedule. Given parameters:
2424 for (V = N1; V cond N2; V += STEP) BODY;
2426 where COND is "<" or ">", we generate pseudocode
2428 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2429 if (more) goto L0; else goto L3;
2436 if (V cond iend) goto L1; else goto L2;
2438 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2441 If this is a combined omp parallel loop, instead of the call to
2442 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2443 If this is gimple_omp_for_combined_p loop, then instead of assigning
2444 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2445 inner GIMPLE_OMP_FOR and V += STEP; and
2446 if (V cond iend) goto L1; else goto L2; are removed.
2448 For collapsed loops, given parameters:
2450 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2451 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2452 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2455 we generate pseudocode
2457 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2462 count3 = (adj + N32 - N31) / STEP3;
2463 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2468 count2 = (adj + N22 - N21) / STEP2;
2469 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2474 count1 = (adj + N12 - N11) / STEP1;
2475 count = count1 * count2 * count3;
2480 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2481 if (more) goto L0; else goto L3;
2485 V3 = N31 + (T % count3) * STEP3;
2487 V2 = N21 + (T % count2) * STEP2;
2489 V1 = N11 + T * STEP1;
2494 if (V < iend) goto L10; else goto L2;
2497 if (V3 cond3 N32) goto L1; else goto L11;
2501 if (V2 cond2 N22) goto L1; else goto L12;
2507 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2513 expand_omp_for_generic (struct omp_region
*region
,
2514 struct omp_for_data
*fd
,
2515 enum built_in_function start_fn
,
2516 enum built_in_function next_fn
,
2519 tree type
, istart0
, iend0
, iend
;
2520 tree t
, vmain
, vback
, bias
= NULL_TREE
;
2521 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, collapse_bb
;
2522 basic_block l2_bb
= NULL
, l3_bb
= NULL
;
2523 gimple_stmt_iterator gsi
;
2524 gassign
*assign_stmt
;
2525 bool in_combined_parallel
= is_combined_parallel (region
);
2526 bool broken_loop
= region
->cont
== NULL
;
2528 tree
*counts
= NULL
;
2530 bool ordered_lastprivate
= false;
2532 gcc_assert (!broken_loop
|| !in_combined_parallel
);
2533 gcc_assert (fd
->iter_type
== long_integer_type_node
2534 || !in_combined_parallel
);
2536 entry_bb
= region
->entry
;
2537 cont_bb
= region
->cont
;
2539 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
2540 gcc_assert (broken_loop
2541 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
2542 l0_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
2543 l1_bb
= single_succ (l0_bb
);
2546 l2_bb
= create_empty_bb (cont_bb
);
2547 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l1_bb
2548 || (single_succ_edge (BRANCH_EDGE (cont_bb
)->dest
)->dest
2550 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
2554 l3_bb
= BRANCH_EDGE (entry_bb
)->dest
;
2555 exit_bb
= region
->exit
;
2557 gsi
= gsi_last_nondebug_bb (entry_bb
);
2559 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
2561 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi
)),
2562 OMP_CLAUSE_LASTPRIVATE
))
2563 ordered_lastprivate
= false;
2564 if (fd
->collapse
> 1 || fd
->ordered
)
2566 int first_zero_iter1
= -1, first_zero_iter2
= -1;
2567 basic_block zero_iter1_bb
= NULL
, zero_iter2_bb
= NULL
, l2_dom_bb
= NULL
;
2569 counts
= XALLOCAVEC (tree
, fd
->ordered
? fd
->ordered
+ 1 : fd
->collapse
);
2570 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
2571 zero_iter1_bb
, first_zero_iter1
,
2572 zero_iter2_bb
, first_zero_iter2
, l2_dom_bb
);
2576 /* Some counts[i] vars might be uninitialized if
2577 some loop has zero iterations. But the body shouldn't
2578 be executed in that case, so just avoid uninit warnings. */
2579 for (i
= first_zero_iter1
;
2580 i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
2581 if (SSA_VAR_P (counts
[i
]))
2582 TREE_NO_WARNING (counts
[i
]) = 1;
2584 e
= split_block (entry_bb
, gsi_stmt (gsi
));
2586 make_edge (zero_iter1_bb
, entry_bb
, EDGE_FALLTHRU
);
2587 gsi
= gsi_last_nondebug_bb (entry_bb
);
2588 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
2589 get_immediate_dominator (CDI_DOMINATORS
,
2594 /* Some counts[i] vars might be uninitialized if
2595 some loop has zero iterations. But the body shouldn't
2596 be executed in that case, so just avoid uninit warnings. */
2597 for (i
= first_zero_iter2
; i
< fd
->ordered
; i
++)
2598 if (SSA_VAR_P (counts
[i
]))
2599 TREE_NO_WARNING (counts
[i
]) = 1;
2601 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
2605 e
= split_block (entry_bb
, gsi_stmt (gsi
));
2607 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
2608 gsi
= gsi_last_nondebug_bb (entry_bb
);
2609 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
2610 get_immediate_dominator
2611 (CDI_DOMINATORS
, zero_iter2_bb
));
2614 if (fd
->collapse
== 1)
2616 counts
[0] = fd
->loop
.n2
;
2617 fd
->loop
= fd
->loops
[0];
2621 type
= TREE_TYPE (fd
->loop
.v
);
2622 istart0
= create_tmp_var (fd
->iter_type
, ".istart0");
2623 iend0
= create_tmp_var (fd
->iter_type
, ".iend0");
2624 TREE_ADDRESSABLE (istart0
) = 1;
2625 TREE_ADDRESSABLE (iend0
) = 1;
2627 /* See if we need to bias by LLONG_MIN. */
2628 if (fd
->iter_type
== long_long_unsigned_type_node
2629 && TREE_CODE (type
) == INTEGER_TYPE
2630 && !TYPE_UNSIGNED (type
)
2631 && fd
->ordered
== 0)
2635 if (fd
->loop
.cond_code
== LT_EXPR
)
2638 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
2642 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
2645 if (TREE_CODE (n1
) != INTEGER_CST
2646 || TREE_CODE (n2
) != INTEGER_CST
2647 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
2648 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
2651 gimple_stmt_iterator gsif
= gsi
;
2654 tree arr
= NULL_TREE
;
2655 if (in_combined_parallel
)
2657 gcc_assert (fd
->ordered
== 0);
2658 /* In a combined parallel loop, emit a call to
2659 GOMP_loop_foo_next. */
2660 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
2661 build_fold_addr_expr (istart0
),
2662 build_fold_addr_expr (iend0
));
2666 tree t0
, t1
, t2
, t3
, t4
;
2667 /* If this is not a combined parallel loop, emit a call to
2668 GOMP_loop_foo_start in ENTRY_BB. */
2669 t4
= build_fold_addr_expr (iend0
);
2670 t3
= build_fold_addr_expr (istart0
);
2673 t0
= build_int_cst (unsigned_type_node
,
2674 fd
->ordered
- fd
->collapse
+ 1);
2675 arr
= create_tmp_var (build_array_type_nelts (fd
->iter_type
,
2677 - fd
->collapse
+ 1),
2679 DECL_NAMELESS (arr
) = 1;
2680 TREE_ADDRESSABLE (arr
) = 1;
2681 TREE_STATIC (arr
) = 1;
2682 vec
<constructor_elt
, va_gc
> *v
;
2683 vec_alloc (v
, fd
->ordered
- fd
->collapse
+ 1);
2686 for (idx
= 0; idx
< fd
->ordered
- fd
->collapse
+ 1; idx
++)
2689 if (idx
== 0 && fd
->collapse
> 1)
2692 c
= counts
[idx
+ fd
->collapse
- 1];
2693 tree purpose
= size_int (idx
);
2694 CONSTRUCTOR_APPEND_ELT (v
, purpose
, c
);
2695 if (TREE_CODE (c
) != INTEGER_CST
)
2696 TREE_STATIC (arr
) = 0;
2699 DECL_INITIAL (arr
) = build_constructor (TREE_TYPE (arr
), v
);
2700 if (!TREE_STATIC (arr
))
2701 force_gimple_operand_gsi (&gsi
, build1 (DECL_EXPR
,
2702 void_type_node
, arr
),
2703 true, NULL_TREE
, true, GSI_SAME_STMT
);
2704 t1
= build_fold_addr_expr (arr
);
2709 t2
= fold_convert (fd
->iter_type
, fd
->loop
.step
);
2712 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
2715 = omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
2716 OMP_CLAUSE__LOOPTEMP_
);
2717 gcc_assert (innerc
);
2718 t0
= OMP_CLAUSE_DECL (innerc
);
2719 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2720 OMP_CLAUSE__LOOPTEMP_
);
2721 gcc_assert (innerc
);
2722 t1
= OMP_CLAUSE_DECL (innerc
);
2724 if (POINTER_TYPE_P (TREE_TYPE (t0
))
2725 && TYPE_PRECISION (TREE_TYPE (t0
))
2726 != TYPE_PRECISION (fd
->iter_type
))
2728 /* Avoid casting pointers to integer of a different size. */
2729 tree itype
= signed_type_for (type
);
2730 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
2731 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
2735 t1
= fold_convert (fd
->iter_type
, t1
);
2736 t0
= fold_convert (fd
->iter_type
, t0
);
2740 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
2741 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
2744 if (fd
->iter_type
== long_integer_type_node
|| fd
->ordered
)
2748 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
2749 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
2751 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2752 5, t0
, t1
, t
, t3
, t4
);
2754 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2755 6, t0
, t1
, t2
, t
, t3
, t4
);
2757 else if (fd
->ordered
)
2758 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2761 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2762 5, t0
, t1
, t2
, t3
, t4
);
2770 /* The GOMP_loop_ull_*start functions have additional boolean
2771 argument, true for < loops and false for > loops.
2772 In Fortran, the C bool type can be different from
2773 boolean_type_node. */
2774 bfn_decl
= builtin_decl_explicit (start_fn
);
2775 c_bool_type
= TREE_TYPE (TREE_TYPE (bfn_decl
));
2776 t5
= build_int_cst (c_bool_type
,
2777 fd
->loop
.cond_code
== LT_EXPR
? 1 : 0);
2780 tree bfn_decl
= builtin_decl_explicit (start_fn
);
2781 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
2782 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
2783 t
= build_call_expr (bfn_decl
, 7, t5
, t0
, t1
, t2
, t
, t3
, t4
);
2786 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2787 6, t5
, t0
, t1
, t2
, t3
, t4
);
2790 if (TREE_TYPE (t
) != boolean_type_node
)
2791 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
2792 t
, build_int_cst (TREE_TYPE (t
), 0));
2793 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2794 true, GSI_SAME_STMT
);
2795 if (arr
&& !TREE_STATIC (arr
))
2797 tree clobber
= build_constructor (TREE_TYPE (arr
), NULL
);
2798 TREE_THIS_VOLATILE (clobber
) = 1;
2799 gsi_insert_before (&gsi
, gimple_build_assign (arr
, clobber
),
2802 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
2804 /* Remove the GIMPLE_OMP_FOR statement. */
2805 gsi_remove (&gsi
, true);
2807 if (gsi_end_p (gsif
))
2808 gsif
= gsi_after_labels (gsi_bb (gsif
));
2811 /* Iteration setup for sequential loop goes in L0_BB. */
2812 tree startvar
= fd
->loop
.v
;
2813 tree endvar
= NULL_TREE
;
2815 if (gimple_omp_for_combined_p (fd
->for_stmt
))
2817 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_FOR
2818 && gimple_omp_for_kind (inner_stmt
)
2819 == GF_OMP_FOR_KIND_SIMD
);
2820 tree innerc
= omp_find_clause (gimple_omp_for_clauses (inner_stmt
),
2821 OMP_CLAUSE__LOOPTEMP_
);
2822 gcc_assert (innerc
);
2823 startvar
= OMP_CLAUSE_DECL (innerc
);
2824 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2825 OMP_CLAUSE__LOOPTEMP_
);
2826 gcc_assert (innerc
);
2827 endvar
= OMP_CLAUSE_DECL (innerc
);
2830 gsi
= gsi_start_bb (l0_bb
);
2832 if (fd
->ordered
&& fd
->collapse
== 1)
2833 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
2834 fold_convert (fd
->iter_type
, fd
->loop
.step
));
2836 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
2837 if (fd
->ordered
&& fd
->collapse
== 1)
2839 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2840 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
2841 fd
->loop
.n1
, fold_convert (sizetype
, t
));
2844 t
= fold_convert (TREE_TYPE (startvar
), t
);
2845 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
2851 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2852 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
2853 t
= fold_convert (TREE_TYPE (startvar
), t
);
2855 t
= force_gimple_operand_gsi (&gsi
, t
,
2857 && TREE_ADDRESSABLE (startvar
),
2858 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
2859 assign_stmt
= gimple_build_assign (startvar
, t
);
2860 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2863 if (fd
->ordered
&& fd
->collapse
== 1)
2864 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
2865 fold_convert (fd
->iter_type
, fd
->loop
.step
));
2867 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
2868 if (fd
->ordered
&& fd
->collapse
== 1)
2870 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2871 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
2872 fd
->loop
.n1
, fold_convert (sizetype
, t
));
2875 t
= fold_convert (TREE_TYPE (startvar
), t
);
2876 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
2882 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2883 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
2884 t
= fold_convert (TREE_TYPE (startvar
), t
);
2886 iend
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2887 false, GSI_CONTINUE_LINKING
);
2890 assign_stmt
= gimple_build_assign (endvar
, iend
);
2891 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2892 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (iend
)))
2893 assign_stmt
= gimple_build_assign (fd
->loop
.v
, iend
);
2895 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, iend
);
2896 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2898 /* Handle linear clause adjustments. */
2899 tree itercnt
= NULL_TREE
;
2900 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
2901 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
2902 c
; c
= OMP_CLAUSE_CHAIN (c
))
2903 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
2904 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
2906 tree d
= OMP_CLAUSE_DECL (c
);
2907 bool is_ref
= omp_is_reference (d
);
2908 tree t
= d
, a
, dest
;
2910 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
2911 tree type
= TREE_TYPE (t
);
2912 if (POINTER_TYPE_P (type
))
2914 dest
= unshare_expr (t
);
2915 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
2916 expand_omp_build_assign (&gsif
, v
, t
);
2917 if (itercnt
== NULL_TREE
)
2920 tree n1
= fd
->loop
.n1
;
2921 if (POINTER_TYPE_P (TREE_TYPE (itercnt
)))
2924 = fold_convert (signed_type_for (TREE_TYPE (itercnt
)),
2926 n1
= fold_convert (TREE_TYPE (itercnt
), n1
);
2928 itercnt
= fold_build2 (MINUS_EXPR
, TREE_TYPE (itercnt
),
2930 itercnt
= fold_build2 (EXACT_DIV_EXPR
, TREE_TYPE (itercnt
),
2931 itercnt
, fd
->loop
.step
);
2932 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
2934 GSI_CONTINUE_LINKING
);
2936 a
= fold_build2 (MULT_EXPR
, type
,
2937 fold_convert (type
, itercnt
),
2938 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
2939 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
2940 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
2941 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2942 false, GSI_CONTINUE_LINKING
);
2943 assign_stmt
= gimple_build_assign (dest
, t
);
2944 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2946 if (fd
->collapse
> 1)
2947 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
2951 /* Until now, counts array contained number of iterations or
2952 variable containing it for ith loop. From now on, we need
2953 those counts only for collapsed loops, and only for the 2nd
2954 till the last collapsed one. Move those one element earlier,
2955 we'll use counts[fd->collapse - 1] for the first source/sink
2956 iteration counter and so on and counts[fd->ordered]
2957 as the array holding the current counter values for
2959 if (fd
->collapse
> 1)
2960 memmove (counts
, counts
+ 1, (fd
->collapse
- 1) * sizeof (counts
[0]));
2964 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
2966 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2968 = fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
2969 fold_convert (type
, fd
->loops
[i
].n1
),
2970 fold_convert (type
, fd
->loops
[i
].n2
));
2971 if (!integer_onep (this_cond
))
2974 if (i
< fd
->ordered
)
2977 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
);
2978 add_bb_to_loop (cont_bb
, l1_bb
->loop_father
);
2979 gimple_stmt_iterator gsi
= gsi_after_labels (cont_bb
);
2980 gimple
*g
= gimple_build_omp_continue (fd
->loop
.v
, fd
->loop
.v
);
2981 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2982 make_edge (cont_bb
, l3_bb
, EDGE_FALLTHRU
);
2983 make_edge (cont_bb
, l1_bb
, 0);
2984 l2_bb
= create_empty_bb (cont_bb
);
2985 broken_loop
= false;
2988 expand_omp_ordered_source_sink (region
, fd
, counts
, cont_bb
);
2989 cont_bb
= expand_omp_for_ordered_loops (fd
, counts
, cont_bb
, l1_bb
,
2990 ordered_lastprivate
);
2991 if (counts
[fd
->collapse
- 1])
2993 gcc_assert (fd
->collapse
== 1);
2994 gsi
= gsi_last_bb (l0_bb
);
2995 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1],
2997 gsi
= gsi_last_bb (cont_bb
);
2998 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[fd
->collapse
- 1],
2999 build_int_cst (fd
->iter_type
, 1));
3000 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1], t
);
3001 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3002 size_zero_node
, NULL_TREE
, NULL_TREE
);
3003 expand_omp_build_assign (&gsi
, aref
, counts
[fd
->collapse
- 1]);
3004 t
= counts
[fd
->collapse
- 1];
3006 else if (fd
->collapse
> 1)
3010 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
3011 fd
->loops
[0].v
, fd
->loops
[0].n1
);
3012 t
= fold_convert (fd
->iter_type
, t
);
3014 gsi
= gsi_last_bb (l0_bb
);
3015 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3016 size_zero_node
, NULL_TREE
, NULL_TREE
);
3017 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3018 false, GSI_CONTINUE_LINKING
);
3019 expand_omp_build_assign (&gsi
, aref
, t
, true);
3024 /* Code to control the increment and predicate for the sequential
3025 loop goes in the CONT_BB. */
3026 gsi
= gsi_last_nondebug_bb (cont_bb
);
3027 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
3028 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
3029 vmain
= gimple_omp_continue_control_use (cont_stmt
);
3030 vback
= gimple_omp_continue_control_def (cont_stmt
);
3032 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
3034 if (POINTER_TYPE_P (type
))
3035 t
= fold_build_pointer_plus (vmain
, fd
->loop
.step
);
3037 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, fd
->loop
.step
);
3038 t
= force_gimple_operand_gsi (&gsi
, t
,
3040 && TREE_ADDRESSABLE (vback
),
3041 NULL_TREE
, true, GSI_SAME_STMT
);
3042 assign_stmt
= gimple_build_assign (vback
, t
);
3043 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3045 if (fd
->ordered
&& counts
[fd
->collapse
- 1] == NULL_TREE
)
3048 if (fd
->collapse
> 1)
3052 tem
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
3053 fd
->loops
[0].v
, fd
->loops
[0].n1
);
3054 tem
= fold_convert (fd
->iter_type
, tem
);
3056 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
,
3057 counts
[fd
->ordered
], size_zero_node
,
3058 NULL_TREE
, NULL_TREE
);
3059 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
,
3060 true, GSI_SAME_STMT
);
3061 expand_omp_build_assign (&gsi
, aref
, tem
);
3064 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
3065 DECL_P (vback
) && TREE_ADDRESSABLE (vback
) ? t
: vback
,
3067 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3068 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3071 /* Remove GIMPLE_OMP_CONTINUE. */
3072 gsi_remove (&gsi
, true);
3074 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3075 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, l1_bb
);
3077 /* Emit code to get the next parallel iteration in L2_BB. */
3078 gsi
= gsi_start_bb (l2_bb
);
3080 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
3081 build_fold_addr_expr (istart0
),
3082 build_fold_addr_expr (iend0
));
3083 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3084 false, GSI_CONTINUE_LINKING
);
3085 if (TREE_TYPE (t
) != boolean_type_node
)
3086 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
3087 t
, build_int_cst (TREE_TYPE (t
), 0));
3088 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3089 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
3092 /* Add the loop cleanup function. */
3093 gsi
= gsi_last_nondebug_bb (exit_bb
);
3094 if (gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
3095 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT
);
3096 else if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
3097 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
3099 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
3100 gcall
*call_stmt
= gimple_build_call (t
, 0);
3101 if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
3102 gimple_call_set_lhs (call_stmt
, gimple_omp_return_lhs (gsi_stmt (gsi
)));
3103 gsi_insert_after (&gsi
, call_stmt
, GSI_SAME_STMT
);
3106 tree arr
= counts
[fd
->ordered
];
3107 tree clobber
= build_constructor (TREE_TYPE (arr
), NULL
);
3108 TREE_THIS_VOLATILE (clobber
) = 1;
3109 gsi_insert_after (&gsi
, gimple_build_assign (arr
, clobber
),
3112 gsi_remove (&gsi
, true);
3114 /* Connect the new blocks. */
3115 find_edge (entry_bb
, l0_bb
)->flags
= EDGE_TRUE_VALUE
;
3116 find_edge (entry_bb
, l3_bb
)->flags
= EDGE_FALSE_VALUE
;
3122 e
= find_edge (cont_bb
, l3_bb
);
3123 ne
= make_edge (l2_bb
, l3_bb
, EDGE_FALSE_VALUE
);
3125 phis
= phi_nodes (l3_bb
);
3126 for (gsi
= gsi_start (phis
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3128 gimple
*phi
= gsi_stmt (gsi
);
3129 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, ne
),
3130 PHI_ARG_DEF_FROM_EDGE (phi
, e
));
3134 make_edge (cont_bb
, l2_bb
, EDGE_FALSE_VALUE
);
3135 e
= find_edge (cont_bb
, l1_bb
);
3138 e
= BRANCH_EDGE (cont_bb
);
3139 gcc_assert (single_succ (e
->dest
) == l1_bb
);
3141 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3146 else if (fd
->collapse
> 1)
3149 e
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
3152 e
->flags
= EDGE_TRUE_VALUE
;
3155 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
3156 find_edge (cont_bb
, l2_bb
)->probability
= e
->probability
.invert ();
3160 e
= find_edge (cont_bb
, l2_bb
);
3161 e
->flags
= EDGE_FALLTHRU
;
3163 make_edge (l2_bb
, l0_bb
, EDGE_TRUE_VALUE
);
3165 if (gimple_in_ssa_p (cfun
))
3167 /* Add phis to the outer loop that connect to the phis in the inner,
3168 original loop, and move the loop entry value of the inner phi to
3169 the loop entry value of the outer phi. */
3171 for (psi
= gsi_start_phis (l3_bb
); !gsi_end_p (psi
); gsi_next (&psi
))
3173 source_location locus
;
3175 gphi
*exit_phi
= psi
.phi ();
3177 if (virtual_operand_p (gimple_phi_result (exit_phi
)))
3180 edge l2_to_l3
= find_edge (l2_bb
, l3_bb
);
3181 tree exit_res
= PHI_ARG_DEF_FROM_EDGE (exit_phi
, l2_to_l3
);
3183 basic_block latch
= BRANCH_EDGE (cont_bb
)->dest
;
3184 edge latch_to_l1
= find_edge (latch
, l1_bb
);
3186 = find_phi_with_arg_on_edge (exit_res
, latch_to_l1
);
3188 tree t
= gimple_phi_result (exit_phi
);
3189 tree new_res
= copy_ssa_name (t
, NULL
);
3190 nphi
= create_phi_node (new_res
, l0_bb
);
3192 edge l0_to_l1
= find_edge (l0_bb
, l1_bb
);
3193 t
= PHI_ARG_DEF_FROM_EDGE (inner_phi
, l0_to_l1
);
3194 locus
= gimple_phi_arg_location_from_edge (inner_phi
, l0_to_l1
);
3195 edge entry_to_l0
= find_edge (entry_bb
, l0_bb
);
3196 add_phi_arg (nphi
, t
, entry_to_l0
, locus
);
3198 edge l2_to_l0
= find_edge (l2_bb
, l0_bb
);
3199 add_phi_arg (nphi
, exit_res
, l2_to_l0
, UNKNOWN_LOCATION
);
3201 add_phi_arg (inner_phi
, new_res
, l0_to_l1
, UNKNOWN_LOCATION
);
3205 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
,
3206 recompute_dominator (CDI_DOMINATORS
, l2_bb
));
3207 set_immediate_dominator (CDI_DOMINATORS
, l3_bb
,
3208 recompute_dominator (CDI_DOMINATORS
, l3_bb
));
3209 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
,
3210 recompute_dominator (CDI_DOMINATORS
, l0_bb
));
3211 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
,
3212 recompute_dominator (CDI_DOMINATORS
, l1_bb
));
3214 /* We enter expand_omp_for_generic with a loop. This original loop may
3215 have its own loop struct, or it may be part of an outer loop struct
3216 (which may be the fake loop). */
3217 struct loop
*outer_loop
= entry_bb
->loop_father
;
3218 bool orig_loop_has_loop_struct
= l1_bb
->loop_father
!= outer_loop
;
3220 add_bb_to_loop (l2_bb
, outer_loop
);
3222 /* We've added a new loop around the original loop. Allocate the
3223 corresponding loop struct. */
3224 struct loop
*new_loop
= alloc_loop ();
3225 new_loop
->header
= l0_bb
;
3226 new_loop
->latch
= l2_bb
;
3227 add_loop (new_loop
, outer_loop
);
3229 /* Allocate a loop structure for the original loop unless we already
3231 if (!orig_loop_has_loop_struct
3232 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3234 struct loop
*orig_loop
= alloc_loop ();
3235 orig_loop
->header
= l1_bb
;
3236 /* The loop may have multiple latches. */
3237 add_loop (orig_loop
, new_loop
);
3242 /* A subroutine of expand_omp_for. Generate code for a parallel
3243 loop with static schedule and no specified chunk size. Given
3246 for (V = N1; V cond N2; V += STEP) BODY;
3248 where COND is "<" or ">", we generate pseudocode
3250 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3255 if ((__typeof (V)) -1 > 0 && cond is >)
3256 n = -(adj + N2 - N1) / -STEP;
3258 n = (adj + N2 - N1) / STEP;
3261 if (threadid < tt) goto L3; else goto L4;
3266 s0 = q * threadid + tt;
3269 if (s0 >= e0) goto L2; else goto L0;
3275 if (V cond e) goto L1;
3280 expand_omp_for_static_nochunk (struct omp_region
*region
,
3281 struct omp_for_data
*fd
,
3284 tree n
, q
, s0
, e0
, e
, t
, tt
, nthreads
, threadid
;
3285 tree type
, itype
, vmain
, vback
;
3286 basic_block entry_bb
, second_bb
, third_bb
, exit_bb
, seq_start_bb
;
3287 basic_block body_bb
, cont_bb
, collapse_bb
= NULL
;
3289 gimple_stmt_iterator gsi
;
3291 bool broken_loop
= region
->cont
== NULL
;
3292 tree
*counts
= NULL
;
3295 itype
= type
= TREE_TYPE (fd
->loop
.v
);
3296 if (POINTER_TYPE_P (type
))
3297 itype
= signed_type_for (type
);
3299 entry_bb
= region
->entry
;
3300 cont_bb
= region
->cont
;
3301 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
3302 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
3303 gcc_assert (broken_loop
3304 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
3305 seq_start_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
3306 body_bb
= single_succ (seq_start_bb
);
3309 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
3310 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
3311 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
3313 exit_bb
= region
->exit
;
3315 /* Iteration space partitioning goes in ENTRY_BB. */
3316 gsi
= gsi_last_nondebug_bb (entry_bb
);
3317 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3319 if (fd
->collapse
> 1)
3321 int first_zero_iter
= -1, dummy
= -1;
3322 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
3324 counts
= XALLOCAVEC (tree
, fd
->collapse
);
3325 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3326 fin_bb
, first_zero_iter
,
3327 dummy_bb
, dummy
, l2_dom_bb
);
3330 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3331 t
= integer_one_node
;
3333 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
3334 fold_convert (type
, fd
->loop
.n1
),
3335 fold_convert (type
, fd
->loop
.n2
));
3336 if (fd
->collapse
== 1
3337 && TYPE_UNSIGNED (type
)
3338 && (t
== NULL_TREE
|| !integer_onep (t
)))
3340 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
3341 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
3342 true, GSI_SAME_STMT
);
3343 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
3344 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
3345 true, GSI_SAME_STMT
);
3346 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
3347 NULL_TREE
, NULL_TREE
);
3348 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3349 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
3350 expand_omp_regimplify_p
, NULL
, NULL
)
3351 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
3352 expand_omp_regimplify_p
, NULL
, NULL
))
3354 gsi
= gsi_for_stmt (cond_stmt
);
3355 gimple_regimplify_operands (cond_stmt
, &gsi
);
3357 ep
= split_block (entry_bb
, cond_stmt
);
3358 ep
->flags
= EDGE_TRUE_VALUE
;
3359 entry_bb
= ep
->dest
;
3360 ep
->probability
= profile_probability::very_likely ();
3361 ep
= make_edge (ep
->src
, fin_bb
, EDGE_FALSE_VALUE
);
3362 ep
->probability
= profile_probability::very_unlikely ();
3363 if (gimple_in_ssa_p (cfun
))
3365 int dest_idx
= find_edge (entry_bb
, fin_bb
)->dest_idx
;
3366 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
3367 !gsi_end_p (gpi
); gsi_next (&gpi
))
3369 gphi
*phi
= gpi
.phi ();
3370 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
3371 ep
, UNKNOWN_LOCATION
);
3374 gsi
= gsi_last_bb (entry_bb
);
3377 switch (gimple_omp_for_kind (fd
->for_stmt
))
3379 case GF_OMP_FOR_KIND_FOR
:
3380 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
3381 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
3383 case GF_OMP_FOR_KIND_DISTRIBUTE
:
3384 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
3385 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
3390 nthreads
= build_call_expr (nthreads
, 0);
3391 nthreads
= fold_convert (itype
, nthreads
);
3392 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
3393 true, GSI_SAME_STMT
);
3394 threadid
= build_call_expr (threadid
, 0);
3395 threadid
= fold_convert (itype
, threadid
);
3396 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
3397 true, GSI_SAME_STMT
);
3401 step
= fd
->loop
.step
;
3402 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3404 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3405 OMP_CLAUSE__LOOPTEMP_
);
3406 gcc_assert (innerc
);
3407 n1
= OMP_CLAUSE_DECL (innerc
);
3408 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3409 OMP_CLAUSE__LOOPTEMP_
);
3410 gcc_assert (innerc
);
3411 n2
= OMP_CLAUSE_DECL (innerc
);
3413 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
3414 true, NULL_TREE
, true, GSI_SAME_STMT
);
3415 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
3416 true, NULL_TREE
, true, GSI_SAME_STMT
);
3417 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
3418 true, NULL_TREE
, true, GSI_SAME_STMT
);
3420 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
3421 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
3422 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
3423 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
3424 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
3425 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
3426 fold_build1 (NEGATE_EXPR
, itype
, t
),
3427 fold_build1 (NEGATE_EXPR
, itype
, step
));
3429 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
3430 t
= fold_convert (itype
, t
);
3431 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3433 q
= create_tmp_reg (itype
, "q");
3434 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, n
, nthreads
);
3435 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
3436 gsi_insert_before (&gsi
, gimple_build_assign (q
, t
), GSI_SAME_STMT
);
3438 tt
= create_tmp_reg (itype
, "tt");
3439 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
, n
, nthreads
);
3440 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
3441 gsi_insert_before (&gsi
, gimple_build_assign (tt
, t
), GSI_SAME_STMT
);
3443 t
= build2 (LT_EXPR
, boolean_type_node
, threadid
, tt
);
3444 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3445 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3447 second_bb
= split_block (entry_bb
, cond_stmt
)->dest
;
3448 gsi
= gsi_last_nondebug_bb (second_bb
);
3449 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3451 gsi_insert_before (&gsi
, gimple_build_assign (tt
, build_int_cst (itype
, 0)),
3453 gassign
*assign_stmt
3454 = gimple_build_assign (q
, PLUS_EXPR
, q
, build_int_cst (itype
, 1));
3455 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3457 third_bb
= split_block (second_bb
, assign_stmt
)->dest
;
3458 gsi
= gsi_last_nondebug_bb (third_bb
);
3459 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3461 t
= build2 (MULT_EXPR
, itype
, q
, threadid
);
3462 t
= build2 (PLUS_EXPR
, itype
, t
, tt
);
3463 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3465 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, q
);
3466 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3468 t
= build2 (GE_EXPR
, boolean_type_node
, s0
, e0
);
3469 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
3471 /* Remove the GIMPLE_OMP_FOR statement. */
3472 gsi_remove (&gsi
, true);
3474 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3475 gsi
= gsi_start_bb (seq_start_bb
);
3477 tree startvar
= fd
->loop
.v
;
3478 tree endvar
= NULL_TREE
;
3480 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3482 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
3483 ? gimple_omp_parallel_clauses (inner_stmt
)
3484 : gimple_omp_for_clauses (inner_stmt
);
3485 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
3486 gcc_assert (innerc
);
3487 startvar
= OMP_CLAUSE_DECL (innerc
);
3488 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3489 OMP_CLAUSE__LOOPTEMP_
);
3490 gcc_assert (innerc
);
3491 endvar
= OMP_CLAUSE_DECL (innerc
);
3492 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
3493 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
3496 for (i
= 1; i
< fd
->collapse
; i
++)
3498 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3499 OMP_CLAUSE__LOOPTEMP_
);
3500 gcc_assert (innerc
);
3502 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3503 OMP_CLAUSE__LOOPTEMP_
);
3506 /* If needed (distribute parallel for with lastprivate),
3507 propagate down the total number of iterations. */
3508 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
3510 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
3511 GSI_CONTINUE_LINKING
);
3512 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
3513 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3517 t
= fold_convert (itype
, s0
);
3518 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3519 if (POINTER_TYPE_P (type
))
3520 t
= fold_build_pointer_plus (n1
, t
);
3522 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3523 t
= fold_convert (TREE_TYPE (startvar
), t
);
3524 t
= force_gimple_operand_gsi (&gsi
, t
,
3526 && TREE_ADDRESSABLE (startvar
),
3527 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
3528 assign_stmt
= gimple_build_assign (startvar
, t
);
3529 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3531 t
= fold_convert (itype
, e0
);
3532 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3533 if (POINTER_TYPE_P (type
))
3534 t
= fold_build_pointer_plus (n1
, t
);
3536 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3537 t
= fold_convert (TREE_TYPE (startvar
), t
);
3538 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3539 false, GSI_CONTINUE_LINKING
);
3542 assign_stmt
= gimple_build_assign (endvar
, e
);
3543 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3544 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
3545 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
3547 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
3548 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3550 /* Handle linear clause adjustments. */
3551 tree itercnt
= NULL_TREE
;
3552 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
3553 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
3554 c
; c
= OMP_CLAUSE_CHAIN (c
))
3555 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
3556 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
3558 tree d
= OMP_CLAUSE_DECL (c
);
3559 bool is_ref
= omp_is_reference (d
);
3560 tree t
= d
, a
, dest
;
3562 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
3563 if (itercnt
== NULL_TREE
)
3565 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3567 itercnt
= fold_build2 (MINUS_EXPR
, itype
,
3568 fold_convert (itype
, n1
),
3569 fold_convert (itype
, fd
->loop
.n1
));
3570 itercnt
= fold_build2 (EXACT_DIV_EXPR
, itype
, itercnt
, step
);
3571 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercnt
, s0
);
3572 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
3574 GSI_CONTINUE_LINKING
);
3579 tree type
= TREE_TYPE (t
);
3580 if (POINTER_TYPE_P (type
))
3582 a
= fold_build2 (MULT_EXPR
, type
,
3583 fold_convert (type
, itercnt
),
3584 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
3585 dest
= unshare_expr (t
);
3586 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
3587 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
, a
);
3588 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3589 false, GSI_CONTINUE_LINKING
);
3590 assign_stmt
= gimple_build_assign (dest
, t
);
3591 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3593 if (fd
->collapse
> 1)
3594 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
3598 /* The code controlling the sequential loop replaces the
3599 GIMPLE_OMP_CONTINUE. */
3600 gsi
= gsi_last_nondebug_bb (cont_bb
);
3601 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
3602 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
3603 vmain
= gimple_omp_continue_control_use (cont_stmt
);
3604 vback
= gimple_omp_continue_control_def (cont_stmt
);
3606 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
3608 if (POINTER_TYPE_P (type
))
3609 t
= fold_build_pointer_plus (vmain
, step
);
3611 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
3612 t
= force_gimple_operand_gsi (&gsi
, t
,
3614 && TREE_ADDRESSABLE (vback
),
3615 NULL_TREE
, true, GSI_SAME_STMT
);
3616 assign_stmt
= gimple_build_assign (vback
, t
);
3617 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3619 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
3620 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
3622 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
3625 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3626 gsi_remove (&gsi
, true);
3628 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3629 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
3632 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3633 gsi
= gsi_last_nondebug_bb (exit_bb
);
3634 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
3636 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
3637 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
3639 gsi_remove (&gsi
, true);
3641 /* Connect all the blocks. */
3642 ep
= make_edge (entry_bb
, third_bb
, EDGE_FALSE_VALUE
);
3643 ep
->probability
= profile_probability::guessed_always ().apply_scale (3, 4);
3644 ep
= find_edge (entry_bb
, second_bb
);
3645 ep
->flags
= EDGE_TRUE_VALUE
;
3646 ep
->probability
= profile_probability::guessed_always ().apply_scale (1, 4);
3647 find_edge (third_bb
, seq_start_bb
)->flags
= EDGE_FALSE_VALUE
;
3648 find_edge (third_bb
, fin_bb
)->flags
= EDGE_TRUE_VALUE
;
3652 ep
= find_edge (cont_bb
, body_bb
);
3655 ep
= BRANCH_EDGE (cont_bb
);
3656 gcc_assert (single_succ (ep
->dest
) == body_bb
);
3658 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3663 else if (fd
->collapse
> 1)
3666 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
3669 ep
->flags
= EDGE_TRUE_VALUE
;
3670 find_edge (cont_bb
, fin_bb
)->flags
3671 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
3674 set_immediate_dominator (CDI_DOMINATORS
, second_bb
, entry_bb
);
3675 set_immediate_dominator (CDI_DOMINATORS
, third_bb
, entry_bb
);
3676 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
, third_bb
);
3678 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
3679 recompute_dominator (CDI_DOMINATORS
, body_bb
));
3680 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
3681 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
3683 struct loop
*loop
= body_bb
->loop_father
;
3684 if (loop
!= entry_bb
->loop_father
)
3686 gcc_assert (broken_loop
|| loop
->header
== body_bb
);
3687 gcc_assert (broken_loop
3688 || loop
->latch
== region
->cont
3689 || single_pred (loop
->latch
) == region
->cont
);
3693 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
3695 loop
= alloc_loop ();
3696 loop
->header
= body_bb
;
3697 if (collapse_bb
== NULL
)
3698 loop
->latch
= cont_bb
;
3699 add_loop (loop
, body_bb
->loop_father
);
3703 /* Return phi in E->DEST with ARG on edge E. */
3706 find_phi_with_arg_on_edge (tree arg
, edge e
)
3708 basic_block bb
= e
->dest
;
3710 for (gphi_iterator gpi
= gsi_start_phis (bb
);
3714 gphi
*phi
= gpi
.phi ();
3715 if (PHI_ARG_DEF_FROM_EDGE (phi
, e
) == arg
)
3722 /* A subroutine of expand_omp_for. Generate code for a parallel
3723 loop with static schedule and a specified chunk size. Given
3726 for (V = N1; V cond N2; V += STEP) BODY;
3728 where COND is "<" or ">", we generate pseudocode
3730 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3735 if ((__typeof (V)) -1 > 0 && cond is >)
3736 n = -(adj + N2 - N1) / -STEP;
3738 n = (adj + N2 - N1) / STEP;
3740 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3741 here so that V is defined
3742 if the loop is not entered
3744 s0 = (trip * nthreads + threadid) * CHUNK;
3745 e0 = min (s0 + CHUNK, n);
3746 if (s0 < n) goto L1; else goto L4;
3753 if (V cond e) goto L2; else goto L3;
3761 expand_omp_for_static_chunk (struct omp_region
*region
,
3762 struct omp_for_data
*fd
, gimple
*inner_stmt
)
3764 tree n
, s0
, e0
, e
, t
;
3765 tree trip_var
, trip_init
, trip_main
, trip_back
, nthreads
, threadid
;
3766 tree type
, itype
, vmain
, vback
, vextra
;
3767 basic_block entry_bb
, exit_bb
, body_bb
, seq_start_bb
, iter_part_bb
;
3768 basic_block trip_update_bb
= NULL
, cont_bb
, collapse_bb
= NULL
, fin_bb
;
3769 gimple_stmt_iterator gsi
;
3771 bool broken_loop
= region
->cont
== NULL
;
3772 tree
*counts
= NULL
;
3775 itype
= type
= TREE_TYPE (fd
->loop
.v
);
3776 if (POINTER_TYPE_P (type
))
3777 itype
= signed_type_for (type
);
3779 entry_bb
= region
->entry
;
3780 se
= split_block (entry_bb
, last_stmt (entry_bb
));
3782 iter_part_bb
= se
->dest
;
3783 cont_bb
= region
->cont
;
3784 gcc_assert (EDGE_COUNT (iter_part_bb
->succs
) == 2);
3785 fin_bb
= BRANCH_EDGE (iter_part_bb
)->dest
;
3786 gcc_assert (broken_loop
3787 || fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
);
3788 seq_start_bb
= split_edge (FALLTHRU_EDGE (iter_part_bb
));
3789 body_bb
= single_succ (seq_start_bb
);
3792 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
3793 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
3794 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
3795 trip_update_bb
= split_edge (FALLTHRU_EDGE (cont_bb
));
3797 exit_bb
= region
->exit
;
3799 /* Trip and adjustment setup goes in ENTRY_BB. */
3800 gsi
= gsi_last_nondebug_bb (entry_bb
);
3801 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3803 if (fd
->collapse
> 1)
3805 int first_zero_iter
= -1, dummy
= -1;
3806 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
3808 counts
= XALLOCAVEC (tree
, fd
->collapse
);
3809 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3810 fin_bb
, first_zero_iter
,
3811 dummy_bb
, dummy
, l2_dom_bb
);
3814 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3815 t
= integer_one_node
;
3817 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
3818 fold_convert (type
, fd
->loop
.n1
),
3819 fold_convert (type
, fd
->loop
.n2
));
3820 if (fd
->collapse
== 1
3821 && TYPE_UNSIGNED (type
)
3822 && (t
== NULL_TREE
|| !integer_onep (t
)))
3824 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
3825 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
3826 true, GSI_SAME_STMT
);
3827 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
3828 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
3829 true, GSI_SAME_STMT
);
3830 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
3831 NULL_TREE
, NULL_TREE
);
3832 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3833 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
3834 expand_omp_regimplify_p
, NULL
, NULL
)
3835 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
3836 expand_omp_regimplify_p
, NULL
, NULL
))
3838 gsi
= gsi_for_stmt (cond_stmt
);
3839 gimple_regimplify_operands (cond_stmt
, &gsi
);
3841 se
= split_block (entry_bb
, cond_stmt
);
3842 se
->flags
= EDGE_TRUE_VALUE
;
3843 entry_bb
= se
->dest
;
3844 se
->probability
= profile_probability::very_likely ();
3845 se
= make_edge (se
->src
, fin_bb
, EDGE_FALSE_VALUE
);
3846 se
->probability
= profile_probability::very_unlikely ();
3847 if (gimple_in_ssa_p (cfun
))
3849 int dest_idx
= find_edge (iter_part_bb
, fin_bb
)->dest_idx
;
3850 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
3851 !gsi_end_p (gpi
); gsi_next (&gpi
))
3853 gphi
*phi
= gpi
.phi ();
3854 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
3855 se
, UNKNOWN_LOCATION
);
3858 gsi
= gsi_last_bb (entry_bb
);
3861 switch (gimple_omp_for_kind (fd
->for_stmt
))
3863 case GF_OMP_FOR_KIND_FOR
:
3864 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
3865 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
3867 case GF_OMP_FOR_KIND_DISTRIBUTE
:
3868 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
3869 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
3874 nthreads
= build_call_expr (nthreads
, 0);
3875 nthreads
= fold_convert (itype
, nthreads
);
3876 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
3877 true, GSI_SAME_STMT
);
3878 threadid
= build_call_expr (threadid
, 0);
3879 threadid
= fold_convert (itype
, threadid
);
3880 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
3881 true, GSI_SAME_STMT
);
3885 step
= fd
->loop
.step
;
3886 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3888 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3889 OMP_CLAUSE__LOOPTEMP_
);
3890 gcc_assert (innerc
);
3891 n1
= OMP_CLAUSE_DECL (innerc
);
3892 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3893 OMP_CLAUSE__LOOPTEMP_
);
3894 gcc_assert (innerc
);
3895 n2
= OMP_CLAUSE_DECL (innerc
);
3897 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
3898 true, NULL_TREE
, true, GSI_SAME_STMT
);
3899 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
3900 true, NULL_TREE
, true, GSI_SAME_STMT
);
3901 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
3902 true, NULL_TREE
, true, GSI_SAME_STMT
);
3903 tree chunk_size
= fold_convert (itype
, fd
->chunk_size
);
3904 chunk_size
= omp_adjust_chunk_size (chunk_size
, fd
->simd_schedule
);
3906 = force_gimple_operand_gsi (&gsi
, chunk_size
, true, NULL_TREE
, true,
3909 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
3910 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
3911 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
3912 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
3913 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
3914 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
3915 fold_build1 (NEGATE_EXPR
, itype
, t
),
3916 fold_build1 (NEGATE_EXPR
, itype
, step
));
3918 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
3919 t
= fold_convert (itype
, t
);
3920 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3921 true, GSI_SAME_STMT
);
3923 trip_var
= create_tmp_reg (itype
, ".trip");
3924 if (gimple_in_ssa_p (cfun
))
3926 trip_init
= make_ssa_name (trip_var
);
3927 trip_main
= make_ssa_name (trip_var
);
3928 trip_back
= make_ssa_name (trip_var
);
3932 trip_init
= trip_var
;
3933 trip_main
= trip_var
;
3934 trip_back
= trip_var
;
3937 gassign
*assign_stmt
3938 = gimple_build_assign (trip_init
, build_int_cst (itype
, 0));
3939 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3941 t
= fold_build2 (MULT_EXPR
, itype
, threadid
, chunk_size
);
3942 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3943 if (POINTER_TYPE_P (type
))
3944 t
= fold_build_pointer_plus (n1
, t
);
3946 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3947 vextra
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3948 true, GSI_SAME_STMT
);
3950 /* Remove the GIMPLE_OMP_FOR. */
3951 gsi_remove (&gsi
, true);
3953 gimple_stmt_iterator gsif
= gsi
;
3955 /* Iteration space partitioning goes in ITER_PART_BB. */
3956 gsi
= gsi_last_bb (iter_part_bb
);
3958 t
= fold_build2 (MULT_EXPR
, itype
, trip_main
, nthreads
);
3959 t
= fold_build2 (PLUS_EXPR
, itype
, t
, threadid
);
3960 t
= fold_build2 (MULT_EXPR
, itype
, t
, chunk_size
);
3961 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3962 false, GSI_CONTINUE_LINKING
);
3964 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, chunk_size
);
3965 t
= fold_build2 (MIN_EXPR
, itype
, t
, n
);
3966 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3967 false, GSI_CONTINUE_LINKING
);
3969 t
= build2 (LT_EXPR
, boolean_type_node
, s0
, n
);
3970 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_CONTINUE_LINKING
);
3972 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3973 gsi
= gsi_start_bb (seq_start_bb
);
3975 tree startvar
= fd
->loop
.v
;
3976 tree endvar
= NULL_TREE
;
3978 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3980 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
3981 ? gimple_omp_parallel_clauses (inner_stmt
)
3982 : gimple_omp_for_clauses (inner_stmt
);
3983 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
3984 gcc_assert (innerc
);
3985 startvar
= OMP_CLAUSE_DECL (innerc
);
3986 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3987 OMP_CLAUSE__LOOPTEMP_
);
3988 gcc_assert (innerc
);
3989 endvar
= OMP_CLAUSE_DECL (innerc
);
3990 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
3991 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
3994 for (i
= 1; i
< fd
->collapse
; i
++)
3996 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3997 OMP_CLAUSE__LOOPTEMP_
);
3998 gcc_assert (innerc
);
4000 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4001 OMP_CLAUSE__LOOPTEMP_
);
4004 /* If needed (distribute parallel for with lastprivate),
4005 propagate down the total number of iterations. */
4006 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
4008 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
4009 GSI_CONTINUE_LINKING
);
4010 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
4011 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4016 t
= fold_convert (itype
, s0
);
4017 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4018 if (POINTER_TYPE_P (type
))
4019 t
= fold_build_pointer_plus (n1
, t
);
4021 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4022 t
= fold_convert (TREE_TYPE (startvar
), t
);
4023 t
= force_gimple_operand_gsi (&gsi
, t
,
4025 && TREE_ADDRESSABLE (startvar
),
4026 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
4027 assign_stmt
= gimple_build_assign (startvar
, t
);
4028 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4030 t
= fold_convert (itype
, e0
);
4031 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4032 if (POINTER_TYPE_P (type
))
4033 t
= fold_build_pointer_plus (n1
, t
);
4035 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4036 t
= fold_convert (TREE_TYPE (startvar
), t
);
4037 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4038 false, GSI_CONTINUE_LINKING
);
4041 assign_stmt
= gimple_build_assign (endvar
, e
);
4042 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4043 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
4044 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
4046 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
4047 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4049 /* Handle linear clause adjustments. */
4050 tree itercnt
= NULL_TREE
, itercntbias
= NULL_TREE
;
4051 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
4052 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
4053 c
; c
= OMP_CLAUSE_CHAIN (c
))
4054 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
4055 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
4057 tree d
= OMP_CLAUSE_DECL (c
);
4058 bool is_ref
= omp_is_reference (d
);
4059 tree t
= d
, a
, dest
;
4061 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
4062 tree type
= TREE_TYPE (t
);
4063 if (POINTER_TYPE_P (type
))
4065 dest
= unshare_expr (t
);
4066 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
4067 expand_omp_build_assign (&gsif
, v
, t
);
4068 if (itercnt
== NULL_TREE
)
4070 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4073 = fold_build2 (MINUS_EXPR
, itype
, fold_convert (itype
, n1
),
4074 fold_convert (itype
, fd
->loop
.n1
));
4075 itercntbias
= fold_build2 (EXACT_DIV_EXPR
, itype
,
4078 = force_gimple_operand_gsi (&gsif
, itercntbias
, true,
4081 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercntbias
, s0
);
4082 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
4084 GSI_CONTINUE_LINKING
);
4089 a
= fold_build2 (MULT_EXPR
, type
,
4090 fold_convert (type
, itercnt
),
4091 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
4092 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
4093 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
4094 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4095 false, GSI_CONTINUE_LINKING
);
4096 assign_stmt
= gimple_build_assign (dest
, t
);
4097 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4099 if (fd
->collapse
> 1)
4100 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
4104 /* The code controlling the sequential loop goes in CONT_BB,
4105 replacing the GIMPLE_OMP_CONTINUE. */
4106 gsi
= gsi_last_nondebug_bb (cont_bb
);
4107 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
4108 vmain
= gimple_omp_continue_control_use (cont_stmt
);
4109 vback
= gimple_omp_continue_control_def (cont_stmt
);
4111 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4113 if (POINTER_TYPE_P (type
))
4114 t
= fold_build_pointer_plus (vmain
, step
);
4116 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
4117 if (DECL_P (vback
) && TREE_ADDRESSABLE (vback
))
4118 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4119 true, GSI_SAME_STMT
);
4120 assign_stmt
= gimple_build_assign (vback
, t
);
4121 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4123 if (tree_int_cst_equal (fd
->chunk_size
, integer_one_node
))
4124 t
= build2 (EQ_EXPR
, boolean_type_node
,
4125 build_int_cst (itype
, 0),
4126 build_int_cst (itype
, 1));
4128 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
4129 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
4131 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
4134 /* Remove GIMPLE_OMP_CONTINUE. */
4135 gsi_remove (&gsi
, true);
4137 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
4138 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
4140 /* Trip update code goes into TRIP_UPDATE_BB. */
4141 gsi
= gsi_start_bb (trip_update_bb
);
4143 t
= build_int_cst (itype
, 1);
4144 t
= build2 (PLUS_EXPR
, itype
, trip_main
, t
);
4145 assign_stmt
= gimple_build_assign (trip_back
, t
);
4146 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4149 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4150 gsi
= gsi_last_nondebug_bb (exit_bb
);
4151 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
4153 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
4154 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
4156 gsi_remove (&gsi
, true);
4158 /* Connect the new blocks. */
4159 find_edge (iter_part_bb
, seq_start_bb
)->flags
= EDGE_TRUE_VALUE
;
4160 find_edge (iter_part_bb
, fin_bb
)->flags
= EDGE_FALSE_VALUE
;
4164 se
= find_edge (cont_bb
, body_bb
);
4167 se
= BRANCH_EDGE (cont_bb
);
4168 gcc_assert (single_succ (se
->dest
) == body_bb
);
4170 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4175 else if (fd
->collapse
> 1)
4178 se
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
4181 se
->flags
= EDGE_TRUE_VALUE
;
4182 find_edge (cont_bb
, trip_update_bb
)->flags
4183 = se
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
4185 redirect_edge_and_branch (single_succ_edge (trip_update_bb
),
4189 if (gimple_in_ssa_p (cfun
))
4197 gcc_assert (fd
->collapse
== 1 && !broken_loop
);
4199 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200 remove arguments of the phi nodes in fin_bb. We need to create
4201 appropriate phi nodes in iter_part_bb instead. */
4202 se
= find_edge (iter_part_bb
, fin_bb
);
4203 re
= single_succ_edge (trip_update_bb
);
4204 vec
<edge_var_map
> *head
= redirect_edge_var_map_vector (re
);
4205 ene
= single_succ_edge (entry_bb
);
4207 psi
= gsi_start_phis (fin_bb
);
4208 for (i
= 0; !gsi_end_p (psi
) && head
->iterate (i
, &vm
);
4209 gsi_next (&psi
), ++i
)
4212 source_location locus
;
4215 if (operand_equal_p (gimple_phi_arg_def (phi
, 0),
4216 redirect_edge_var_map_def (vm
), 0))
4219 t
= gimple_phi_result (phi
);
4220 gcc_assert (t
== redirect_edge_var_map_result (vm
));
4222 if (!single_pred_p (fin_bb
))
4223 t
= copy_ssa_name (t
, phi
);
4225 nphi
= create_phi_node (t
, iter_part_bb
);
4227 t
= PHI_ARG_DEF_FROM_EDGE (phi
, se
);
4228 locus
= gimple_phi_arg_location_from_edge (phi
, se
);
4230 /* A special case -- fd->loop.v is not yet computed in
4231 iter_part_bb, we need to use vextra instead. */
4232 if (t
== fd
->loop
.v
)
4234 add_phi_arg (nphi
, t
, ene
, locus
);
4235 locus
= redirect_edge_var_map_location (vm
);
4236 tree back_arg
= redirect_edge_var_map_def (vm
);
4237 add_phi_arg (nphi
, back_arg
, re
, locus
);
4238 edge ce
= find_edge (cont_bb
, body_bb
);
4241 ce
= BRANCH_EDGE (cont_bb
);
4242 gcc_assert (single_succ (ce
->dest
) == body_bb
);
4243 ce
= single_succ_edge (ce
->dest
);
4245 gphi
*inner_loop_phi
= find_phi_with_arg_on_edge (back_arg
, ce
);
4246 gcc_assert (inner_loop_phi
!= NULL
);
4247 add_phi_arg (inner_loop_phi
, gimple_phi_result (nphi
),
4248 find_edge (seq_start_bb
, body_bb
), locus
);
4250 if (!single_pred_p (fin_bb
))
4251 add_phi_arg (phi
, gimple_phi_result (nphi
), se
, locus
);
4253 gcc_assert (gsi_end_p (psi
) && (head
== NULL
|| i
== head
->length ()));
4254 redirect_edge_var_map_clear (re
);
4255 if (single_pred_p (fin_bb
))
4258 psi
= gsi_start_phis (fin_bb
);
4259 if (gsi_end_p (psi
))
4261 remove_phi_node (&psi
, false);
4264 /* Make phi node for trip. */
4265 phi
= create_phi_node (trip_main
, iter_part_bb
);
4266 add_phi_arg (phi
, trip_back
, single_succ_edge (trip_update_bb
),
4268 add_phi_arg (phi
, trip_init
, single_succ_edge (entry_bb
),
4273 set_immediate_dominator (CDI_DOMINATORS
, trip_update_bb
, cont_bb
);
4274 set_immediate_dominator (CDI_DOMINATORS
, iter_part_bb
,
4275 recompute_dominator (CDI_DOMINATORS
, iter_part_bb
));
4276 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
4277 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
4278 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
,
4279 recompute_dominator (CDI_DOMINATORS
, seq_start_bb
));
4280 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
4281 recompute_dominator (CDI_DOMINATORS
, body_bb
));
4285 struct loop
*loop
= body_bb
->loop_father
;
4286 struct loop
*trip_loop
= alloc_loop ();
4287 trip_loop
->header
= iter_part_bb
;
4288 trip_loop
->latch
= trip_update_bb
;
4289 add_loop (trip_loop
, iter_part_bb
->loop_father
);
4291 if (loop
!= entry_bb
->loop_father
)
4293 gcc_assert (loop
->header
== body_bb
);
4294 gcc_assert (loop
->latch
== region
->cont
4295 || single_pred (loop
->latch
) == region
->cont
);
4296 trip_loop
->inner
= loop
;
4300 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4302 loop
= alloc_loop ();
4303 loop
->header
= body_bb
;
4304 if (collapse_bb
== NULL
)
4305 loop
->latch
= cont_bb
;
4306 add_loop (loop
, trip_loop
);
4311 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4312 loop. Given parameters:
4314 for (V = N1; V cond N2; V += STEP) BODY;
4316 where COND is "<" or ">", we generate pseudocode
4324 if (V cond N2) goto L0; else goto L2;
4327 For collapsed loops, given parameters:
4329 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4330 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4331 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4334 we generate pseudocode
4340 count3 = (adj + N32 - N31) / STEP3;
4345 count2 = (adj + N22 - N21) / STEP2;
4350 count1 = (adj + N12 - N11) / STEP1;
4351 count = count1 * count2 * count3;
4361 V2 += (V3 cond3 N32) ? 0 : STEP2;
4362 V3 = (V3 cond3 N32) ? V3 : N31;
4363 V1 += (V2 cond2 N22) ? 0 : STEP1;
4364 V2 = (V2 cond2 N22) ? V2 : N21;
4366 if (V < count) goto L0; else goto L2;
/* Expand a simd (non-worksharing) OMP for region REGION described by FD
   into explicit control flow: IV initialization replaces the GIMPLE_OMP_FOR
   in the entry block, the increment replaces the GIMPLE_OMP_CONTINUE in
   CONT_BB, the exit test is emitted in L1_BB, and loop metadata
   (safelen / simduid / force_vectorize) is attached to the new loop.
   NOTE(review): this extract has lost brace-only and several statement
   lines (the embedded original line numbers skip), so the structure shown
   here is incomplete — verify against the full omp-expand.c before
   relying on the exact control flow.  */
4372 expand_omp_simd (struct omp_region
*region
, struct omp_for_data
*fd
)
4375 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, l2_bb
, l2_dom_bb
;
4376 gimple_stmt_iterator gsi
;
4379 bool broken_loop
= region
->cont
== NULL
;
4381 tree
*counts
= NULL
;
/* safelen defaults to "unbounded" (INT_MAX) and is narrowed from the
   SAFELEN clause, when present, below.  */
4383 int safelen_int
= INT_MAX
;
4384 tree safelen
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4385 OMP_CLAUSE_SAFELEN
);
4386 tree simduid
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4387 OMP_CLAUSE__SIMDUID_
);
4393 safelen
= OMP_CLAUSE_SAFELEN_EXPR (safelen
);
4394 if (!poly_int_tree_p (safelen
, &val
))
4397 safelen_int
= MIN (constant_lower_bound (val
), INT_MAX
);
4398 if (safelen_int
== 1)
/* Identify the region's blocks.  TYPE is the iteration variable's type.  */
4401 type
= TREE_TYPE (fd
->loop
.v
);
4402 entry_bb
= region
->entry
;
4403 cont_bb
= region
->cont
;
4404 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4405 gcc_assert (broken_loop
4406 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4407 l0_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
4410 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l0_bb
);
4411 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4412 l1_bb
= split_block (cont_bb
, last_stmt (cont_bb
))->dest
;
4413 l2_bb
= BRANCH_EDGE (entry_bb
)->dest
;
/* Broken-loop (no continue) variant: synthesize L1/L2 from the entry
   branch edge instead — NOTE(review): the else introducing this arm is
   on an elided line; confirm against upstream.  */
4417 BRANCH_EDGE (entry_bb
)->flags
&= ~EDGE_ABNORMAL
;
4418 l1_bb
= split_edge (BRANCH_EDGE (entry_bb
));
4419 l2_bb
= single_succ (l1_bb
);
4421 exit_bb
= region
->exit
;
4424 gsi
= gsi_last_nondebug_bb (entry_bb
);
4426 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4427 /* Not needed in SSA form right now. */
4428 gcc_assert (!gimple_in_ssa_p (cfun
));
/* Collapsed loop nests: precompute per-loop iteration counts.  */
4429 if (fd
->collapse
> 1)
4431 int first_zero_iter
= -1, dummy
= -1;
4432 basic_block zero_iter_bb
= l2_bb
, dummy_bb
= NULL
;
4434 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4435 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4436 zero_iter_bb
, first_zero_iter
,
4437 dummy_bb
, dummy
, l2_dom_bb
);
4439 if (l2_dom_bb
== NULL
)
/* When this simd is combined into an enclosing construct, the bounds
   arrive through _looptemp_ clauses rather than fd->loop.n1/n2.  */
4444 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4446 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4447 OMP_CLAUSE__LOOPTEMP_
);
4448 gcc_assert (innerc
);
4449 n1
= OMP_CLAUSE_DECL (innerc
);
4450 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4451 OMP_CLAUSE__LOOPTEMP_
);
4452 gcc_assert (innerc
);
4453 n2
= OMP_CLAUSE_DECL (innerc
);
4455 tree step
= fd
->loop
.step
;
/* SIMT lowering path — presumably keyed on a _simt_ clause; the clause
   constant is on an elided line, confirm against upstream.  */
4457 bool is_simt
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4461 cfun
->curr_properties
&= ~PROP_gimple_lomp_dev
;
4462 is_simt
= safelen_int
> 1;
4464 tree simt_lane
= NULL_TREE
, simt_maxlane
= NULL_TREE
;
/* Per-lane setup: offset the starting IV by lane * step.  */
4467 simt_lane
= create_tmp_var (unsigned_type_node
);
4468 gimple
*g
= gimple_build_call_internal (IFN_GOMP_SIMT_LANE
, 0);
4469 gimple_call_set_lhs (g
, simt_lane
);
4470 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4471 tree offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
,
4472 fold_convert (TREE_TYPE (step
), simt_lane
));
4473 n1
= fold_convert (type
, n1
);
4474 if (POINTER_TYPE_P (type
))
4475 n1
= fold_build_pointer_plus (n1
, offset
);
4477 n1
= fold_build2 (PLUS_EXPR
, type
, n1
, fold_convert (type
, offset
));
4479 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4480 if (fd
->collapse
> 1)
4481 simt_maxlane
= build_one_cst (unsigned_type_node
);
4482 else if (safelen_int
< omp_max_simt_vf ())
4483 simt_maxlane
= build_int_cst (unsigned_type_node
, safelen_int
);
/* The lhs (presumably "tree vf") of this call is on an elided line.  */
4485 = build_call_expr_internal_loc (UNKNOWN_LOCATION
, IFN_GOMP_SIMT_VF
,
4486 unsigned_type_node
, 0);
4488 vf
= fold_build2 (MIN_EXPR
, unsigned_type_node
, vf
, simt_maxlane
);
4489 vf
= fold_convert (TREE_TYPE (step
), vf
);
4490 step
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
, vf
);
/* Initialize the iteration variable(s) in the entry block.  */
4493 expand_omp_build_assign (&gsi
, fd
->loop
.v
, fold_convert (type
, n1
));
4494 if (fd
->collapse
> 1)
4496 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4499 expand_omp_for_init_vars (fd
, &gsi
, counts
, NULL
, n1
);
4503 for (i
= 0; i
< fd
->collapse
; i
++)
4505 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
4506 if (POINTER_TYPE_P (itype
))
4507 itype
= signed_type_for (itype
);
4508 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].n1
);
4509 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4513 /* Remove the GIMPLE_OMP_FOR statement. */
4514 gsi_remove (&gsi
, true);
4518 /* Code to control the increment goes in the CONT_BB. */
4519 gsi
= gsi_last_nondebug_bb (cont_bb
)
;
4520 stmt
= gsi_stmt (gsi
);
4521 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_CONTINUE
);
4523 if (POINTER_TYPE_P (type
))
4524 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
4526 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
4527 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
/* Step the innermost IV of a collapsed nest.  */
4529 if (fd
->collapse
> 1)
4531 i
= fd
->collapse
- 1;
4532 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
4534 t
= fold_convert (sizetype
, fd
->loops
[i
].step
);
4535 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, t
);
4539 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
),
4541 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
4544 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
/* Unwind the collapsed nest: when an inner IV wraps past its bound,
   carry a step into the next-outer IV and reset the inner one
   (matches the V2/V3 pseudocode in the comment above this function).  */
4546 for (i
= fd
->collapse
- 1; i
> 0; i
--)
4548 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
4549 tree itype2
= TREE_TYPE (fd
->loops
[i
- 1].v
);
4550 if (POINTER_TYPE_P (itype2
))
4551 itype2
= signed_type_for (itype2
);
4552 t
= fold_convert (itype2
, fd
->loops
[i
- 1].step
);
4553 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true,
4555 t
= build3 (COND_EXPR
, itype2
,
4556 build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4558 fold_convert (itype
, fd
->loops
[i
].n2
)),
4559 build_int_cst (itype2
, 0), t
);
4560 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
- 1].v
)))
4561 t
= fold_build_pointer_plus (fd
->loops
[i
- 1].v
, t
);
4563 t
= fold_build2 (PLUS_EXPR
, itype2
, fd
->loops
[i
- 1].v
, t
);
4564 expand_omp_build_assign (&gsi
, fd
->loops
[i
- 1].v
, t
);
4566 t
= fold_convert (itype
, fd
->loops
[i
].n1
);
4567 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true,
4569 t
= build3 (COND_EXPR
, itype
,
4570 build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4572 fold_convert (itype
, fd
->loops
[i
].n2
)),
4574 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4578 /* Remove GIMPLE_OMP_CONTINUE. */
4579 gsi_remove (&gsi
, true);
4582 /* Emit the condition in L1_BB. */
4583 gsi
= gsi_start_bb (l1_bb
);
4585 t
= fold_convert (type
, n2
);
4586 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4587 false, GSI_CONTINUE_LINKING
);
4588 tree v
= fd
->loop
.v
;
4589 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
4590 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
4591 false, GSI_CONTINUE_LINKING
);
4592 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
, v
, t
);
4593 cond_stmt
= gimple_build_cond_empty (t
);
4594 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
/* Re-gimplify the condition operands if expansion left them invalid.  */
4595 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
4597 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
4600 gsi
= gsi_for_stmt (cond_stmt
);
4601 gimple_regimplify_operands (cond_stmt
, &gsi
);
4604 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4607 gsi
= gsi_start_bb (l2_bb
);
4608 step
= fold_build2 (MINUS_EXPR
, TREE_TYPE (step
), fd
->loop
.step
, step
);
4609 if (POINTER_TYPE_P (type
))
4610 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
4612 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
4613 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
4616 /* Remove GIMPLE_OMP_RETURN. */
4617 gsi
= gsi_last_nondebug_bb (exit_bb
);
4618 gsi_remove (&gsi
, true);
4620 /* Connect the new blocks. */
4621 remove_edge (FALLTHRU_EDGE (entry_bb
));
4625 remove_edge (BRANCH_EDGE (entry_bb
));
4626 make_edge (entry_bb
, l1_bb
, EDGE_FALLTHRU
);
4628 e
= BRANCH_EDGE (l1_bb
);
4629 ne
= FALLTHRU_EDGE (l1_bb
);
4630 e
->flags
= EDGE_TRUE_VALUE
;
4634 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
4636 ne
= single_succ_edge (l1_bb
);
4637 e
= make_edge (l1_bb
, l0_bb
, EDGE_TRUE_VALUE
);
/* Back-edge is taken ~7/8 of the time (heuristic loop probability).  */
4640 ne
->flags
= EDGE_FALSE_VALUE
;
4641 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
4642 ne
->probability
= e
->probability
.invert ();
4644 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
, entry_bb
);
4645 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
, l1_bb
);
/* SIMT: guard the whole loop with "lane < maxlane" when masked.  */
4649 cond_stmt
= gimple_build_cond (LT_EXPR
, simt_lane
, simt_maxlane
,
4650 NULL_TREE
, NULL_TREE
);
4651 gsi
= gsi_last_bb (entry_bb
);
4652 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
4653 make_edge (entry_bb
, l2_bb
, EDGE_FALSE_VALUE
);
4654 FALLTHRU_EDGE (entry_bb
)->flags
= EDGE_TRUE_VALUE
;
4655 FALLTHRU_EDGE (entry_bb
)->probability
4656 = profile_probability::guessed_always ().apply_scale (7, 8);
4657 BRANCH_EDGE (entry_bb
)->probability
4658 = FALLTHRU_EDGE (entry_bb
)->probability
.invert ();
4659 l2_dom_bb
= entry_bb
;
4661 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
, l2_dom_bb
);
/* Register the new natural loop and attach simd metadata.  */
4665 struct loop
*loop
= alloc_loop ();
4666 loop
->header
= l1_bb
;
4667 loop
->latch
= cont_bb
;
4668 add_loop (loop
, l1_bb
->loop_father
);
4669 loop
->safelen
= safelen_int
;
4672 loop
->simduid
= OMP_CLAUSE__SIMDUID__DECL (simduid
);
4673 cfun
->has_simduid_loops
= true;
4675 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4677 if ((flag_tree_loop_vectorize
4678 || !global_options_set
.x_flag_tree_loop_vectorize
)
4679 && flag_tree_loop_optimize
4680 && loop
->safelen
> 1)
4682 loop
->force_vectorize
= true;
4683 cfun
->has_force_vectorize_loops
= true;
/* Broken-loop fallthrough: still record simduid so the vectorizer
   can clean up .GOMP_SIMD_* calls — NOTE(review): the else/guard for
   this arm is on elided lines.  */
4687 cfun
->has_simduid_loops
= true;
4690 /* Taskloop construct is represented after gimplification with
4691 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4692 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4693 which should just compute all the needed loop temporaries
4694 for GIMPLE_OMP_TASK. */
/* Expand the outer GIMPLE_OMP_FOR of a taskloop construct (see the
   comment preceding this function): it only computes the loop
   temporaries (start/end, and iteration counts for collapsed nests)
   that the sandwiched GIMPLE_OMP_TASK consumes; no actual loop is
   emitted here.  NOTE(review): brace-only and some statement lines
   are elided in this extract (original line numbers skip); verify
   structure against the full omp-expand.c.  */
4697 expand_omp_taskloop_for_outer (struct omp_region
*region
,
4698 struct omp_for_data
*fd
,
4701 tree type
, bias
= NULL_TREE
;
4702 basic_block entry_bb
, cont_bb
, exit_bb
;
4703 gimple_stmt_iterator gsi
;
4704 gassign
*assign_stmt
;
4705 tree
*counts
= NULL
;
4708 gcc_assert (inner_stmt
);
4709 gcc_assert (region
->cont
);
4710 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_TASK
4711 && gimple_omp_task_taskloop_p (inner_stmt
));
4712 type
= TREE_TYPE (fd
->loop
.v
);
4714 /* See if we need to bias by LLONG_MIN. */
4715 if (fd
->iter_type
== long_long_unsigned_type_node
4716 && TREE_CODE (type
) == INTEGER_TYPE
4717 && !TYPE_UNSIGNED (type
))
4721 if (fd
->loop
.cond_code
== LT_EXPR
)
4724 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
4728 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
/* Bias unless both bounds are constants of the same sign, in which
   case the unsigned runtime iteration space cannot wrap.  */
4731 if (TREE_CODE (n1
) != INTEGER_CST
4732 || TREE_CODE (n2
) != INTEGER_CST
4733 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
4734 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
4737 entry_bb
= region
->entry
;
4738 cont_bb
= region
->cont
;
4739 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4740 gcc_assert (BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4741 exit_bb
= region
->exit
;
4743 gsi
= gsi_last_nondebug_bb (entry_bb
);
4744 gimple
*for_stmt
= gsi_stmt (gsi
);
4745 gcc_assert (gimple_code (for_stmt
) == GIMPLE_OMP_FOR
);
/* Collapsed nests: compute per-loop iteration counts in the entry block.  */
4746 if (fd
->collapse
> 1)
4748 int first_zero_iter
= -1, dummy
= -1;
4749 basic_block zero_iter_bb
= NULL
, dummy_bb
= NULL
, l2_dom_bb
= NULL
;
4751 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4752 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4753 zero_iter_bb
, first_zero_iter
,
4754 dummy_bb
, dummy
, l2_dom_bb
);
4758 /* Some counts[i] vars might be uninitialized if
4759 some loop has zero iterations. But the body shouldn't
4760 be executed in that case, so just avoid uninit warnings. */
4761 for (i
= first_zero_iter
; i
< fd
->collapse
; i
++)
4762 if (SSA_VAR_P (counts
[i
]))
4763 TREE_NO_WARNING (counts
[i
]) = 1;
/* Splice the zero-iterations path back into the entry block.  */
4765 edge e
= split_block (entry_bb
, gsi_stmt (gsi
));
4767 make_edge (zero_iter_bb
, entry_bb
, EDGE_FALLTHRU
);
4768 gsi
= gsi_last_bb (entry_bb
);
4769 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
4770 get_immediate_dominator (CDI_DOMINATORS
,
/* Convert the bounds T0/T1 to the runtime iterator type, going through
   a same-sized signed type for pointers to avoid bad casts.
   NOTE(review): the initializations of t0/t1 are on elided lines.  */
4778 if (POINTER_TYPE_P (TREE_TYPE (t0
))
4779 && TYPE_PRECISION (TREE_TYPE (t0
))
4780 != TYPE_PRECISION (fd
->iter_type
))
4782 /* Avoid casting pointers to integer of a different size. */
4783 tree itype
= signed_type_for (type
);
4784 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
4785 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
4789 t1
= fold_convert (fd
->iter_type
, t1
);
4790 t0
= fold_convert (fd
->iter_type
, t0
);
4794 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
4795 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
/* Locate the task's _looptemp_ clauses: first two receive start/end.  */
4798 tree innerc
= omp_find_clause (gimple_omp_task_clauses (inner_stmt
),
4799 OMP_CLAUSE__LOOPTEMP_
);
4800 gcc_assert (innerc
);
4801 tree startvar
= OMP_CLAUSE_DECL (innerc
);
4802 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
4803 gcc_assert (innerc
);
4804 tree endvar
= OMP_CLAUSE_DECL (innerc
);
4805 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
4807 gcc_assert (innerc
);
4808 for (i
= 1; i
< fd
->collapse
; i
++)
4810 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4811 OMP_CLAUSE__LOOPTEMP_
);
4812 gcc_assert (innerc
);
4814 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4815 OMP_CLAUSE__LOOPTEMP_
);
4818 /* If needed (inner taskloop has lastprivate clause), propagate
4819 down the total number of iterations. */
4820 tree t
= force_gimple_operand_gsi (&gsi
, fd
->loop
.n2
, false,
4822 GSI_CONTINUE_LINKING
);
4823 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
4824 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
/* Store the computed start/end into the task's loop temporaries.  */
4828 t0
= force_gimple_operand_gsi (&gsi
, t0
, false, NULL_TREE
, false,
4829 GSI_CONTINUE_LINKING
);
4830 assign_stmt
= gimple_build_assign (startvar
, t0
);
4831 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4833 t1
= force_gimple_operand_gsi (&gsi
, t1
, false, NULL_TREE
, false,
4834 GSI_CONTINUE_LINKING
);
4835 assign_stmt
= gimple_build_assign (endvar
, t1
);
4836 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4837 if (fd
->collapse
> 1)
4838 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
4840 /* Remove the GIMPLE_OMP_FOR statement. */
4841 gsi
= gsi_for_stmt (for_stmt
);
4842 gsi_remove (&gsi
, true);
/* Also drop the GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN markers.  */
4844 gsi
= gsi_last_nondebug_bb (cont_bb
);
4845 gsi_remove (&gsi
, true);
4847 gsi
= gsi_last_nondebug_bb (exit_bb
);
4848 gsi_remove (&gsi
, true);
/* The region degenerates to straight-line code: delete the loop
   back/exit edges and fix up dominators.  */
4850 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
4851 remove_edge (BRANCH_EDGE (entry_bb
));
4852 FALLTHRU_EDGE (cont_bb
)->probability
= profile_probability::always ();
4853 remove_edge (BRANCH_EDGE (cont_bb
));
4854 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, cont_bb
);
4855 set_immediate_dominator (CDI_DOMINATORS
, region
->entry
,
4856 recompute_dominator (CDI_DOMINATORS
, region
->entry
));
4859 /* Taskloop construct is represented after gimplification with
4860 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4861 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4862 GOMP_taskloop{,_ull} function arranges for each task to be given just
4863 a single range of iterations. */
/* Expand the inner GIMPLE_OMP_FOR of a taskloop construct: by the time
   this runs, GOMP_taskloop{,_ull} has handed each task a single
   [start, end) range via _looptemp_ clauses, so this just emits a plain
   sequential loop over that range.  NOTE(review): brace-only and some
   statement lines are elided in this extract (original line numbers
   skip); verify structure against the full omp-expand.c.  */
4866 expand_omp_taskloop_for_inner (struct omp_region
*region
,
4867 struct omp_for_data
*fd
,
4870 tree e
, t
, type
, itype
, vmain
, vback
, bias
= NULL_TREE
;
4871 basic_block entry_bb
, exit_bb
, body_bb
, cont_bb
, collapse_bb
= NULL
;
4873 gimple_stmt_iterator gsi
;
4875 bool broken_loop
= region
->cont
== NULL
;
4876 tree
*counts
= NULL
;
4879 itype
= type
= TREE_TYPE (fd
->loop
.v
);
4880 if (POINTER_TYPE_P (type
))
4881 itype
= signed_type_for (type
);
4883 /* See if we need to bias by LLONG_MIN. */
4884 if (fd
->iter_type
== long_long_unsigned_type_node
4885 && TREE_CODE (type
) == INTEGER_TYPE
4886 && !TYPE_UNSIGNED (type
))
4890 if (fd
->loop
.cond_code
== LT_EXPR
)
4893 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
4897 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
/* Bias unless both bounds are constants of the same sign (mirrors the
   identical test in expand_omp_taskloop_for_outer).  */
4900 if (TREE_CODE (n1
) != INTEGER_CST
4901 || TREE_CODE (n2
) != INTEGER_CST
4902 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
4903 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
4906 entry_bb
= region
->entry
;
4907 cont_bb
= region
->cont
;
4908 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4909 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
4910 gcc_assert (broken_loop
4911 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
4912 body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
4915 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
);
4916 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4918 exit_bb
= region
->exit
;
4920 /* Iteration space partitioning goes in ENTRY_BB. */
4921 gsi
= gsi_last_nondebug_bb (entry_bb
);
4922 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4924 if (fd
->collapse
> 1)
4926 int first_zero_iter
= -1, dummy
= -1;
4927 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
4929 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4930 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4931 fin_bb
, first_zero_iter
,
4932 dummy_bb
, dummy
, l2_dom_bb
);
4936 t
= integer_one_node
;
/* The per-task range arrives via the first two _looptemp_ clauses.  */
4938 step
= fd
->loop
.step
;
4939 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4940 OMP_CLAUSE__LOOPTEMP_
);
4941 gcc_assert (innerc
);
4942 n1
= OMP_CLAUSE_DECL (innerc
);
4943 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
4944 gcc_assert (innerc
);
4945 n2
= OMP_CLAUSE_DECL (innerc
);
/* Undo the LLONG_MIN bias applied by the outer expansion, if any.  */
4948 n1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n1
, bias
);
4949 n2
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n2
, bias
);
4951 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
4952 true, NULL_TREE
, true, GSI_SAME_STMT
);
4953 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
4954 true, NULL_TREE
, true, GSI_SAME_STMT
);
4955 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
4956 true, NULL_TREE
, true, GSI_SAME_STMT
);
4958 tree startvar
= fd
->loop
.v
;
4959 tree endvar
= NULL_TREE
;
/* If combined with an inner construct, redirect start/end into its
   _looptemp_ variables instead of fd->loop.v.  */
4961 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4963 tree clauses
= gimple_omp_for_clauses (inner_stmt
);
4964 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
4965 gcc_assert (innerc
);
4966 startvar
= OMP_CLAUSE_DECL (innerc
);
4967 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4968 OMP_CLAUSE__LOOPTEMP_
);
4969 gcc_assert (innerc
);
4970 endvar
= OMP_CLAUSE_DECL (innerc
);
4972 t
= fold_convert (TREE_TYPE (startvar
), n1
);
4973 t
= force_gimple_operand_gsi (&gsi
, t
,
4975 && TREE_ADDRESSABLE (startvar
),
4976 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
4977 gimple
*assign_stmt
= gimple_build_assign (startvar
, t
);
4978 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4980 t
= fold_convert (TREE_TYPE (startvar
), n2
);
4981 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4982 false, GSI_CONTINUE_LINKING
);
4985 assign_stmt
= gimple_build_assign (endvar
, e
);
4986 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
/* Keep fd->loop.v in sync with endvar, converting if types differ.  */
4987 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
4988 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
4990 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
4991 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4993 if (fd
->collapse
> 1)
4994 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
4998 /* The code controlling the sequential loop replaces the
4999 GIMPLE_OMP_CONTINUE. */
5000 gsi
= gsi_last_nondebug_bb (cont_bb
);
5001 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5002 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
5003 vmain
= gimple_omp_continue_control_use (cont_stmt
);
5004 vback
= gimple_omp_continue_control_def (cont_stmt
);
5006 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
5008 if (POINTER_TYPE_P (type
))
5009 t
= fold_build_pointer_plus (vmain
, step
);
5011 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
5012 t
= force_gimple_operand_gsi (&gsi
, t
,
5014 && TREE_ADDRESSABLE (vback
),
5015 NULL_TREE
, true, GSI_SAME_STMT
);
5016 assign_stmt
= gimple_build_assign (vback
, t
);
5017 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
/* Emit the loop-continuation test.  */
5019 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
5020 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
5022 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
5025 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5026 gsi_remove (&gsi
, true);
5028 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
5029 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
5032 /* Remove the GIMPLE_OMP_FOR statement. */
5033 gsi
= gsi_for_stmt (fd
->for_stmt
);
5034 gsi_remove (&gsi
, true);
5036 /* Remove the GIMPLE_OMP_RETURN statement. */
5037 gsi
= gsi_last_nondebug_bb (exit_bb
);
5038 gsi_remove (&gsi
, true);
5040 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
5042 remove_edge (BRANCH_EDGE (entry_bb
));
/* Alternate arm (presumably the broken-loop case — the guard is on an
   elided line): drop the whole branch and clear the outer cont.  */
5045 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb
));
5046 region
->outer
->cont
= NULL
;
5049 /* Connect all the blocks. */
5052 ep
= find_edge (cont_bb
, body_bb
);
5053 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5058 else if (fd
->collapse
> 1)
5061 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
5064 ep
->flags
= EDGE_TRUE_VALUE
;
5065 find_edge (cont_bb
, fin_bb
)->flags
5066 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
5069 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
5070 recompute_dominator (CDI_DOMINATORS
, body_bb
));
5072 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
5073 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
/* Register the surviving natural loop, if one remains.  */
5075 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
5077 struct loop
*loop
= alloc_loop ();
5078 loop
->header
= body_bb
;
5079 if (collapse_bb
== NULL
)
5080 loop
->latch
= cont_bb
;
5081 add_loop (loop
, body_bb
->loop_father
);
5085 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5086 partitioned loop. The lowering here is abstracted, in that the
5087 loop parameters are passed through internal functions, which are
5088 further lowered by oacc_device_lower, once we get to the target
5089 compiler. The loop is of the form:
5091 for (V = B; V LTGT E; V += S) {BODY}
5093 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5094 (constant 0 for no chunking) and we will have a GWV partitioning
5095 mask, specifying dimensions over which the loop is to be
5096 partitioned (see note below). We generate code that looks like
5097 (this ignores tiling):
5099 <entry_bb> [incoming FALL->body, BRANCH->exit]
5100 typedef signedintify (typeof (V)) T; // underlying signed integral type
5103 T DIR = LTGT == '<' ? +1 : -1;
5104 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5105 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5107 <head_bb> [created by splitting end of entry_bb]
5108 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5109 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5110 if (!(offset LTGT bound)) goto bottom_bb;
5112 <body_bb> [incoming]
5116 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5118 if (offset LTGT bound) goto body_bb; [*]
5120 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5122 if (chunk < chunk_max) goto head_bb;
5124 <exit_bb> [incoming]
5125 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5127 [*] Needed if V live at end of loop. */
5130 expand_oacc_for (struct omp_region
*region
, struct omp_for_data
*fd
)
5132 tree v
= fd
->loop
.v
;
5133 enum tree_code cond_code
= fd
->loop
.cond_code
;
5134 enum tree_code plus_code
= PLUS_EXPR
;
5136 tree chunk_size
= integer_minus_one_node
;
5137 tree gwv
= integer_zero_node
;
5138 tree iter_type
= TREE_TYPE (v
);
5139 tree diff_type
= iter_type
;
5140 tree plus_type
= iter_type
;
5141 struct oacc_collapse
*counts
= NULL
;
5143 gcc_checking_assert (gimple_omp_for_kind (fd
->for_stmt
)
5144 == GF_OMP_FOR_KIND_OACC_LOOP
);
5145 gcc_assert (!gimple_omp_for_combined_into_p (fd
->for_stmt
));
5146 gcc_assert (cond_code
== LT_EXPR
|| cond_code
== GT_EXPR
);
5148 if (POINTER_TYPE_P (iter_type
))
5150 plus_code
= POINTER_PLUS_EXPR
;
5151 plus_type
= sizetype
;
5153 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
5154 diff_type
= signed_type_for (diff_type
);
5155 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
5156 diff_type
= integer_type_node
;
5158 basic_block entry_bb
= region
->entry
; /* BB ending in OMP_FOR */
5159 basic_block exit_bb
= region
->exit
; /* BB ending in OMP_RETURN */
5160 basic_block cont_bb
= region
->cont
; /* BB ending in OMP_CONTINUE */
5161 basic_block bottom_bb
= NULL
;
5163 /* entry_bb has two sucessors; the branch edge is to the exit
5164 block, fallthrough edge to body. */
5165 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2
5166 && BRANCH_EDGE (entry_bb
)->dest
== exit_bb
);
5168 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5169 body_bb, or to a block whose only successor is the body_bb. Its
5170 fallthrough successor is the final block (same as the branch
5171 successor of the entry_bb). */
5174 basic_block body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
5175 basic_block bed
= BRANCH_EDGE (cont_bb
)->dest
;
5177 gcc_assert (FALLTHRU_EDGE (cont_bb
)->dest
== exit_bb
);
5178 gcc_assert (bed
== body_bb
|| single_succ_edge (bed
)->dest
== body_bb
);
5181 gcc_assert (!gimple_in_ssa_p (cfun
));
5183 /* The exit block only has entry_bb and cont_bb as predecessors. */
5184 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1 + (cont_bb
!= NULL
));
5187 tree chunk_max
= NULL_TREE
;
5189 tree step
= create_tmp_var (diff_type
, ".step");
5190 bool up
= cond_code
== LT_EXPR
;
5191 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
5192 bool chunking
= !gimple_in_ssa_p (cfun
);
5196 tree tile_size
= NULL_TREE
;
5197 tree element_s
= NULL_TREE
;
5198 tree e_bound
= NULL_TREE
, e_offset
= NULL_TREE
, e_step
= NULL_TREE
;
5199 basic_block elem_body_bb
= NULL
;
5200 basic_block elem_cont_bb
= NULL
;
5202 /* SSA instances. */
5203 tree offset_incr
= NULL_TREE
;
5204 tree offset_init
= NULL_TREE
;
5206 gimple_stmt_iterator gsi
;
5212 edge split
, be
, fte
;
5214 /* Split the end of entry_bb to create head_bb. */
5215 split
= split_block (entry_bb
, last_stmt (entry_bb
));
5216 basic_block head_bb
= split
->dest
;
5217 entry_bb
= split
->src
;
5219 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5220 gsi
= gsi_last_nondebug_bb (entry_bb
);
5221 gomp_for
*for_stmt
= as_a
<gomp_for
*> (gsi_stmt (gsi
));
5222 loc
= gimple_location (for_stmt
);
5224 if (gimple_in_ssa_p (cfun
))
5226 offset_init
= gimple_omp_for_index (for_stmt
, 0);
5227 gcc_assert (integer_zerop (fd
->loop
.n1
));
5228 /* The SSA parallelizer does gang parallelism. */
5229 gwv
= build_int_cst (integer_type_node
, GOMP_DIM_MASK (GOMP_DIM_GANG
));
5232 if (fd
->collapse
> 1 || fd
->tiling
)
5234 gcc_assert (!gimple_in_ssa_p (cfun
) && up
);
5235 counts
= XALLOCAVEC (struct oacc_collapse
, fd
->collapse
);
5236 tree total
= expand_oacc_collapse_init (fd
, &gsi
, counts
,
5237 TREE_TYPE (fd
->loop
.n2
), loc
);
5239 if (SSA_VAR_P (fd
->loop
.n2
))
5241 total
= force_gimple_operand_gsi (&gsi
, total
, false, NULL_TREE
,
5242 true, GSI_SAME_STMT
);
5243 ass
= gimple_build_assign (fd
->loop
.n2
, total
);
5244 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5248 tree b
= fd
->loop
.n1
;
5249 tree e
= fd
->loop
.n2
;
5250 tree s
= fd
->loop
.step
;
5252 b
= force_gimple_operand_gsi (&gsi
, b
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5253 e
= force_gimple_operand_gsi (&gsi
, e
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5255 /* Convert the step, avoiding possible unsigned->signed overflow. */
5256 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
5258 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
5259 s
= fold_convert (diff_type
, s
);
5261 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
5262 s
= force_gimple_operand_gsi (&gsi
, s
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5265 chunk_size
= integer_zero_node
;
5266 expr
= fold_convert (diff_type
, chunk_size
);
5267 chunk_size
= force_gimple_operand_gsi (&gsi
, expr
, true,
5268 NULL_TREE
, true, GSI_SAME_STMT
);
5272 /* Determine the tile size and element step,
5273 modify the outer loop step size. */
5274 tile_size
= create_tmp_var (diff_type
, ".tile_size");
5275 expr
= build_int_cst (diff_type
, 1);
5276 for (int ix
= 0; ix
< fd
->collapse
; ix
++)
5277 expr
= fold_build2 (MULT_EXPR
, diff_type
, counts
[ix
].tile
, expr
);
5278 expr
= force_gimple_operand_gsi (&gsi
, expr
, true,
5279 NULL_TREE
, true, GSI_SAME_STMT
);
5280 ass
= gimple_build_assign (tile_size
, expr
);
5281 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5283 element_s
= create_tmp_var (diff_type
, ".element_s");
5284 ass
= gimple_build_assign (element_s
, s
);
5285 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5287 expr
= fold_build2 (MULT_EXPR
, diff_type
, s
, tile_size
);
5288 s
= force_gimple_operand_gsi (&gsi
, expr
, true,
5289 NULL_TREE
, true, GSI_SAME_STMT
);
5292 /* Determine the range, avoiding possible unsigned->signed overflow. */
5293 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
5294 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
5295 fold_convert (plus_type
, negating
? b
: e
),
5296 fold_convert (plus_type
, negating
? e
: b
));
5297 expr
= fold_convert (diff_type
, expr
);
5299 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
5300 tree range
= force_gimple_operand_gsi (&gsi
, expr
, true,
5301 NULL_TREE
, true, GSI_SAME_STMT
);
5303 chunk_no
= build_int_cst (diff_type
, 0);
5306 gcc_assert (!gimple_in_ssa_p (cfun
));
5309 chunk_max
= create_tmp_var (diff_type
, ".chunk_max");
5310 chunk_no
= create_tmp_var (diff_type
, ".chunk_no");
5312 ass
= gimple_build_assign (chunk_no
, expr
);
5313 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5315 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5316 build_int_cst (integer_type_node
,
5317 IFN_GOACC_LOOP_CHUNKS
),
5318 dir
, range
, s
, chunk_size
, gwv
);
5319 gimple_call_set_lhs (call
, chunk_max
);
5320 gimple_set_location (call
, loc
);
5321 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5324 chunk_size
= chunk_no
;
5326 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5327 build_int_cst (integer_type_node
,
5328 IFN_GOACC_LOOP_STEP
),
5329 dir
, range
, s
, chunk_size
, gwv
);
5330 gimple_call_set_lhs (call
, step
);
5331 gimple_set_location (call
, loc
);
5332 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5334 /* Remove the GIMPLE_OMP_FOR. */
5335 gsi_remove (&gsi
, true);
5337 /* Fixup edges from head_bb. */
5338 be
= BRANCH_EDGE (head_bb
);
5339 fte
= FALLTHRU_EDGE (head_bb
);
5340 be
->flags
|= EDGE_FALSE_VALUE
;
5341 fte
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5343 basic_block body_bb
= fte
->dest
;
5345 if (gimple_in_ssa_p (cfun
))
5347 gsi
= gsi_last_nondebug_bb (cont_bb
);
5348 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5350 offset
= gimple_omp_continue_control_use (cont_stmt
);
5351 offset_incr
= gimple_omp_continue_control_def (cont_stmt
);
5355 offset
= create_tmp_var (diff_type
, ".offset");
5356 offset_init
= offset_incr
= offset
;
5358 bound
= create_tmp_var (TREE_TYPE (offset
), ".bound");
5360 /* Loop offset & bound go into head_bb. */
5361 gsi
= gsi_start_bb (head_bb
);
5363 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5364 build_int_cst (integer_type_node
,
5365 IFN_GOACC_LOOP_OFFSET
),
5367 chunk_size
, gwv
, chunk_no
);
5368 gimple_call_set_lhs (call
, offset_init
);
5369 gimple_set_location (call
, loc
);
5370 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5372 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5373 build_int_cst (integer_type_node
,
5374 IFN_GOACC_LOOP_BOUND
),
5376 chunk_size
, gwv
, offset_init
);
5377 gimple_call_set_lhs (call
, bound
);
5378 gimple_set_location (call
, loc
);
5379 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5381 expr
= build2 (cond_code
, boolean_type_node
, offset_init
, bound
);
5382 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5383 GSI_CONTINUE_LINKING
);
5385 /* V assignment goes into body_bb. */
5386 if (!gimple_in_ssa_p (cfun
))
5388 gsi
= gsi_start_bb (body_bb
);
5390 expr
= build2 (plus_code
, iter_type
, b
,
5391 fold_convert (plus_type
, offset
));
5392 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5393 true, GSI_SAME_STMT
);
5394 ass
= gimple_build_assign (v
, expr
);
5395 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5397 if (fd
->collapse
> 1 || fd
->tiling
)
5398 expand_oacc_collapse_vars (fd
, false, &gsi
, counts
, v
);
5402 /* Determine the range of the element loop -- usually simply
5403 the tile_size, but could be smaller if the final
5404 iteration of the outer loop is a partial tile. */
5405 tree e_range
= create_tmp_var (diff_type
, ".e_range");
5407 expr
= build2 (MIN_EXPR
, diff_type
,
5408 build2 (MINUS_EXPR
, diff_type
, bound
, offset
),
5409 build2 (MULT_EXPR
, diff_type
, tile_size
,
5411 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5412 true, GSI_SAME_STMT
);
5413 ass
= gimple_build_assign (e_range
, expr
);
5414 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5416 /* Determine bound, offset & step of inner loop. */
5417 e_bound
= create_tmp_var (diff_type
, ".e_bound");
5418 e_offset
= create_tmp_var (diff_type
, ".e_offset");
5419 e_step
= create_tmp_var (diff_type
, ".e_step");
5421 /* Mark these as element loops. */
5422 tree t
, e_gwv
= integer_minus_one_node
;
5423 tree chunk
= build_int_cst (diff_type
, 0); /* Never chunked. */
5425 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_OFFSET
);
5426 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5427 element_s
, chunk
, e_gwv
, chunk
);
5428 gimple_call_set_lhs (call
, e_offset
);
5429 gimple_set_location (call
, loc
);
5430 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5432 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_BOUND
);
5433 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5434 element_s
, chunk
, e_gwv
, e_offset
);
5435 gimple_call_set_lhs (call
, e_bound
);
5436 gimple_set_location (call
, loc
);
5437 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5439 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_STEP
);
5440 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6, t
, dir
, e_range
,
5441 element_s
, chunk
, e_gwv
);
5442 gimple_call_set_lhs (call
, e_step
);
5443 gimple_set_location (call
, loc
);
5444 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5446 /* Add test and split block. */
5447 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5448 stmt
= gimple_build_cond_empty (expr
);
5449 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5450 split
= split_block (body_bb
, stmt
);
5451 elem_body_bb
= split
->dest
;
5452 if (cont_bb
== body_bb
)
5453 cont_bb
= elem_body_bb
;
5454 body_bb
= split
->src
;
5456 split
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5458 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5459 if (cont_bb
== NULL
)
5461 edge e
= make_edge (body_bb
, exit_bb
, EDGE_FALSE_VALUE
);
5462 e
->probability
= profile_probability::even ();
5463 split
->probability
= profile_probability::even ();
5466 /* Initialize the user's loop vars. */
5467 gsi
= gsi_start_bb (elem_body_bb
);
5468 expand_oacc_collapse_vars (fd
, true, &gsi
, counts
, e_offset
);
5472 /* Loop increment goes into cont_bb. If this is not a loop, we
5473 will have spawned threads as if it was, and each one will
5474 execute one iteration. The specification is not explicit about
5475 whether such constructs are ill-formed or not, and they can
5476 occur, especially when noreturn routines are involved. */
5479 gsi
= gsi_last_nondebug_bb (cont_bb
);
5480 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5481 loc
= gimple_location (cont_stmt
);
5485 /* Insert element loop increment and test. */
5486 expr
= build2 (PLUS_EXPR
, diff_type
, e_offset
, e_step
);
5487 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5488 true, GSI_SAME_STMT
);
5489 ass
= gimple_build_assign (e_offset
, expr
);
5490 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5491 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5493 stmt
= gimple_build_cond_empty (expr
);
5494 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5495 split
= split_block (cont_bb
, stmt
);
5496 elem_cont_bb
= split
->src
;
5497 cont_bb
= split
->dest
;
5499 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5500 split
->probability
= profile_probability::unlikely ().guessed ();
5502 = make_edge (elem_cont_bb
, elem_body_bb
, EDGE_TRUE_VALUE
);
5503 latch_edge
->probability
= profile_probability::likely ().guessed ();
5505 edge skip_edge
= make_edge (body_bb
, cont_bb
, EDGE_FALSE_VALUE
);
5506 skip_edge
->probability
= profile_probability::unlikely ().guessed ();
5507 edge loop_entry_edge
= EDGE_SUCC (body_bb
, 1 - skip_edge
->dest_idx
);
5508 loop_entry_edge
->probability
5509 = profile_probability::likely ().guessed ();
5511 gsi
= gsi_for_stmt (cont_stmt
);
5514 /* Increment offset. */
5515 if (gimple_in_ssa_p (cfun
))
5516 expr
= build2 (plus_code
, iter_type
, offset
,
5517 fold_convert (plus_type
, step
));
5519 expr
= build2 (PLUS_EXPR
, diff_type
, offset
, step
);
5520 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5521 true, GSI_SAME_STMT
);
5522 ass
= gimple_build_assign (offset_incr
, expr
);
5523 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5524 expr
= build2 (cond_code
, boolean_type_node
, offset_incr
, bound
);
5525 gsi_insert_before (&gsi
, gimple_build_cond_empty (expr
), GSI_SAME_STMT
);
5527 /* Remove the GIMPLE_OMP_CONTINUE. */
5528 gsi_remove (&gsi
, true);
5530 /* Fixup edges from cont_bb. */
5531 be
= BRANCH_EDGE (cont_bb
);
5532 fte
= FALLTHRU_EDGE (cont_bb
);
5533 be
->flags
|= EDGE_TRUE_VALUE
;
5534 fte
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5538 /* Split the beginning of exit_bb to make bottom_bb. We
5539 need to insert a nop at the start, because splitting is
5540 after a stmt, not before. */
5541 gsi
= gsi_start_bb (exit_bb
);
5542 stmt
= gimple_build_nop ();
5543 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5544 split
= split_block (exit_bb
, stmt
);
5545 bottom_bb
= split
->src
;
5546 exit_bb
= split
->dest
;
5547 gsi
= gsi_last_bb (bottom_bb
);
5549 /* Chunk increment and test goes into bottom_bb. */
5550 expr
= build2 (PLUS_EXPR
, diff_type
, chunk_no
,
5551 build_int_cst (diff_type
, 1));
5552 ass
= gimple_build_assign (chunk_no
, expr
);
5553 gsi_insert_after (&gsi
, ass
, GSI_CONTINUE_LINKING
);
5555 /* Chunk test at end of bottom_bb. */
5556 expr
= build2 (LT_EXPR
, boolean_type_node
, chunk_no
, chunk_max
);
5557 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5558 GSI_CONTINUE_LINKING
);
5560 /* Fixup edges from bottom_bb. */
5561 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5562 split
->probability
= profile_probability::unlikely ().guessed ();
5563 edge latch_edge
= make_edge (bottom_bb
, head_bb
, EDGE_TRUE_VALUE
);
5564 latch_edge
->probability
= profile_probability::likely ().guessed ();
5568 gsi
= gsi_last_nondebug_bb (exit_bb
);
5569 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
5570 loc
= gimple_location (gsi_stmt (gsi
));
5572 if (!gimple_in_ssa_p (cfun
))
5574 /* Insert the final value of V, in case it is live. This is the
5575 value for the only thread that survives past the join. */
5576 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
5577 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
5578 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
5579 expr
= fold_build2 (MULT_EXPR
, diff_type
, expr
, s
);
5580 expr
= build2 (plus_code
, iter_type
, b
, fold_convert (plus_type
, expr
));
5581 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5582 true, GSI_SAME_STMT
);
5583 ass
= gimple_build_assign (v
, expr
);
5584 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5587 /* Remove the OMP_RETURN. */
5588 gsi_remove (&gsi
, true);
5592 /* We now have one, two or three nested loops. Update the loop
5594 struct loop
*parent
= entry_bb
->loop_father
;
5595 struct loop
*body
= body_bb
->loop_father
;
5599 struct loop
*chunk_loop
= alloc_loop ();
5600 chunk_loop
->header
= head_bb
;
5601 chunk_loop
->latch
= bottom_bb
;
5602 add_loop (chunk_loop
, parent
);
5603 parent
= chunk_loop
;
5605 else if (parent
!= body
)
5607 gcc_assert (body
->header
== body_bb
);
5608 gcc_assert (body
->latch
== cont_bb
5609 || single_pred (body
->latch
) == cont_bb
);
5615 struct loop
*body_loop
= alloc_loop ();
5616 body_loop
->header
= body_bb
;
5617 body_loop
->latch
= cont_bb
;
5618 add_loop (body_loop
, parent
);
5622 /* Insert tiling's element loop. */
5623 struct loop
*inner_loop
= alloc_loop ();
5624 inner_loop
->header
= elem_body_bb
;
5625 inner_loop
->latch
= elem_cont_bb
;
5626 add_loop (inner_loop
, body_loop
);
5632 /* Expand the OMP loop defined by REGION. */
5635 expand_omp_for (struct omp_region
*region
, gimple
*inner_stmt
)
5637 struct omp_for_data fd
;
5638 struct omp_for_data_loop
*loops
;
5641 = (struct omp_for_data_loop
*)
5642 alloca (gimple_omp_for_collapse (last_stmt (region
->entry
))
5643 * sizeof (struct omp_for_data_loop
));
5644 omp_extract_for_data (as_a
<gomp_for
*> (last_stmt (region
->entry
)),
5646 region
->sched_kind
= fd
.sched_kind
;
5647 region
->sched_modifiers
= fd
.sched_modifiers
;
5649 gcc_assert (EDGE_COUNT (region
->entry
->succs
) == 2);
5650 BRANCH_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
5651 FALLTHRU_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
5654 gcc_assert (EDGE_COUNT (region
->cont
->succs
) == 2);
5655 BRANCH_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
5656 FALLTHRU_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
5659 /* If there isn't a continue then this is a degerate case where
5660 the introduction of abnormal edges during lowering will prevent
5661 original loops from being detected. Fix that up. */
5662 loops_state_set (LOOPS_NEED_FIXUP
);
5664 if (gimple_omp_for_kind (fd
.for_stmt
) & GF_OMP_FOR_SIMD
)
5665 expand_omp_simd (region
, &fd
);
5666 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_OACC_LOOP
)
5668 gcc_assert (!inner_stmt
);
5669 expand_oacc_for (region
, &fd
);
5671 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_TASKLOOP
)
5673 if (gimple_omp_for_combined_into_p (fd
.for_stmt
))
5674 expand_omp_taskloop_for_inner (region
, &fd
, inner_stmt
);
5676 expand_omp_taskloop_for_outer (region
, &fd
, inner_stmt
);
5678 else if (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
5679 && !fd
.have_ordered
)
5681 if (fd
.chunk_size
== NULL
)
5682 expand_omp_for_static_nochunk (region
, &fd
, inner_stmt
);
5684 expand_omp_for_static_chunk (region
, &fd
, inner_stmt
);
5688 int fn_index
, start_ix
, next_ix
;
5690 gcc_assert (gimple_omp_for_kind (fd
.for_stmt
)
5691 == GF_OMP_FOR_KIND_FOR
);
5692 if (fd
.chunk_size
== NULL
5693 && fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
)
5694 fd
.chunk_size
= integer_zero_node
;
5695 gcc_assert (fd
.sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
5696 switch (fd
.sched_kind
)
5698 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
5701 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
5702 case OMP_CLAUSE_SCHEDULE_GUIDED
:
5703 if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_NONMONOTONIC
)
5705 && !fd
.have_ordered
)
5707 fn_index
= 3 + fd
.sched_kind
;
5712 fn_index
= fd
.sched_kind
;
5716 fn_index
+= fd
.have_ordered
* 6;
5718 start_ix
= ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START
) + fn_index
;
5720 start_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_START
) + fn_index
;
5721 next_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
) + fn_index
;
5722 if (fd
.iter_type
== long_long_unsigned_type_node
)
5724 start_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5725 - (int)BUILT_IN_GOMP_LOOP_STATIC_START
);
5726 next_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5727 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
);
5729 expand_omp_for_generic (region
, &fd
, (enum built_in_function
) start_ix
,
5730 (enum built_in_function
) next_ix
, inner_stmt
);
5733 if (gimple_in_ssa_p (cfun
))
5734 update_ssa (TODO_update_ssa_only_virtuals
);
5737 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5739 v = GOMP_sections_start (n);
5756 v = GOMP_sections_next ();
5761 If this is a combined parallel sections, replace the call to
5762 GOMP_sections_start with call to GOMP_sections_next. */
5765 expand_omp_sections (struct omp_region
*region
)
5767 tree t
, u
, vin
= NULL
, vmain
, vnext
, l2
;
5769 basic_block entry_bb
, l0_bb
, l1_bb
, l2_bb
, default_bb
;
5770 gimple_stmt_iterator si
, switch_si
;
5771 gomp_sections
*sections_stmt
;
5773 gomp_continue
*cont
;
5776 struct omp_region
*inner
;
5778 bool exit_reachable
= region
->cont
!= NULL
;
5780 gcc_assert (region
->exit
!= NULL
);
5781 entry_bb
= region
->entry
;
5782 l0_bb
= single_succ (entry_bb
);
5783 l1_bb
= region
->cont
;
5784 l2_bb
= region
->exit
;
5785 if (single_pred_p (l2_bb
) && single_pred (l2_bb
) == l0_bb
)
5786 l2
= gimple_block_label (l2_bb
);
5789 /* This can happen if there are reductions. */
5790 len
= EDGE_COUNT (l0_bb
->succs
);
5791 gcc_assert (len
> 0);
5792 e
= EDGE_SUCC (l0_bb
, len
- 1);
5793 si
= gsi_last_nondebug_bb (e
->dest
);
5796 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
5797 l2
= gimple_block_label (e
->dest
);
5799 FOR_EACH_EDGE (e
, ei
, l0_bb
->succs
)
5801 si
= gsi_last_nondebug_bb (e
->dest
);
5803 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
5805 l2
= gimple_block_label (e
->dest
);
5811 default_bb
= create_empty_bb (l1_bb
->prev_bb
);
5813 default_bb
= create_empty_bb (l0_bb
);
5815 /* We will build a switch() with enough cases for all the
5816 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5817 and a default case to abort if something goes wrong. */
5818 len
= EDGE_COUNT (l0_bb
->succs
);
5820 /* Use vec::quick_push on label_vec throughout, since we know the size
5822 auto_vec
<tree
> label_vec (len
);
5824 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5825 GIMPLE_OMP_SECTIONS statement. */
5826 si
= gsi_last_nondebug_bb (entry_bb
);
5827 sections_stmt
= as_a
<gomp_sections
*> (gsi_stmt (si
));
5828 gcc_assert (gimple_code (sections_stmt
) == GIMPLE_OMP_SECTIONS
);
5829 vin
= gimple_omp_sections_control (sections_stmt
);
5830 if (!is_combined_parallel (region
))
5832 /* If we are not inside a combined parallel+sections region,
5833 call GOMP_sections_start. */
5834 t
= build_int_cst (unsigned_type_node
, len
- 1);
5835 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START
);
5836 stmt
= gimple_build_call (u
, 1, t
);
5840 /* Otherwise, call GOMP_sections_next. */
5841 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
5842 stmt
= gimple_build_call (u
, 0);
5844 gimple_call_set_lhs (stmt
, vin
);
5845 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
5846 gsi_remove (&si
, true);
5848 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5850 switch_si
= gsi_last_nondebug_bb (l0_bb
);
5851 gcc_assert (gimple_code (gsi_stmt (switch_si
)) == GIMPLE_OMP_SECTIONS_SWITCH
);
5854 cont
= as_a
<gomp_continue
*> (last_stmt (l1_bb
));
5855 gcc_assert (gimple_code (cont
) == GIMPLE_OMP_CONTINUE
);
5856 vmain
= gimple_omp_continue_control_use (cont
);
5857 vnext
= gimple_omp_continue_control_def (cont
);
5865 t
= build_case_label (build_int_cst (unsigned_type_node
, 0), NULL
, l2
);
5866 label_vec
.quick_push (t
);
5869 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5870 for (inner
= region
->inner
, casei
= 1;
5872 inner
= inner
->next
, i
++, casei
++)
5874 basic_block s_entry_bb
, s_exit_bb
;
5876 /* Skip optional reduction region. */
5877 if (inner
->type
== GIMPLE_OMP_ATOMIC_LOAD
)
5884 s_entry_bb
= inner
->entry
;
5885 s_exit_bb
= inner
->exit
;
5887 t
= gimple_block_label (s_entry_bb
);
5888 u
= build_int_cst (unsigned_type_node
, casei
);
5889 u
= build_case_label (u
, NULL
, t
);
5890 label_vec
.quick_push (u
);
5892 si
= gsi_last_nondebug_bb (s_entry_bb
);
5893 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SECTION
);
5894 gcc_assert (i
< len
|| gimple_omp_section_last_p (gsi_stmt (si
)));
5895 gsi_remove (&si
, true);
5896 single_succ_edge (s_entry_bb
)->flags
= EDGE_FALLTHRU
;
5898 if (s_exit_bb
== NULL
)
5901 si
= gsi_last_nondebug_bb (s_exit_bb
);
5902 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
5903 gsi_remove (&si
, true);
5905 single_succ_edge (s_exit_bb
)->flags
= EDGE_FALLTHRU
;
5908 /* Error handling code goes in DEFAULT_BB. */
5909 t
= gimple_block_label (default_bb
);
5910 u
= build_case_label (NULL
, NULL
, t
);
5911 make_edge (l0_bb
, default_bb
, 0);
5912 add_bb_to_loop (default_bb
, current_loops
->tree_root
);
5914 stmt
= gimple_build_switch (vmain
, u
, label_vec
);
5915 gsi_insert_after (&switch_si
, stmt
, GSI_SAME_STMT
);
5916 gsi_remove (&switch_si
, true);
5918 si
= gsi_start_bb (default_bb
);
5919 stmt
= gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP
), 0);
5920 gsi_insert_after (&si
, stmt
, GSI_CONTINUE_LINKING
);
5926 /* Code to get the next section goes in L1_BB. */
5927 si
= gsi_last_nondebug_bb (l1_bb
);
5928 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CONTINUE
);
5930 bfn_decl
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
5931 stmt
= gimple_build_call (bfn_decl
, 0);
5932 gimple_call_set_lhs (stmt
, vnext
);
5933 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
5934 gsi_remove (&si
, true);
5936 single_succ_edge (l1_bb
)->flags
= EDGE_FALLTHRU
;
5939 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5940 si
= gsi_last_nondebug_bb (l2_bb
);
5941 if (gimple_omp_return_nowait_p (gsi_stmt (si
)))
5942 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT
);
5943 else if (gimple_omp_return_lhs (gsi_stmt (si
)))
5944 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL
);
5946 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END
);
5947 stmt
= gimple_build_call (t
, 0);
5948 if (gimple_omp_return_lhs (gsi_stmt (si
)))
5949 gimple_call_set_lhs (stmt
, gimple_omp_return_lhs (gsi_stmt (si
)));
5950 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
5951 gsi_remove (&si
, true);
5953 set_immediate_dominator (CDI_DOMINATORS
, default_bb
, l0_bb
);
5956 /* Expand code for an OpenMP single directive. We've already expanded
5957 much of the code, here we simply place the GOMP_barrier call. */
5960 expand_omp_single (struct omp_region
*region
)
5962 basic_block entry_bb
, exit_bb
;
5963 gimple_stmt_iterator si
;
5965 entry_bb
= region
->entry
;
5966 exit_bb
= region
->exit
;
5968 si
= gsi_last_nondebug_bb (entry_bb
);
5969 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
);
5970 gsi_remove (&si
, true);
5971 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
5973 si
= gsi_last_nondebug_bb (exit_bb
);
5974 if (!gimple_omp_return_nowait_p (gsi_stmt (si
)))
5976 tree t
= gimple_omp_return_lhs (gsi_stmt (si
));
5977 gsi_insert_after (&si
, omp_build_barrier (t
), GSI_SAME_STMT
);
5979 gsi_remove (&si
, true);
5980 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
5983 /* Generic expansion for OpenMP synchronization directives: master,
5984 ordered and critical. All we need to do here is remove the entry
5985 and exit markers for REGION. */
5988 expand_omp_synch (struct omp_region
*region
)
5990 basic_block entry_bb
, exit_bb
;
5991 gimple_stmt_iterator si
;
5993 entry_bb
= region
->entry
;
5994 exit_bb
= region
->exit
;
5996 si
= gsi_last_nondebug_bb (entry_bb
);
5997 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
5998 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_MASTER
5999 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TASKGROUP
6000 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ORDERED
6001 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CRITICAL
6002 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TEAMS
);
6003 gsi_remove (&si
, true);
6004 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
6008 si
= gsi_last_nondebug_bb (exit_bb
);
6009 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
6010 gsi_remove (&si
, true);
6011 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
6015 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6016 operation as a normal volatile load. */
6019 expand_omp_atomic_load (basic_block load_bb
, tree addr
,
6020 tree loaded_val
, int index
)
6022 enum built_in_function tmpbase
;
6023 gimple_stmt_iterator gsi
;
6024 basic_block store_bb
;
6027 tree decl
, call
, type
, itype
;
6029 gsi
= gsi_last_nondebug_bb (load_bb
);
6030 stmt
= gsi_stmt (gsi
);
6031 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
6032 loc
= gimple_location (stmt
);
6034 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6035 is smaller than word size, then expand_atomic_load assumes that the load
6036 is atomic. We could avoid the builtin entirely in this case. */
6038 tmpbase
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
6039 decl
= builtin_decl_explicit (tmpbase
);
6040 if (decl
== NULL_TREE
)
6043 type
= TREE_TYPE (loaded_val
);
6044 itype
= TREE_TYPE (TREE_TYPE (decl
));
6046 call
= build_call_expr_loc (loc
, decl
, 2, addr
,
6047 build_int_cst (NULL
,
6048 gimple_omp_atomic_seq_cst_p (stmt
)
6050 : MEMMODEL_RELAXED
));
6051 if (!useless_type_conversion_p (type
, itype
))
6052 call
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
6053 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
6055 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6056 gsi_remove (&gsi
, true);
6058 store_bb
= single_succ (load_bb
);
6059 gsi
= gsi_last_nondebug_bb (store_bb
);
6060 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
6061 gsi_remove (&gsi
, true);
6063 if (gimple_in_ssa_p (cfun
))
6064 update_ssa (TODO_update_ssa_no_phi
);
6069 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6070 operation as a normal volatile store. */
6073 expand_omp_atomic_store (basic_block load_bb
, tree addr
,
6074 tree loaded_val
, tree stored_val
, int index
)
6076 enum built_in_function tmpbase
;
6077 gimple_stmt_iterator gsi
;
6078 basic_block store_bb
= single_succ (load_bb
);
6081 tree decl
, call
, type
, itype
;
6085 gsi
= gsi_last_nondebug_bb (load_bb
);
6086 stmt
= gsi_stmt (gsi
);
6087 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
6089 /* If the load value is needed, then this isn't a store but an exchange. */
6090 exchange
= gimple_omp_atomic_need_value_p (stmt
);
6092 gsi
= gsi_last_nondebug_bb (store_bb
);
6093 stmt
= gsi_stmt (gsi
);
6094 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_STORE
);
6095 loc
= gimple_location (stmt
);
6097 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6098 is smaller than word size, then expand_atomic_store assumes that the store
6099 is atomic. We could avoid the builtin entirely in this case. */
6101 tmpbase
= (exchange
? BUILT_IN_ATOMIC_EXCHANGE_N
: BUILT_IN_ATOMIC_STORE_N
);
6102 tmpbase
= (enum built_in_function
) ((int) tmpbase
+ index
+ 1);
6103 decl
= builtin_decl_explicit (tmpbase
);
6104 if (decl
== NULL_TREE
)
6107 type
= TREE_TYPE (stored_val
);
6109 /* Dig out the type of the function's second argument. */
6110 itype
= TREE_TYPE (decl
);
6111 itype
= TYPE_ARG_TYPES (itype
);
6112 itype
= TREE_CHAIN (itype
);
6113 itype
= TREE_VALUE (itype
);
6114 imode
= TYPE_MODE (itype
);
6116 if (exchange
&& !can_atomic_exchange_p (imode
, true))
6119 if (!useless_type_conversion_p (itype
, type
))
6120 stored_val
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, itype
, stored_val
);
6121 call
= build_call_expr_loc (loc
, decl
, 3, addr
, stored_val
,
6122 build_int_cst (NULL
,
6123 gimple_omp_atomic_seq_cst_p (stmt
)
6125 : MEMMODEL_RELAXED
));
6128 if (!useless_type_conversion_p (type
, itype
))
6129 call
= build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
6130 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
6133 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6134 gsi_remove (&gsi
, true);
6136 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6137 gsi
= gsi_last_nondebug_bb (load_bb
);
6138 gsi_remove (&gsi
, true);
6140 if (gimple_in_ssa_p (cfun
))
6141 update_ssa (TODO_update_ssa_no_phi
);
6146 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6147 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6148 size of the data type, and thus usable to find the index of the builtin
6149 decl. Returns false if the expression is not of the proper form. */
/* NOTE(review): this extraction appears to have dropped source lines (the
   embedded original line numbers jump, e.g. 6149->6152, 6206->6210); the
   return-type line, switch/case labels and some declarations are missing.
   Comments below describe only what the visible code shows.  */
6152 expand_omp_atomic_fetch_op (basic_block load_bb
,
6153 tree addr
, tree loaded_val
,
6154 tree stored_val
, int index
)
6156 enum built_in_function oldbase
, newbase
, tmpbase
;
6157 tree decl
, itype
, call
;
6159 basic_block store_bb
= single_succ (load_bb
);
6160 gimple_stmt_iterator gsi
;
6163 enum tree_code code
;
6164 bool need_old
, need_new
;
6168 /* We expect to find the following sequences:
6171 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6174 val = tmp OP something; (or: something OP tmp)
6175 GIMPLE_OMP_STORE (val)
6177 ???FIXME: Allow a more flexible sequence.
6178 Perhaps use data flow to pick the statements.
/* Locate the single non-debug assignment in STORE_BB that computes the
   stored value; bail out (return false) if the shape does not match.  */
6182 gsi
= gsi_after_labels (store_bb
);
6183 stmt
= gsi_stmt (gsi
);
6184 if (is_gimple_debug (stmt
))
6186 gsi_next_nondebug (&gsi
);
6187 if (gsi_end_p (gsi
))
6189 stmt
= gsi_stmt (gsi
);
6191 loc
= gimple_location (stmt
);
6192 if (!is_gimple_assign (stmt
))
6194 gsi_next_nondebug (&gsi
);
6195 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_OMP_ATOMIC_STORE
)
/* Whether the caller needs the post-op value (store side) or the pre-op
   value (load side); at most one of the two may be requested.  */
6197 need_new
= gimple_omp_atomic_need_value_p (gsi_stmt (gsi
));
6198 need_old
= gimple_omp_atomic_need_value_p (last_stmt (load_bb
));
6199 seq_cst
= gimple_omp_atomic_seq_cst_p (last_stmt (load_bb
));
6200 gcc_checking_assert (!need_old
|| !need_new
);
6202 if (!operand_equal_p (gimple_assign_lhs (stmt
), stored_val
, 0))
6205 /* Check for one of the supported fetch-op operations. */
6206 code
= gimple_assign_rhs_code (stmt
);
/* Map the tree code to the __atomic_fetch_OP / __atomic_OP_fetch builtin
   pair.  (Some case labels, e.g. for MINUS/AND/OR/XOR, were lost in this
   extraction — only the assignments to oldbase/newbase survive.)  */
6210 case POINTER_PLUS_EXPR
:
6211 oldbase
= BUILT_IN_ATOMIC_FETCH_ADD_N
;
6212 newbase
= BUILT_IN_ATOMIC_ADD_FETCH_N
;
6215 oldbase
= BUILT_IN_ATOMIC_FETCH_SUB_N
;
6216 newbase
= BUILT_IN_ATOMIC_SUB_FETCH_N
;
6219 oldbase
= BUILT_IN_ATOMIC_FETCH_AND_N
;
6220 newbase
= BUILT_IN_ATOMIC_AND_FETCH_N
;
6223 oldbase
= BUILT_IN_ATOMIC_FETCH_OR_N
;
6224 newbase
= BUILT_IN_ATOMIC_OR_FETCH_N
;
6227 oldbase
= BUILT_IN_ATOMIC_FETCH_XOR_N
;
6228 newbase
= BUILT_IN_ATOMIC_XOR_FETCH_N
;
6234 /* Make sure the expression is of the proper form. */
6235 if (operand_equal_p (gimple_assign_rhs1 (stmt
), loaded_val
, 0))
6236 rhs
= gimple_assign_rhs2 (stmt
);
6237 else if (commutative_tree_code (gimple_assign_rhs_code (stmt
))
6238 && operand_equal_p (gimple_assign_rhs2 (stmt
), loaded_val
, 0))
6239 rhs
= gimple_assign_rhs1 (stmt
);
/* +1 because the _N builtins are followed by the size-specific variants;
   INDEX is log2 of the access size (see function comment above).  */
6243 tmpbase
= ((enum built_in_function
)
6244 ((need_new
? newbase
: oldbase
) + index
+ 1));
6245 decl
= builtin_decl_explicit (tmpbase
);
6246 if (decl
== NULL_TREE
)
6248 itype
= TREE_TYPE (TREE_TYPE (decl
));
6249 imode
= TYPE_MODE (itype
);
6251 /* We could test all of the various optabs involved, but the fact of the
6252 matter is that (with the exception of i486 vs i586 and xadd) all targets
6253 that support any atomic operaton optab also implements compare-and-swap.
6254 Let optabs.c take care of expanding any compare-and-swap loop. */
6255 if (!can_compare_and_swap_p (imode
, true) || !can_atomic_load_p (imode
))
6258 gsi
= gsi_last_nondebug_bb (load_bb
);
6259 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6261 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6262 It only requires that the operation happen atomically. Thus we can
6263 use the RELAXED memory model. */
6264 call
= build_call_expr_loc (loc
, decl
, 3, addr
,
6265 fold_convert_loc (loc
, itype
, rhs
),
6266 build_int_cst (NULL
,
6267 seq_cst
? MEMMODEL_SEQ_CST
6268 : MEMMODEL_RELAXED
));
/* If either the pre- or post-op value is needed, assign the builtin's
   result into the corresponding variable; otherwise discard it.  */
6270 if (need_old
|| need_new
)
6272 lhs
= need_old
? loaded_val
: stored_val
;
6273 call
= fold_convert_loc (loc
, TREE_TYPE (lhs
), call
);
6274 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, lhs
, call
);
6277 call
= fold_convert_loc (loc
, void_type_node
, call
);
6278 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6279 gsi_remove (&gsi
, true);
/* Remove the GIMPLE_OMP_ATOMIC_STORE and the now-redundant assignment
   that computed the stored value.  */
6281 gsi
= gsi_last_nondebug_bb (store_bb
);
6282 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
6283 gsi_remove (&gsi
, true);
6284 gsi
= gsi_last_nondebug_bb (store_bb
);
6285 stmt
= gsi_stmt (gsi
);
6286 gsi_remove (&gsi
, true);
6288 if (gimple_in_ssa_p (cfun
))
6290 release_defs (stmt
);
6291 update_ssa (TODO_update_ssa_no_phi
);
6297 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6301 newval = rhs; // with oldval replacing *addr in rhs
6302 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6303 if (oldval != newval)
6306 INDEX is log2 of the size of the data type, and thus usable to find the
6307 index of the builtin decl. */
/* NOTE(review): extraction gaps — original line numbers jump (6311->6314,
   6344->6348, 6359->6364, ...), so the return type, several declarations
   (rhs of some assignments) and braces are missing from this view.  */
6310 expand_omp_atomic_pipeline (basic_block load_bb
, basic_block store_bb
,
6311 tree addr
, tree loaded_val
, tree stored_val
,
6314 tree loadedi
, storedi
, initial
, new_storedi
, old_vali
;
6315 tree type
, itype
, cmpxchg
, iaddr
, atype
;
6316 gimple_stmt_iterator si
;
6317 basic_block loop_header
= single_succ (load_bb
);
6320 enum built_in_function fncode
;
6322 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6323 order to use the RELAXED memory model effectively. */
6324 fncode
= (enum built_in_function
)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6326 cmpxchg
= builtin_decl_explicit (fncode
);
6327 if (cmpxchg
== NULL_TREE
)
6329 type
= TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val
));
6331 itype
= TREE_TYPE (TREE_TYPE (cmpxchg
));
/* Give up (return false) if the target cannot compare-and-swap or
   atomically load in this mode.  */
6333 if (!can_compare_and_swap_p (TYPE_MODE (itype
), true)
6334 || !can_atomic_load_p (TYPE_MODE (itype
)))
6337 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6338 si
= gsi_last_nondebug_bb (load_bb
);
6339 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6341 /* For floating-point values, we'll need to view-convert them to integers
6342 so that we can perform the atomic compare and swap. Simplify the
6343 following code by always setting up the "i"ntegral variables. */
6344 if (!INTEGRAL_TYPE_P (type
) && !POINTER_TYPE_P (type
))
6348 iaddr
= create_tmp_reg (build_pointer_type_for_mode (itype
, ptr_mode
,
6352 = force_gimple_operand_gsi (&si
,
6353 fold_convert (TREE_TYPE (iaddr
), addr
),
6354 false, NULL_TREE
, true, GSI_SAME_STMT
);
6355 stmt
= gimple_build_assign (iaddr
, iaddr_val
);
6356 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6357 loadedi
= create_tmp_var (itype
);
6358 if (gimple_in_ssa_p (cfun
))
6359 loadedi
= make_ssa_name (loadedi
);
6364 loadedi
= loaded_val
;
/* Prefer a size-specific __atomic_load_N builtin for the initial read;
   otherwise fall back to a plain MEM_REF built below.  */
6367 fncode
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
6368 tree loaddecl
= builtin_decl_explicit (fncode
);
6371 = fold_convert (atype
,
6372 build_call_expr (loaddecl
, 2, iaddr
,
6373 build_int_cst (NULL_TREE
,
6374 MEMMODEL_RELAXED
)));
6378 = build_int_cst (build_pointer_type_for_mode (atype
, ptr_mode
,
6380 initial
= build2 (MEM_REF
, atype
, iaddr
, off
);
6384 = force_gimple_operand_gsi (&si
, initial
, true, NULL_TREE
, true,
6387 /* Move the value to the LOADEDI temporary. */
6388 if (gimple_in_ssa_p (cfun
))
6390 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header
)));
6391 phi
= create_phi_node (loadedi
, loop_header
);
6392 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, single_succ_edge (load_bb
)),
6396 gsi_insert_before (&si
,
6397 gimple_build_assign (loadedi
, initial
),
/* When LOADEDI is a separate integral temporary (FP case), view-convert
   it back to the user-visible type and store into LOADED_VAL at the top
   of the CAS loop header.  */
6399 if (loadedi
!= loaded_val
)
6401 gimple_stmt_iterator gsi2
;
6404 x
= build1 (VIEW_CONVERT_EXPR
, type
, loadedi
);
6405 gsi2
= gsi_start_bb (loop_header
);
6406 if (gimple_in_ssa_p (cfun
))
6409 x
= force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
6410 true, GSI_SAME_STMT
);
6411 stmt
= gimple_build_assign (loaded_val
, x
);
6412 gsi_insert_before (&gsi2
, stmt
, GSI_SAME_STMT
);
6416 x
= build2 (MODIFY_EXPR
, TREE_TYPE (loaded_val
), loaded_val
, x
);
6417 force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
6418 true, GSI_SAME_STMT
);
6421 gsi_remove (&si
, true);
6423 si
= gsi_last_nondebug_bb (store_bb
);
6424 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
6427 storedi
= stored_val
;
6430 = force_gimple_operand_gsi (&si
,
6431 build1 (VIEW_CONVERT_EXPR
, itype
,
6432 stored_val
), true, NULL_TREE
, true,
6435 /* Build the compare&swap statement. */
6436 new_storedi
= build_call_expr (cmpxchg
, 3, iaddr
, loadedi
, storedi
);
6437 new_storedi
= force_gimple_operand_gsi (&si
,
6438 fold_convert (TREE_TYPE (loadedi
),
6441 true, GSI_SAME_STMT
);
6443 if (gimple_in_ssa_p (cfun
))
6447 old_vali
= create_tmp_var (TREE_TYPE (loadedi
));
6448 stmt
= gimple_build_assign (old_vali
, loadedi
);
6449 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6451 stmt
= gimple_build_assign (loadedi
, new_storedi
);
6452 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6455 /* Note that we always perform the comparison as an integer, even for
6456 floating point. This allows the atomic operation to properly
6457 succeed even with NaNs and -0.0. */
6458 tree ne
= build2 (NE_EXPR
, boolean_type_node
, new_storedi
, old_vali
);
6459 stmt
= gimple_build_cond_empty (ne
);
6460 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
/* Rewire the CFG: the fallthru edge out of STORE_BB becomes the loop-exit
   (false) edge, and a new true edge loops back to the header.  */
6463 e
= single_succ_edge (store_bb
);
6464 e
->flags
&= ~EDGE_FALLTHRU
;
6465 e
->flags
|= EDGE_FALSE_VALUE
;
6466 /* Expect no looping. */
6467 e
->probability
= profile_probability::guessed_always ();
6469 e
= make_edge (store_bb
, loop_header
, EDGE_TRUE_VALUE
);
6470 e
->probability
= profile_probability::guessed_never ();
6472 /* Copy the new value to loadedi (we already did that before the condition
6473 if we are not in SSA). */
6474 if (gimple_in_ssa_p (cfun
))
6476 phi
= gimple_seq_first_stmt (phi_nodes (loop_header
));
6477 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, e
), new_storedi
);
6480 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6481 gsi_remove (&si
, true);
/* Register the new CAS retry loop with the loop tree.  */
6483 struct loop
*loop
= alloc_loop ();
6484 loop
->header
= loop_header
;
6485 loop
->latch
= store_bb
;
6486 add_loop (loop
, loop_header
->loop_father
);
6488 if (gimple_in_ssa_p (cfun
))
6489 update_ssa (TODO_update_ssa_no_phi
);
6494 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6496 GOMP_atomic_start ();
6500 The result is not globally atomic, but works so long as all parallel
6501 references are within #pragma omp atomic directives. According to
6502 responses received from omp@openmp.org, appears to be within spec.
6503 Which makes sense, since that's how several other compilers handle
6504 this situation as well.
6505 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6506 expanding. STORED_VAL is the operand of the matching
6507 GIMPLE_OMP_ATOMIC_STORE.
6510 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6514 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
/* NOTE(review): return-type line and opening brace lost in extraction
   (original line numbers jump 6514->6519).  */
6519 expand_omp_atomic_mutex (basic_block load_bb
, basic_block store_bb
,
6520 tree addr
, tree loaded_val
, tree stored_val
)
6522 gimple_stmt_iterator si
;
6526 si
= gsi_last_nondebug_bb (load_bb
);
6527 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
/* Emit the libgomp mutex acquire before the load.  */
6529 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START
);
6530 t
= build_call_expr (t
, 0);
6531 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
/* Build *ADDR with the type of LOADED_VAL and read it into LOADED_VAL,
   replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6533 tree mem
= build_simple_mem_ref (addr
);
6534 TREE_TYPE (mem
) = TREE_TYPE (loaded_val
);
6535 TREE_OPERAND (mem
, 1)
6536 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem
), ptr_mode
,
6538 TREE_OPERAND (mem
, 1));
6539 stmt
= gimple_build_assign (loaded_val
, mem
);
6540 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6541 gsi_remove (&si
, true);
6543 si
= gsi_last_nondebug_bb (store_bb
);
6544 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
/* Write STORED_VAL back through the same memory reference, then emit the
   mutex release, replacing the GIMPLE_OMP_ATOMIC_STORE.  */
6546 stmt
= gimple_build_assign (unshare_expr (mem
), stored_val
);
6547 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6549 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END
);
6550 t
= build_call_expr (t
, 0);
6551 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6552 gsi_remove (&si
, true);
6554 if (gimple_in_ssa_p (cfun
))
6555 update_ssa (TODO_update_ssa_no_phi
);
6559 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6560 using expand_omp_atomic_fetch_op. If it failed, we try to
6561 call expand_omp_atomic_pipeline, and if it fails too, the
6562 ultimate fallback is wrapping the operation in a mutex
6563 (expand_omp_atomic_mutex). REGION is the atomic region built
6564 by build_omp_regions_1(). */
/* NOTE(review): extraction dropped lines here too (e.g. the return type
   and the `smode` declaration referenced below are not visible).  */
6567 expand_omp_atomic (struct omp_region
*region
)
6569 basic_block load_bb
= region
->entry
, store_bb
= region
->exit
;
6570 gomp_atomic_load
*load
= as_a
<gomp_atomic_load
*> (last_stmt (load_bb
));
6571 gomp_atomic_store
*store
= as_a
<gomp_atomic_store
*> (last_stmt (store_bb
));
6572 tree loaded_val
= gimple_omp_atomic_load_lhs (load
);
6573 tree addr
= gimple_omp_atomic_load_rhs (load
);
6574 tree stored_val
= gimple_omp_atomic_store_val (store
);
6575 tree type
= TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val
));
6576 HOST_WIDE_INT index
;
6578 /* Make sure the type is one of the supported sizes. */
6579 index
= tree_to_uhwi (TYPE_SIZE_UNIT (type
));
6580 index
= exact_log2 (index
);
/* Only power-of-two sizes from 1 to 16 bytes (log2 in [0,4]) can use the
   specialized expansions below.  */
6581 if (index
>= 0 && index
<= 4)
6583 unsigned int align
= TYPE_ALIGN_UNIT (type
);
6585 /* __sync builtins require strict data alignment. */
6586 if (exact_log2 (align
) >= index
)
/* Case 1: atomic read (load and store values identical).  */
6590 if (loaded_val
== stored_val
6591 && (is_int_mode (TYPE_MODE (type
), &smode
)
6592 || is_float_mode (TYPE_MODE (type
), &smode
))
6593 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
6594 && expand_omp_atomic_load (load_bb
, addr
, loaded_val
, index
))
/* Case 2: atomic write (store immediately follows the load).  */
6598 if ((is_int_mode (TYPE_MODE (type
), &smode
)
6599 || is_float_mode (TYPE_MODE (type
), &smode
))
6600 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
6601 && store_bb
== single_succ (load_bb
)
6602 && first_stmt (store_bb
) == store
6603 && expand_omp_atomic_store (load_bb
, addr
, loaded_val
,
6607 /* When possible, use specialized atomic update functions. */
6608 if ((INTEGRAL_TYPE_P (type
) || POINTER_TYPE_P (type
))
6609 && store_bb
== single_succ (load_bb
)
6610 && expand_omp_atomic_fetch_op (load_bb
, addr
,
6611 loaded_val
, stored_val
, index
))
6614 /* If we don't have specialized __sync builtins, try and implement
6615 as a compare and swap loop. */
6616 if (expand_omp_atomic_pipeline (load_bb
, store_bb
, addr
,
6617 loaded_val
, stored_val
, index
))
6622 /* The ultimate fallback is wrapping the operation in a mutex. */
6623 expand_omp_atomic_mutex (load_bb
, store_bb
, addr
, loaded_val
, stored_val
);
6626 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
/* NOTE(review): the tail of this comment and the function's return type
   were lost by the extraction (line numbers jump 6626->6630).  */
6630 mark_loops_in_oacc_kernels_region (basic_block region_entry
,
6631 basic_block region_exit
)
6633 struct loop
*outer
= region_entry
->loop_father
;
6634 gcc_assert (region_exit
== NULL
|| outer
== region_exit
->loop_father
);
6636 /* Don't parallelize the kernels region if it contains more than one outer
/* Count the immediate sub-loops of OUTER whose header is dominated by the
   region entry (and not past the region exit); remember the single one.  */
6638 unsigned int nr_outer_loops
= 0;
6639 struct loop
*single_outer
= NULL
;
6640 for (struct loop
*loop
= outer
->inner
; loop
!= NULL
; loop
= loop
->next
)
6642 gcc_assert (loop_outer (loop
) == outer
);
6644 if (!dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_entry
))
6647 if (region_exit
!= NULL
6648 && dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_exit
))
6652 single_outer
= loop
;
6654 if (nr_outer_loops
!= 1)
6657 for (struct loop
*loop
= single_outer
->inner
;
6663 /* Mark the loops in the region. */
/* Walk down the chain of innermost loops, flagging each one as belonging
   to the OpenACC kernels region.  */
6664 for (struct loop
*loop
= single_outer
; loop
!= NULL
; loop
= loop
->inner
)
6665 loop
->in_oacc_kernels_region
= true;
6668 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6670 struct GTY(()) grid_launch_attributes_trees
/* Array type uint32[3] holding one size per grid dimension.  */
6672 tree kernel_dim_array_type
;
/* FIELD_DECL "ndim": number of dimensions actually used.  */
6673 tree kernel_lattrs_dimnum_decl
;
/* FIELD_DECL "grid_size": per-dimension grid sizes.  */
6674 tree kernel_lattrs_grid_decl
;
/* FIELD_DECL "group_size": per-dimension work-group sizes.  */
6675 tree kernel_lattrs_group_decl
;
/* The RECORD_TYPE "__gomp_kernel_launch_attributes" itself.  */
6676 tree kernel_launch_attributes_type
;
/* Lazily-built singleton; created by
   grid_create_kernel_launch_attr_types.  */
6679 static GTY(()) struct grid_launch_attributes_trees
*grid_attr_trees
;
6681 /* Create types used to pass kernel launch attributes to target. */
6684 grid_create_kernel_launch_attr_types (void)
/* Already built — the trees are cached in the static singleton.  */
6686 if (grid_attr_trees
)
6688 grid_attr_trees
= ggc_alloc
<grid_launch_attributes_trees
> ();
/* Index type 0..2 -> a three-element uint32 array, one slot per
   possible grid dimension.  */
6690 tree dim_arr_index_type
6691 = build_index_type (build_int_cst (integer_type_node
, 2));
6692 grid_attr_trees
->kernel_dim_array_type
6693 = build_array_type (uint32_type_node
, dim_arr_index_type
);
/* Build the record: ndim, grid_size[3], group_size[3], chained in
   reverse via DECL_CHAIN as finish_builtin_struct expects.  */
6695 grid_attr_trees
->kernel_launch_attributes_type
= make_node (RECORD_TYPE
);
6696 grid_attr_trees
->kernel_lattrs_dimnum_decl
6697 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("ndim"),
6699 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_dimnum_decl
) = NULL_TREE
;
6701 grid_attr_trees
->kernel_lattrs_grid_decl
6702 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("grid_size"),
6703 grid_attr_trees
->kernel_dim_array_type
);
6704 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_grid_decl
)
6705 = grid_attr_trees
->kernel_lattrs_dimnum_decl
;
6706 grid_attr_trees
->kernel_lattrs_group_decl
6707 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("group_size"),
6708 grid_attr_trees
->kernel_dim_array_type
);
6709 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_group_decl
)
6710 = grid_attr_trees
->kernel_lattrs_grid_decl
;
6711 finish_builtin_struct (grid_attr_trees
->kernel_launch_attributes_type
,
6712 "__gomp_kernel_launch_attributes",
6713 grid_attr_trees
->kernel_lattrs_group_decl
, NULL_TREE
);
6716 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6717 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6718 of type uint32_type_node. */
6721 grid_insert_store_range_dim (gimple_stmt_iterator
*gsi
, tree range_var
,
6722 tree fld_decl
, int index
, tree value
)
/* Build RANGE_VAR.FLD_DECL[INDEX] ...  */
6724 tree ref
= build4 (ARRAY_REF
, uint32_type_node
,
6725 build3 (COMPONENT_REF
,
6726 grid_attr_trees
->kernel_dim_array_type
,
6727 range_var
, fld_decl
, NULL_TREE
),
6728 build_int_cst (integer_type_node
, index
),
6729 NULL_TREE
, NULL_TREE
);
/* ... and assign VALUE into it ahead of the current statement.  */
6730 gsi_insert_before (gsi
, gimple_build_assign (ref
, value
), GSI_SAME_STMT
);
6733 /* Return a tree representation of a pointer to a structure with grid and
6734 work-group size information. Statements filling that information will be
6735 inserted before GSI, TGT_STMT is the target statement which has the
6736 necessary information in it. */
6739 grid_get_kernel_launch_attributes (gimple_stmt_iterator
*gsi
,
6740 gomp_target
*tgt_stmt
)
/* Ensure the launch-attribute record type exists, then materialize a
   local temporary of that type.  */
6742 grid_create_kernel_launch_attr_types ();
6743 tree lattrs
= create_tmp_var (grid_attr_trees
->kernel_launch_attributes_type
,
6744 "__kernel_launch_attrs");
/* Fill grid_size[] and group_size[] from each _GRIDDIM_ clause on the
   target statement, tracking the highest dimension seen.  */
6746 unsigned max_dim
= 0;
6747 for (tree clause
= gimple_omp_target_clauses (tgt_stmt
);
6749 clause
= OMP_CLAUSE_CHAIN (clause
))
6751 if (OMP_CLAUSE_CODE (clause
) != OMP_CLAUSE__GRIDDIM_
)
6754 unsigned dim
= OMP_CLAUSE__GRIDDIM__DIMENSION (clause
);
6755 max_dim
= MAX (dim
, max_dim
);
6757 grid_insert_store_range_dim (gsi
, lattrs
,
6758 grid_attr_trees
->kernel_lattrs_grid_decl
,
6759 dim
, OMP_CLAUSE__GRIDDIM__SIZE (clause
));
6760 grid_insert_store_range_dim (gsi
, lattrs
,
6761 grid_attr_trees
->kernel_lattrs_group_decl
,
6762 dim
, OMP_CLAUSE__GRIDDIM__GROUP (clause
));
/* Store ndim = max_dim + 1 (dimensions are 0-based, at most 3).  */
6765 tree dimref
= build3 (COMPONENT_REF
, uint32_type_node
, lattrs
,
6766 grid_attr_trees
->kernel_lattrs_dimnum_decl
, NULL_TREE
);
6767 gcc_checking_assert (max_dim
<= 2);
6768 tree dimensions
= build_int_cstu (uint32_type_node
, max_dim
+ 1);
6769 gsi_insert_before (gsi
, gimple_build_assign (dimref
, dimensions
),
/* The address of the temporary escapes into the argument array.  */
6771 TREE_ADDRESSABLE (lattrs
) = 1;
6772 return build_fold_addr_expr (lattrs
);
6775 /* Build target argument identifier from the DEVICE identifier, value
6776 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6779 get_target_argument_identifier_1 (int device
, bool subseqent_param
, int id
)
/* OR together DEVICE, the optional SUBSEQUENT_PARAM flag and ID into one
   integer constant, per the GOMP_TARGET_ARG_* encoding.
   (NOTE(review): the final return line was lost in this extraction.)  */
6781 tree t
= build_int_cst (integer_type_node
, device
);
6782 if (subseqent_param
)
6783 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6784 build_int_cst (integer_type_node
,
6785 GOMP_TARGET_ARG_SUBSEQUENT_PARAM
));
6786 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6787 build_int_cst (integer_type_node
, id
));
6791 /* Like above but return it in type that can be directly stored as an element
6792 of the argument array. */
6795 get_target_argument_identifier (int device
, bool subseqent_param
, int id
)
/* Same encoding as get_target_argument_identifier_1, converted to
   pointer type so it fits the void* argument array.  */
6797 tree t
= get_target_argument_identifier_1 (device
, subseqent_param
, id
);
6798 return fold_convert (ptr_type_node
, t
);
6801 /* Return a target argument consisting of DEVICE identifier, value identifier
6802 ID, and the actual VALUE. */
6805 get_target_argument_value (gimple_stmt_iterator
*gsi
, int device
, int id
,
/* Pack VALUE into the high bits (shifted by GOMP_TARGET_ARG_VALUE_SHIFT)
   and the device/id identifier into the low bits of a single word.  */
6808 tree t
= fold_build2 (LSHIFT_EXPR
, integer_type_node
,
6809 fold_convert (integer_type_node
, value
),
6810 build_int_cst (unsigned_type_node
,
6811 GOMP_TARGET_ARG_VALUE_SHIFT
));
6812 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6813 get_target_argument_identifier_1 (device
, false, id
));
6814 t
= fold_convert (ptr_type_node
, t
);
/* Gimplify in place so the caller can store the result directly.  */
6815 return force_gimple_operand_gsi (gsi
, t
, true, NULL
, true, GSI_SAME_STMT
);
6818 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6819 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
6820 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6824 push_target_argument_according_to_value (gimple_stmt_iterator
*gsi
, int device
,
6825 int id
, tree value
, vec
<tree
> *args
)
/* Small compile-time constant: embed it in a single packed element.  */
6827 if (tree_fits_shwi_p (value
)
6828 && tree_to_shwi (value
) > -(1 << 15)
6829 && tree_to_shwi (value
) < (1 << 15))
6830 args
->quick_push (get_target_argument_value (gsi
, device
, id
, value
));
/* Otherwise: push an identifier element flagged SUBSEQUENT_PARAM, then
   the gimplified value as a separate element.  */
6833 args
->quick_push (get_target_argument_identifier (device
, true, id
));
6834 value
= fold_convert (ptr_type_node
, value
);
6835 value
= force_gimple_operand_gsi (gsi
, value
, true, NULL
, true,
6837 args
->quick_push (value
);
6841 /* Create an array of arguments that is then passed to GOMP_target. */
6844 get_target_arguments (gimple_stmt_iterator
*gsi
, gomp_target
*tgt_stmt
)
6846 auto_vec
<tree
, 6> args
;
6847 tree clauses
= gimple_omp_target_clauses (tgt_stmt
);
/* num_teams: use the clause expression if present, -1 ("let the runtime
   choose") otherwise.  */
6848 tree t
, c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_TEAMS
);
6850 t
= OMP_CLAUSE_NUM_TEAMS_EXPR (c
);
6852 t
= integer_minus_one_node
;
6853 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
6854 GOMP_TARGET_ARG_NUM_TEAMS
, t
, &args
);
/* thread_limit: same treatment.  */
6856 c
= omp_find_clause (clauses
, OMP_CLAUSE_THREAD_LIMIT
);
6858 t
= OMP_CLAUSE_THREAD_LIMIT_EXPR (c
);
6860 t
= integer_minus_one_node
;
6861 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
6862 GOMP_TARGET_ARG_THREAD_LIMIT
, t
,
6865 /* Add HSA-specific grid sizes, if available. */
6866 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
6867 OMP_CLAUSE__GRIDDIM_
))
6869 int id
= GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES
;
6870 t
= get_target_argument_identifier (GOMP_DEVICE_HSA
, true, id
);
6871 args
.quick_push (t
);
6872 args
.quick_push (grid_get_kernel_launch_attributes (gsi
, tgt_stmt
));
6875 /* Produce more, perhaps device specific, arguments here. */
/* Materialize the collected arguments into a stack array with one extra
   slot for the terminating NULL pointer.  */
6877 tree argarray
= create_tmp_var (build_array_type_nelts (ptr_type_node
,
6878 args
.length () + 1),
6879 ".omp_target_args");
6880 for (unsigned i
= 0; i
< args
.length (); i
++)
6882 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
6883 build_int_cst (integer_type_node
, i
),
6884 NULL_TREE
, NULL_TREE
);
6885 gsi_insert_before (gsi
, gimple_build_assign (ref
, args
[i
]),
/* NULL terminator in the last slot.  */
6888 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
6889 build_int_cst (integer_type_node
, args
.length ()),
6890 NULL_TREE
, NULL_TREE
);
6891 gsi_insert_before (gsi
, gimple_build_assign (ref
, null_pointer_node
),
6893 TREE_ADDRESSABLE (argarray
) = 1;
6894 return build_fold_addr_expr (argarray
);
6897 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6900 expand_omp_target (struct omp_region
*region
)
6902 basic_block entry_bb
, exit_bb
, new_bb
;
6903 struct function
*child_cfun
;
6904 tree child_fn
, block
, t
;
6905 gimple_stmt_iterator gsi
;
6906 gomp_target
*entry_stmt
;
6909 bool offloaded
, data_region
;
6911 entry_stmt
= as_a
<gomp_target
*> (last_stmt (region
->entry
));
6912 new_bb
= region
->entry
;
6914 offloaded
= is_gimple_omp_offloaded (entry_stmt
);
6915 switch (gimple_omp_target_kind (entry_stmt
))
6917 case GF_OMP_TARGET_KIND_REGION
:
6918 case GF_OMP_TARGET_KIND_UPDATE
:
6919 case GF_OMP_TARGET_KIND_ENTER_DATA
:
6920 case GF_OMP_TARGET_KIND_EXIT_DATA
:
6921 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
6922 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
6923 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
6924 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
6925 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
6926 data_region
= false;
6928 case GF_OMP_TARGET_KIND_DATA
:
6929 case GF_OMP_TARGET_KIND_OACC_DATA
:
6930 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
6937 child_fn
= NULL_TREE
;
6941 child_fn
= gimple_omp_target_child_fn (entry_stmt
);
6942 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
6945 /* Supported by expand_omp_taskreg, but not here. */
6946 if (child_cfun
!= NULL
)
6947 gcc_checking_assert (!child_cfun
->cfg
);
6948 gcc_checking_assert (!gimple_in_ssa_p (cfun
));
6950 entry_bb
= region
->entry
;
6951 exit_bb
= region
->exit
;
6953 if (gimple_omp_target_kind (entry_stmt
) == GF_OMP_TARGET_KIND_OACC_KERNELS
)
6955 mark_loops_in_oacc_kernels_region (region
->entry
, region
->exit
);
6957 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6958 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6959 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
6960 DECL_ATTRIBUTES (child_fn
)
6961 = tree_cons (get_identifier ("oacc kernels"),
6962 NULL_TREE
, DECL_ATTRIBUTES (child_fn
));
6967 unsigned srcidx
, dstidx
, num
;
6969 /* If the offloading region needs data sent from the parent
6970 function, then the very first statement (except possible
6971 tree profile counter updates) of the offloading body
6972 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6973 &.OMP_DATA_O is passed as an argument to the child function,
6974 we need to replace it with the argument as seen by the child
6977 In most cases, this will end up being the identity assignment
6978 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6979 a function call that has been inlined, the original PARM_DECL
6980 .OMP_DATA_I may have been converted into a different local
6981 variable. In which case, we need to keep the assignment. */
6982 tree data_arg
= gimple_omp_target_data_arg (entry_stmt
);
6985 basic_block entry_succ_bb
= single_succ (entry_bb
);
6986 gimple_stmt_iterator gsi
;
6988 gimple
*tgtcopy_stmt
= NULL
;
6989 tree sender
= TREE_VEC_ELT (data_arg
, 0);
6991 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
6993 gcc_assert (!gsi_end_p (gsi
));
6994 stmt
= gsi_stmt (gsi
);
6995 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6998 if (gimple_num_ops (stmt
) == 2)
7000 tree arg
= gimple_assign_rhs1 (stmt
);
7002 /* We're ignoring the subcode because we're
7003 effectively doing a STRIP_NOPS. */
7005 if (TREE_CODE (arg
) == ADDR_EXPR
7006 && TREE_OPERAND (arg
, 0) == sender
)
7008 tgtcopy_stmt
= stmt
;
7014 gcc_assert (tgtcopy_stmt
!= NULL
);
7015 arg
= DECL_ARGUMENTS (child_fn
);
7017 gcc_assert (gimple_assign_lhs (tgtcopy_stmt
) == arg
);
7018 gsi_remove (&gsi
, true);
7021 /* Declare local variables needed in CHILD_CFUN. */
7022 block
= DECL_INITIAL (child_fn
);
7023 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
7024 /* The gimplifier could record temporaries in the offloading block
7025 rather than in containing function's local_decls chain,
7026 which would mean cgraph missed finalizing them. Do it now. */
7027 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
7028 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
7029 varpool_node::finalize_decl (t
);
7030 DECL_SAVED_TREE (child_fn
) = NULL
;
7031 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7032 gimple_set_body (child_fn
, NULL
);
7033 TREE_USED (block
) = 1;
7035 /* Reset DECL_CONTEXT on function arguments. */
7036 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
7037 DECL_CONTEXT (t
) = child_fn
;
7039 /* Split ENTRY_BB at GIMPLE_*,
7040 so that it can be moved to the child function. */
7041 gsi
= gsi_last_nondebug_bb (entry_bb
);
7042 stmt
= gsi_stmt (gsi
);
7044 && gimple_code (stmt
) == gimple_code (entry_stmt
));
7045 e
= split_block (entry_bb
, stmt
);
7046 gsi_remove (&gsi
, true);
7048 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
7050 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7053 gsi
= gsi_last_nondebug_bb (exit_bb
);
7054 gcc_assert (!gsi_end_p (gsi
)
7055 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7056 stmt
= gimple_build_return (NULL
);
7057 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
7058 gsi_remove (&gsi
, true);
7061 /* Move the offloading region into CHILD_CFUN. */
7063 block
= gimple_block (entry_stmt
);
7065 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
7067 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
7068 /* When the OMP expansion process cannot guarantee an up-to-date
7069 loop tree arrange for the child function to fixup loops. */
7070 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7071 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
7073 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7074 num
= vec_safe_length (child_cfun
->local_decls
);
7075 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
7077 t
= (*child_cfun
->local_decls
)[srcidx
];
7078 if (DECL_CONTEXT (t
) == cfun
->decl
)
7080 if (srcidx
!= dstidx
)
7081 (*child_cfun
->local_decls
)[dstidx
] = t
;
7085 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
7087 /* Inform the callgraph about the new function. */
7088 child_cfun
->curr_properties
= cfun
->curr_properties
;
7089 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
7090 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
7091 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
7092 node
->parallelized_function
= 1;
7093 cgraph_node::add_new_function (child_fn
, true);
7095 /* Add the new function to the offload table. */
7096 if (ENABLE_OFFLOADING
)
7099 DECL_PRESERVE_P (child_fn
) = 1;
7100 vec_safe_push (offload_funcs
, child_fn
);
7103 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
7104 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
7106 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7107 fixed in a following pass. */
7108 push_cfun (child_cfun
);
7110 assign_assembler_name_if_needed (child_fn
);
7111 cgraph_edge::rebuild_edges ();
7113 /* Some EH regions might become dead, see PR34608. If
7114 pass_cleanup_cfg isn't the first pass to happen with the
7115 new child, these dead EH edges might cause problems.
7116 Clean them up now. */
7117 if (flag_exceptions
)
7120 bool changed
= false;
7122 FOR_EACH_BB_FN (bb
, cfun
)
7123 changed
|= gimple_purge_dead_eh_edges (bb
);
7125 cleanup_tree_cfg ();
7127 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7128 verify_loop_structure ();
7131 if (dump_file
&& !gimple_in_ssa_p (cfun
))
7133 omp_any_child_fn_dumped
= true;
7134 dump_function_header (dump_file
, child_fn
, dump_flags
);
7135 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
7138 adjust_context_and_scope (region
, gimple_block (entry_stmt
), child_fn
);
7141 /* Emit a library call to launch the offloading region, or do data
7143 tree t1
, t2
, t3
, t4
, device
, cond
, depend
, c
, clauses
;
7144 enum built_in_function start_ix
;
7145 location_t clause_loc
;
7146 unsigned int flags_i
= 0;
7148 switch (gimple_omp_target_kind (entry_stmt
))
7150 case GF_OMP_TARGET_KIND_REGION
:
7151 start_ix
= BUILT_IN_GOMP_TARGET
;
7153 case GF_OMP_TARGET_KIND_DATA
:
7154 start_ix
= BUILT_IN_GOMP_TARGET_DATA
;
7156 case GF_OMP_TARGET_KIND_UPDATE
:
7157 start_ix
= BUILT_IN_GOMP_TARGET_UPDATE
;
7159 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7160 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7162 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7163 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7164 flags_i
|= GOMP_TARGET_FLAG_EXIT_DATA
;
7166 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7167 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7168 start_ix
= BUILT_IN_GOACC_PARALLEL
;
7170 case GF_OMP_TARGET_KIND_OACC_DATA
:
7171 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7172 start_ix
= BUILT_IN_GOACC_DATA_START
;
7174 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7175 start_ix
= BUILT_IN_GOACC_UPDATE
;
7177 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7178 start_ix
= BUILT_IN_GOACC_ENTER_EXIT_DATA
;
7180 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7181 start_ix
= BUILT_IN_GOACC_DECLARE
;
7187 clauses
= gimple_omp_target_clauses (entry_stmt
);
7189 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7190 library choose) and there is no conditional. */
7192 device
= build_int_cst (integer_type_node
, GOMP_DEVICE_ICV
);
7194 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
7196 cond
= OMP_CLAUSE_IF_EXPR (c
);
7198 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEVICE
);
7201 /* Even if we pass it to all library function calls, it is currently only
7202 defined/used for the OpenMP target ones. */
7203 gcc_checking_assert (start_ix
== BUILT_IN_GOMP_TARGET
7204 || start_ix
== BUILT_IN_GOMP_TARGET_DATA
7205 || start_ix
== BUILT_IN_GOMP_TARGET_UPDATE
7206 || start_ix
== BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
);
7208 device
= OMP_CLAUSE_DEVICE_ID (c
);
7209 clause_loc
= OMP_CLAUSE_LOCATION (c
);
7212 clause_loc
= gimple_location (entry_stmt
);
7214 c
= omp_find_clause (clauses
, OMP_CLAUSE_NOWAIT
);
7216 flags_i
|= GOMP_TARGET_FLAG_NOWAIT
;
7218 /* Ensure 'device' is of the correct type. */
7219 device
= fold_convert_loc (clause_loc
, integer_type_node
, device
);
7221 /* If we found the clause 'if (cond)', build
7222 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7225 cond
= gimple_boolify (cond
);
7227 basic_block cond_bb
, then_bb
, else_bb
;
7231 tmp_var
= create_tmp_var (TREE_TYPE (device
));
7233 e
= split_block_after_labels (new_bb
);
7236 gsi
= gsi_last_nondebug_bb (new_bb
);
7238 e
= split_block (new_bb
, gsi_stmt (gsi
));
7244 then_bb
= create_empty_bb (cond_bb
);
7245 else_bb
= create_empty_bb (then_bb
);
7246 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
7247 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
7249 stmt
= gimple_build_cond_empty (cond
);
7250 gsi
= gsi_last_bb (cond_bb
);
7251 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7253 gsi
= gsi_start_bb (then_bb
);
7254 stmt
= gimple_build_assign (tmp_var
, device
);
7255 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7257 gsi
= gsi_start_bb (else_bb
);
7258 stmt
= gimple_build_assign (tmp_var
,
7259 build_int_cst (integer_type_node
,
7260 GOMP_DEVICE_HOST_FALLBACK
));
7261 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7263 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
7264 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
7265 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
7266 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
7267 make_edge (then_bb
, new_bb
, EDGE_FALLTHRU
);
7268 make_edge (else_bb
, new_bb
, EDGE_FALLTHRU
);
7271 gsi
= gsi_last_nondebug_bb (new_bb
);
7275 gsi
= gsi_last_nondebug_bb (new_bb
);
7276 device
= force_gimple_operand_gsi (&gsi
, device
, true, NULL_TREE
,
7277 true, GSI_SAME_STMT
);
7280 t
= gimple_omp_target_data_arg (entry_stmt
);
7283 t1
= size_zero_node
;
7284 t2
= build_zero_cst (ptr_type_node
);
7290 t1
= TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t
, 1))));
7291 t1
= size_binop (PLUS_EXPR
, t1
, size_int (1));
7292 t2
= build_fold_addr_expr (TREE_VEC_ELT (t
, 0));
7293 t3
= build_fold_addr_expr (TREE_VEC_ELT (t
, 1));
7294 t4
= build_fold_addr_expr (TREE_VEC_ELT (t
, 2));
7298 bool tagging
= false;
7299 /* The maximum number used by any start_ix, without varargs. */
7300 auto_vec
<tree
, 11> args
;
7301 args
.quick_push (device
);
7303 args
.quick_push (build_fold_addr_expr (child_fn
));
7304 args
.quick_push (t1
);
7305 args
.quick_push (t2
);
7306 args
.quick_push (t3
);
7307 args
.quick_push (t4
);
7310 case BUILT_IN_GOACC_DATA_START
:
7311 case BUILT_IN_GOACC_DECLARE
:
7312 case BUILT_IN_GOMP_TARGET_DATA
:
7314 case BUILT_IN_GOMP_TARGET
:
7315 case BUILT_IN_GOMP_TARGET_UPDATE
:
7316 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
:
7317 args
.quick_push (build_int_cst (unsigned_type_node
, flags_i
));
7318 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
7320 depend
= OMP_CLAUSE_DECL (c
);
7322 depend
= build_int_cst (ptr_type_node
, 0);
7323 args
.quick_push (depend
);
7324 if (start_ix
== BUILT_IN_GOMP_TARGET
)
7325 args
.quick_push (get_target_arguments (&gsi
, entry_stmt
));
7327 case BUILT_IN_GOACC_PARALLEL
:
7328 oacc_set_fn_attrib (child_fn
, clauses
, &args
);
7331 case BUILT_IN_GOACC_ENTER_EXIT_DATA
:
7332 case BUILT_IN_GOACC_UPDATE
:
7334 tree t_async
= NULL_TREE
;
7336 /* If present, use the value specified by the respective
7337 clause, making sure that is of the correct type. */
7338 c
= omp_find_clause (clauses
, OMP_CLAUSE_ASYNC
);
7340 t_async
= fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
7342 OMP_CLAUSE_ASYNC_EXPR (c
));
7344 /* Default values for t_async. */
7345 t_async
= fold_convert_loc (gimple_location (entry_stmt
),
7347 build_int_cst (integer_type_node
,
7349 if (tagging
&& t_async
)
7351 unsigned HOST_WIDE_INT i_async
= GOMP_LAUNCH_OP_MAX
;
7353 if (TREE_CODE (t_async
) == INTEGER_CST
)
7355 /* See if we can pack the async arg in to the tag's
7357 i_async
= TREE_INT_CST_LOW (t_async
);
7358 if (i_async
< GOMP_LAUNCH_OP_MAX
)
7359 t_async
= NULL_TREE
;
7361 i_async
= GOMP_LAUNCH_OP_MAX
;
7363 args
.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC
, NULL_TREE
,
7367 args
.safe_push (t_async
);
7369 /* Save the argument index, and ... */
7370 unsigned t_wait_idx
= args
.length ();
7371 unsigned num_waits
= 0;
7372 c
= omp_find_clause (clauses
, OMP_CLAUSE_WAIT
);
7374 /* ... push a placeholder. */
7375 args
.safe_push (integer_zero_node
);
7377 for (; c
; c
= OMP_CLAUSE_CHAIN (c
))
7378 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_WAIT
)
7380 args
.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
7382 OMP_CLAUSE_WAIT_EXPR (c
)));
7386 if (!tagging
|| num_waits
)
7390 /* Now that we know the number, update the placeholder. */
7392 len
= oacc_launch_pack (GOMP_LAUNCH_WAIT
, NULL_TREE
, num_waits
);
7394 len
= build_int_cst (integer_type_node
, num_waits
);
7395 len
= fold_convert_loc (gimple_location (entry_stmt
),
7396 unsigned_type_node
, len
);
7397 args
[t_wait_idx
] = len
;
7405 /* Push terminal marker - zero. */
7406 args
.safe_push (oacc_launch_pack (0, NULL_TREE
, 0));
7408 g
= gimple_build_call_vec (builtin_decl_explicit (start_ix
), args
);
7409 gimple_set_location (g
, gimple_location (entry_stmt
));
7410 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
7414 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_TARGET
);
7415 gsi_remove (&gsi
, true);
7417 if (data_region
&& region
->exit
)
7419 gsi
= gsi_last_nondebug_bb (region
->exit
);
7421 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_RETURN
);
7422 gsi_remove (&gsi
, true);
7426 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7427 iteration variable derived from the thread number. INTRA_GROUP means this
7428 is an expansion of a loop iterating over work-items within a separate
7429 iteration over groups. */
7432 grid_expand_omp_for_loop (struct omp_region
*kfor
, bool intra_group
)
7434 gimple_stmt_iterator gsi
;
7435 gomp_for
*for_stmt
= as_a
<gomp_for
*> (last_stmt (kfor
->entry
));
7436 gcc_checking_assert (gimple_omp_for_kind (for_stmt
)
7437 == GF_OMP_FOR_KIND_GRID_LOOP
);
7438 size_t collapse
= gimple_omp_for_collapse (for_stmt
);
7439 struct omp_for_data_loop
*loops
7440 = XALLOCAVEC (struct omp_for_data_loop
,
7441 gimple_omp_for_collapse (for_stmt
));
7442 struct omp_for_data fd
;
7444 remove_edge (BRANCH_EDGE (kfor
->entry
));
7445 basic_block body_bb
= FALLTHRU_EDGE (kfor
->entry
)->dest
;
7447 gcc_assert (kfor
->cont
);
7448 omp_extract_for_data (for_stmt
, &fd
, loops
);
7450 gsi
= gsi_start_bb (body_bb
);
7452 for (size_t dim
= 0; dim
< collapse
; dim
++)
7455 itype
= type
= TREE_TYPE (fd
.loops
[dim
].v
);
7456 if (POINTER_TYPE_P (type
))
7457 itype
= signed_type_for (type
);
7459 tree n1
= fd
.loops
[dim
].n1
;
7460 tree step
= fd
.loops
[dim
].step
;
7461 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
7462 true, NULL_TREE
, true, GSI_SAME_STMT
);
7463 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
7464 true, NULL_TREE
, true, GSI_SAME_STMT
);
7466 if (gimple_omp_for_grid_group_iter (for_stmt
))
7468 gcc_checking_assert (!intra_group
);
7469 threadid
= build_call_expr (builtin_decl_explicit
7470 (BUILT_IN_HSA_WORKGROUPID
), 1,
7471 build_int_cstu (unsigned_type_node
, dim
));
7473 else if (intra_group
)
7474 threadid
= build_call_expr (builtin_decl_explicit
7475 (BUILT_IN_HSA_WORKITEMID
), 1,
7476 build_int_cstu (unsigned_type_node
, dim
));
7478 threadid
= build_call_expr (builtin_decl_explicit
7479 (BUILT_IN_HSA_WORKITEMABSID
), 1,
7480 build_int_cstu (unsigned_type_node
, dim
));
7481 threadid
= fold_convert (itype
, threadid
);
7482 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
7483 true, GSI_SAME_STMT
);
7485 tree startvar
= fd
.loops
[dim
].v
;
7486 tree t
= fold_build2 (MULT_EXPR
, itype
, threadid
, step
);
7487 if (POINTER_TYPE_P (type
))
7488 t
= fold_build_pointer_plus (n1
, t
);
7490 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
7491 t
= fold_convert (type
, t
);
7492 t
= force_gimple_operand_gsi (&gsi
, t
,
7494 && TREE_ADDRESSABLE (startvar
),
7495 NULL_TREE
, true, GSI_SAME_STMT
);
7496 gassign
*assign_stmt
= gimple_build_assign (startvar
, t
);
7497 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
7499 /* Remove the omp for statement. */
7500 gsi
= gsi_last_nondebug_bb (kfor
->entry
);
7501 gsi_remove (&gsi
, true);
7503 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7504 gsi
= gsi_last_nondebug_bb (kfor
->cont
);
7505 gcc_assert (!gsi_end_p (gsi
)
7506 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_CONTINUE
);
7507 gsi_remove (&gsi
, true);
7509 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7510 gsi
= gsi_last_nondebug_bb (kfor
->exit
);
7511 gcc_assert (!gsi_end_p (gsi
)
7512 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7514 gsi_insert_before (&gsi
, omp_build_barrier (NULL_TREE
), GSI_SAME_STMT
);
7515 gsi_remove (&gsi
, true);
7517 /* Fixup the much simpler CFG. */
7518 remove_edge (find_edge (kfor
->cont
, body_bb
));
7520 if (kfor
->cont
!= body_bb
)
7521 set_immediate_dominator (CDI_DOMINATORS
, kfor
->cont
, body_bb
);
7522 set_immediate_dominator (CDI_DOMINATORS
, kfor
->exit
, kfor
->cont
);
7525 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7528 struct grid_arg_decl_map
7534 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7535 pertaining to kernel function. */
7538 grid_remap_kernel_arg_accesses (tree
*tp
, int *walk_subtrees
, void *data
)
7540 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
7541 struct grid_arg_decl_map
*adm
= (struct grid_arg_decl_map
*) wi
->info
;
7544 if (t
== adm
->old_arg
)
7546 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
7550 /* If TARGET region contains a kernel body for loop, remove its region from the
7551 TARGET and expand it in HSA gridified kernel fashion. */
7554 grid_expand_target_grid_body (struct omp_region
*target
)
7556 if (!hsa_gen_requested_p ())
7559 gomp_target
*tgt_stmt
= as_a
<gomp_target
*> (last_stmt (target
->entry
));
7560 struct omp_region
**pp
;
7562 for (pp
= &target
->inner
; *pp
; pp
= &(*pp
)->next
)
7563 if ((*pp
)->type
== GIMPLE_OMP_GRID_BODY
)
7566 struct omp_region
*gpukernel
= *pp
;
7568 tree orig_child_fndecl
= gimple_omp_target_child_fn (tgt_stmt
);
7571 /* HSA cannot handle OACC stuff. */
7572 if (gimple_omp_target_kind (tgt_stmt
) != GF_OMP_TARGET_KIND_REGION
)
7574 gcc_checking_assert (orig_child_fndecl
);
7575 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7576 OMP_CLAUSE__GRIDDIM_
));
7577 cgraph_node
*n
= cgraph_node::get (orig_child_fndecl
);
7579 hsa_register_kernel (n
);
7583 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7584 OMP_CLAUSE__GRIDDIM_
));
7586 = gimple_block (first_stmt (single_succ (gpukernel
->entry
)));
7587 *pp
= gpukernel
->next
;
7588 for (pp
= &gpukernel
->inner
; *pp
; pp
= &(*pp
)->next
)
7589 if ((*pp
)->type
== GIMPLE_OMP_FOR
)
7592 struct omp_region
*kfor
= *pp
;
7594 gomp_for
*for_stmt
= as_a
<gomp_for
*> (last_stmt (kfor
->entry
));
7595 gcc_assert (gimple_omp_for_kind (for_stmt
) == GF_OMP_FOR_KIND_GRID_LOOP
);
7599 if (gimple_omp_for_grid_group_iter (for_stmt
))
7601 struct omp_region
**next_pp
;
7602 for (pp
= &kfor
->inner
; *pp
; pp
= next_pp
)
7604 next_pp
= &(*pp
)->next
;
7605 if ((*pp
)->type
!= GIMPLE_OMP_FOR
)
7607 gomp_for
*inner
= as_a
<gomp_for
*> (last_stmt ((*pp
)->entry
));
7608 gcc_assert (gimple_omp_for_kind (inner
)
7609 == GF_OMP_FOR_KIND_GRID_LOOP
);
7610 grid_expand_omp_for_loop (*pp
, true);
7615 expand_omp (kfor
->inner
);
7617 if (gpukernel
->inner
)
7618 expand_omp (gpukernel
->inner
);
7620 tree kern_fndecl
= copy_node (orig_child_fndecl
);
7621 DECL_NAME (kern_fndecl
) = clone_function_name (kern_fndecl
, "kernel");
7622 SET_DECL_ASSEMBLER_NAME (kern_fndecl
, DECL_NAME (kern_fndecl
));
7623 tree tgtblock
= gimple_block (tgt_stmt
);
7624 tree fniniblock
= make_node (BLOCK
);
7625 BLOCK_ABSTRACT_ORIGIN (fniniblock
) = tgtblock
;
7626 BLOCK_SOURCE_LOCATION (fniniblock
) = BLOCK_SOURCE_LOCATION (tgtblock
);
7627 BLOCK_SOURCE_END_LOCATION (fniniblock
) = BLOCK_SOURCE_END_LOCATION (tgtblock
);
7628 BLOCK_SUPERCONTEXT (fniniblock
) = kern_fndecl
;
7629 DECL_INITIAL (kern_fndecl
) = fniniblock
;
7630 push_struct_function (kern_fndecl
);
7631 cfun
->function_end_locus
= gimple_location (tgt_stmt
);
7632 init_tree_ssa (cfun
);
7635 tree old_parm_decl
= DECL_ARGUMENTS (kern_fndecl
);
7636 gcc_assert (!DECL_CHAIN (old_parm_decl
));
7637 tree new_parm_decl
= copy_node (DECL_ARGUMENTS (kern_fndecl
));
7638 DECL_CONTEXT (new_parm_decl
) = kern_fndecl
;
7639 DECL_ARGUMENTS (kern_fndecl
) = new_parm_decl
;
7640 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl
))));
7641 DECL_RESULT (kern_fndecl
) = copy_node (DECL_RESULT (kern_fndecl
));
7642 DECL_CONTEXT (DECL_RESULT (kern_fndecl
)) = kern_fndecl
;
7643 struct function
*kern_cfun
= DECL_STRUCT_FUNCTION (kern_fndecl
);
7644 kern_cfun
->curr_properties
= cfun
->curr_properties
;
7646 grid_expand_omp_for_loop (kfor
, false);
7648 /* Remove the omp for statement. */
7649 gimple_stmt_iterator gsi
= gsi_last_nondebug_bb (gpukernel
->entry
);
7650 gsi_remove (&gsi
, true);
7651 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7653 gsi
= gsi_last_nondebug_bb (gpukernel
->exit
);
7654 gcc_assert (!gsi_end_p (gsi
)
7655 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7656 gimple
*ret_stmt
= gimple_build_return (NULL
);
7657 gsi_insert_after (&gsi
, ret_stmt
, GSI_SAME_STMT
);
7658 gsi_remove (&gsi
, true);
7660 /* Statements in the first BB in the target construct have been produced by
7661 target lowering and must be copied inside the GPUKERNEL, with the two
7662 exceptions of the first OMP statement and the OMP_DATA assignment
7664 gsi
= gsi_start_bb (single_succ (gpukernel
->entry
));
7665 tree data_arg
= gimple_omp_target_data_arg (tgt_stmt
);
7666 tree sender
= data_arg
? TREE_VEC_ELT (data_arg
, 0) : NULL
;
7667 for (gimple_stmt_iterator tsi
= gsi_start_bb (single_succ (target
->entry
));
7668 !gsi_end_p (tsi
); gsi_next (&tsi
))
7670 gimple
*stmt
= gsi_stmt (tsi
);
7671 if (is_gimple_omp (stmt
))
7674 && is_gimple_assign (stmt
)
7675 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == ADDR_EXPR
7676 && TREE_OPERAND (gimple_assign_rhs1 (stmt
), 0) == sender
)
7678 gimple
*copy
= gimple_copy (stmt
);
7679 gsi_insert_before (&gsi
, copy
, GSI_SAME_STMT
);
7680 gimple_set_block (copy
, fniniblock
);
7683 move_sese_region_to_fn (kern_cfun
, single_succ (gpukernel
->entry
),
7684 gpukernel
->exit
, inside_block
);
7686 cgraph_node
*kcn
= cgraph_node::get_create (kern_fndecl
);
7687 kcn
->mark_force_output ();
7688 cgraph_node
*orig_child
= cgraph_node::get (orig_child_fndecl
);
7690 hsa_register_kernel (kcn
, orig_child
);
7692 cgraph_node::add_new_function (kern_fndecl
, true);
7693 push_cfun (kern_cfun
);
7694 cgraph_edge::rebuild_edges ();
7696 /* Re-map any mention of the PARM_DECL of the original function to the
7697 PARM_DECL of the new one.
7699 TODO: It would be great if lowering produced references into the GPU
7700 kernel decl straight away and we did not have to do this. */
7701 struct grid_arg_decl_map adm
;
7702 adm
.old_arg
= old_parm_decl
;
7703 adm
.new_arg
= new_parm_decl
;
7705 FOR_EACH_BB_FN (bb
, kern_cfun
)
7707 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
7709 gimple
*stmt
= gsi_stmt (gsi
);
7710 struct walk_stmt_info wi
;
7711 memset (&wi
, 0, sizeof (wi
));
7713 walk_gimple_op (stmt
, grid_remap_kernel_arg_accesses
, &wi
);
7721 /* Expand the parallel region tree rooted at REGION. Expansion
7722 proceeds in depth-first order. Innermost regions are expanded
7723 first. This way, parallel regions that require a new function to
7724 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7725 internal dependencies in their body. */
7728 expand_omp (struct omp_region
*region
)
7730 omp_any_child_fn_dumped
= false;
7733 location_t saved_location
;
7734 gimple
*inner_stmt
= NULL
;
7736 /* First, determine whether this is a combined parallel+workshare
7738 if (region
->type
== GIMPLE_OMP_PARALLEL
)
7739 determine_parallel_type (region
);
7740 else if (region
->type
== GIMPLE_OMP_TARGET
)
7741 grid_expand_target_grid_body (region
);
7743 if (region
->type
== GIMPLE_OMP_FOR
7744 && gimple_omp_for_combined_p (last_stmt (region
->entry
)))
7745 inner_stmt
= last_stmt (region
->inner
->entry
);
7748 expand_omp (region
->inner
);
7750 saved_location
= input_location
;
7751 if (gimple_has_location (last_stmt (region
->entry
)))
7752 input_location
= gimple_location (last_stmt (region
->entry
));
7754 switch (region
->type
)
7756 case GIMPLE_OMP_PARALLEL
:
7757 case GIMPLE_OMP_TASK
:
7758 expand_omp_taskreg (region
);
7761 case GIMPLE_OMP_FOR
:
7762 expand_omp_for (region
, inner_stmt
);
7765 case GIMPLE_OMP_SECTIONS
:
7766 expand_omp_sections (region
);
7769 case GIMPLE_OMP_SECTION
:
7770 /* Individual omp sections are handled together with their
7771 parent GIMPLE_OMP_SECTIONS region. */
7774 case GIMPLE_OMP_SINGLE
:
7775 expand_omp_single (region
);
7778 case GIMPLE_OMP_ORDERED
:
7780 gomp_ordered
*ord_stmt
7781 = as_a
<gomp_ordered
*> (last_stmt (region
->entry
));
7782 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt
),
7785 /* We'll expand these when expanding corresponding
7786 worksharing region with ordered(n) clause. */
7787 gcc_assert (region
->outer
7788 && region
->outer
->type
== GIMPLE_OMP_FOR
);
7789 region
->ord_stmt
= ord_stmt
;
7794 case GIMPLE_OMP_MASTER
:
7795 case GIMPLE_OMP_TASKGROUP
:
7796 case GIMPLE_OMP_CRITICAL
:
7797 case GIMPLE_OMP_TEAMS
:
7798 expand_omp_synch (region
);
7801 case GIMPLE_OMP_ATOMIC_LOAD
:
7802 expand_omp_atomic (region
);
7805 case GIMPLE_OMP_TARGET
:
7806 expand_omp_target (region
);
7813 input_location
= saved_location
;
7814 region
= region
->next
;
7816 if (omp_any_child_fn_dumped
)
7819 dump_function_header (dump_file
, current_function_decl
, dump_flags
);
7820 omp_any_child_fn_dumped
= false;
7824 /* Helper for build_omp_regions. Scan the dominator tree starting at
7825 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7826 true, the function ends once a single tree is built (otherwise, whole
7827 forest of OMP constructs may be built). */
7830 build_omp_regions_1 (basic_block bb
, struct omp_region
*parent
,
7833 gimple_stmt_iterator gsi
;
7837 gsi
= gsi_last_nondebug_bb (bb
);
7838 if (!gsi_end_p (gsi
) && is_gimple_omp (gsi_stmt (gsi
)))
7840 struct omp_region
*region
;
7841 enum gimple_code code
;
7843 stmt
= gsi_stmt (gsi
);
7844 code
= gimple_code (stmt
);
7845 if (code
== GIMPLE_OMP_RETURN
)
7847 /* STMT is the return point out of region PARENT. Mark it
7848 as the exit point and make PARENT the immediately
7849 enclosing region. */
7850 gcc_assert (parent
);
7853 parent
= parent
->outer
;
7855 else if (code
== GIMPLE_OMP_ATOMIC_STORE
)
7857 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7858 GIMPLE_OMP_RETURN, but matches with
7859 GIMPLE_OMP_ATOMIC_LOAD. */
7860 gcc_assert (parent
);
7861 gcc_assert (parent
->type
== GIMPLE_OMP_ATOMIC_LOAD
);
7864 parent
= parent
->outer
;
7866 else if (code
== GIMPLE_OMP_CONTINUE
)
7868 gcc_assert (parent
);
7871 else if (code
== GIMPLE_OMP_SECTIONS_SWITCH
)
7873 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7874 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7878 region
= new_omp_region (bb
, code
, parent
);
7880 if (code
== GIMPLE_OMP_TARGET
)
7882 switch (gimple_omp_target_kind (stmt
))
7884 case GF_OMP_TARGET_KIND_REGION
:
7885 case GF_OMP_TARGET_KIND_DATA
:
7886 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7887 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7888 case GF_OMP_TARGET_KIND_OACC_DATA
:
7889 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7891 case GF_OMP_TARGET_KIND_UPDATE
:
7892 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7893 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7894 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7895 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7896 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7897 /* ..., other than for those stand-alone directives... */
7904 else if (code
== GIMPLE_OMP_ORDERED
7905 && omp_find_clause (gimple_omp_ordered_clauses
7906 (as_a
<gomp_ordered
*> (stmt
)),
7908 /* #pragma omp ordered depend is also just a stand-alone
7911 /* ..., this directive becomes the parent for a new region. */
7917 if (single_tree
&& !parent
)
7920 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
7922 son
= next_dom_son (CDI_DOMINATORS
, son
))
7923 build_omp_regions_1 (son
, parent
, single_tree
);
7926 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7930 build_omp_regions_root (basic_block root
)
7932 gcc_assert (root_omp_region
== NULL
);
7933 build_omp_regions_1 (root
, NULL
, true);
7934 gcc_assert (root_omp_region
!= NULL
);
7937 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7940 omp_expand_local (basic_block head
)
7942 build_omp_regions_root (head
);
7943 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7945 fprintf (dump_file
, "\nOMP region tree\n\n");
7946 dump_omp_region (dump_file
, root_omp_region
, 0);
7947 fprintf (dump_file
, "\n");
7950 remove_exit_barriers (root_omp_region
);
7951 expand_omp (root_omp_region
);
7953 omp_free_regions ();
7956 /* Scan the CFG and build a tree of OMP regions. Return the root of
7957 the OMP region tree. */
7960 build_omp_regions (void)
7962 gcc_assert (root_omp_region
== NULL
);
7963 calculate_dominance_info (CDI_DOMINATORS
);
7964 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, false);
7967 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7970 execute_expand_omp (void)
7972 build_omp_regions ();
7974 if (!root_omp_region
)
7979 fprintf (dump_file
, "\nOMP region tree\n\n");
7980 dump_omp_region (dump_file
, root_omp_region
, 0);
7981 fprintf (dump_file
, "\n");
7984 remove_exit_barriers (root_omp_region
);
7986 expand_omp (root_omp_region
);
7988 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7989 verify_loop_structure ();
7990 cleanup_tree_cfg ();
7992 omp_free_regions ();
7997 /* OMP expansion -- the default pass, run before creation of SSA form. */
8001 const pass_data pass_data_expand_omp
=
8003 GIMPLE_PASS
, /* type */
8004 "ompexp", /* name */
8005 OPTGROUP_OMP
, /* optinfo_flags */
8006 TV_NONE
, /* tv_id */
8007 PROP_gimple_any
, /* properties_required */
8008 PROP_gimple_eomp
, /* properties_provided */
8009 0, /* properties_destroyed */
8010 0, /* todo_flags_start */
8011 0, /* todo_flags_finish */
8014 class pass_expand_omp
: public gimple_opt_pass
8017 pass_expand_omp (gcc::context
*ctxt
)
8018 : gimple_opt_pass (pass_data_expand_omp
, ctxt
)
8021 /* opt_pass methods: */
8022 virtual unsigned int execute (function
*)
8024 bool gate
= ((flag_openacc
!= 0 || flag_openmp
!= 0
8025 || flag_openmp_simd
!= 0)
8028 /* This pass always runs, to provide PROP_gimple_eomp.
8029 But often, there is nothing to do. */
8033 return execute_expand_omp ();
8036 }; // class pass_expand_omp
8041 make_pass_expand_omp (gcc::context
*ctxt
)
8043 return new pass_expand_omp (ctxt
);
8048 const pass_data pass_data_expand_omp_ssa
=
8050 GIMPLE_PASS
, /* type */
8051 "ompexpssa", /* name */
8052 OPTGROUP_OMP
, /* optinfo_flags */
8053 TV_NONE
, /* tv_id */
8054 PROP_cfg
| PROP_ssa
, /* properties_required */
8055 PROP_gimple_eomp
, /* properties_provided */
8056 0, /* properties_destroyed */
8057 0, /* todo_flags_start */
8058 TODO_cleanup_cfg
| TODO_rebuild_alias
, /* todo_flags_finish */
8061 class pass_expand_omp_ssa
: public gimple_opt_pass
8064 pass_expand_omp_ssa (gcc::context
*ctxt
)
8065 : gimple_opt_pass (pass_data_expand_omp_ssa
, ctxt
)
8068 /* opt_pass methods: */
8069 virtual bool gate (function
*fun
)
8071 return !(fun
->curr_properties
& PROP_gimple_eomp
);
8073 virtual unsigned int execute (function
*) { return execute_expand_omp (); }
8074 opt_pass
* clone () { return new pass_expand_omp_ssa (m_ctxt
); }
8076 }; // class pass_expand_omp_ssa
8081 make_pass_expand_omp_ssa (gcc::context
*ctxt
)
8083 return new pass_expand_omp_ssa (ctxt
);
8086 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8090 omp_make_gimple_edges (basic_block bb
, struct omp_region
**region
,
8093 gimple
*last
= last_stmt (bb
);
8094 enum gimple_code code
= gimple_code (last
);
8095 struct omp_region
*cur_region
= *region
;
8096 bool fallthru
= false;
8100 case GIMPLE_OMP_PARALLEL
:
8101 case GIMPLE_OMP_TASK
:
8102 case GIMPLE_OMP_FOR
:
8103 case GIMPLE_OMP_SINGLE
:
8104 case GIMPLE_OMP_TEAMS
:
8105 case GIMPLE_OMP_MASTER
:
8106 case GIMPLE_OMP_TASKGROUP
:
8107 case GIMPLE_OMP_CRITICAL
:
8108 case GIMPLE_OMP_SECTION
:
8109 case GIMPLE_OMP_GRID_BODY
:
8110 cur_region
= new_omp_region (bb
, code
, cur_region
);
8114 case GIMPLE_OMP_ORDERED
:
8115 cur_region
= new_omp_region (bb
, code
, cur_region
);
8117 if (omp_find_clause (gimple_omp_ordered_clauses
8118 (as_a
<gomp_ordered
*> (last
)),
8120 cur_region
= cur_region
->outer
;
8123 case GIMPLE_OMP_TARGET
:
8124 cur_region
= new_omp_region (bb
, code
, cur_region
);
8126 switch (gimple_omp_target_kind (last
))
8128 case GF_OMP_TARGET_KIND_REGION
:
8129 case GF_OMP_TARGET_KIND_DATA
:
8130 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
8131 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
8132 case GF_OMP_TARGET_KIND_OACC_DATA
:
8133 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
8135 case GF_OMP_TARGET_KIND_UPDATE
:
8136 case GF_OMP_TARGET_KIND_ENTER_DATA
:
8137 case GF_OMP_TARGET_KIND_EXIT_DATA
:
8138 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
8139 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
8140 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
8141 cur_region
= cur_region
->outer
;
8148 case GIMPLE_OMP_SECTIONS
:
8149 cur_region
= new_omp_region (bb
, code
, cur_region
);
8153 case GIMPLE_OMP_SECTIONS_SWITCH
:
8157 case GIMPLE_OMP_ATOMIC_LOAD
:
8158 case GIMPLE_OMP_ATOMIC_STORE
:
8162 case GIMPLE_OMP_RETURN
:
8163 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8164 somewhere other than the next block. This will be
8166 cur_region
->exit
= bb
;
8167 if (cur_region
->type
== GIMPLE_OMP_TASK
)
8168 /* Add an edge corresponding to not scheduling the task
8170 make_edge (cur_region
->entry
, bb
, EDGE_ABNORMAL
);
8171 fallthru
= cur_region
->type
!= GIMPLE_OMP_SECTION
;
8172 cur_region
= cur_region
->outer
;
8175 case GIMPLE_OMP_CONTINUE
:
8176 cur_region
->cont
= bb
;
8177 switch (cur_region
->type
)
8179 case GIMPLE_OMP_FOR
:
8180 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8181 succs edges as abnormal to prevent splitting
8183 single_succ_edge (cur_region
->entry
)->flags
|= EDGE_ABNORMAL
;
8184 /* Make the loopback edge. */
8185 make_edge (bb
, single_succ (cur_region
->entry
),
8188 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8189 corresponds to the case that the body of the loop
8190 is not executed at all. */
8191 make_edge (cur_region
->entry
, bb
->next_bb
, EDGE_ABNORMAL
);
8192 make_edge (bb
, bb
->next_bb
, EDGE_FALLTHRU
| EDGE_ABNORMAL
);
8196 case GIMPLE_OMP_SECTIONS
:
8197 /* Wire up the edges into and out of the nested sections. */
8199 basic_block switch_bb
= single_succ (cur_region
->entry
);
8201 struct omp_region
*i
;
8202 for (i
= cur_region
->inner
; i
; i
= i
->next
)
8204 gcc_assert (i
->type
== GIMPLE_OMP_SECTION
);
8205 make_edge (switch_bb
, i
->entry
, 0);
8206 make_edge (i
->exit
, bb
, EDGE_FALLTHRU
);
8209 /* Make the loopback edge to the block with
8210 GIMPLE_OMP_SECTIONS_SWITCH. */
8211 make_edge (bb
, switch_bb
, 0);
8213 /* Make the edge from the switch to exit. */
8214 make_edge (switch_bb
, bb
->next_bb
, 0);
8219 case GIMPLE_OMP_TASK
:
8232 if (*region
!= cur_region
)
8234 *region
= cur_region
;
8236 *region_idx
= cur_region
->entry
->index
;
8244 #include "gt-omp-expand.h"