1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
33 #include "tree-pass.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
42 #include "internal-fn.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
48 #include "tree-into-ssa.h"
50 #include "splay-tree.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
61 #include "stringpool.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
70 /* The enclosing region. */
71 struct omp_region
*outer
;
73 /* First child region. */
74 struct omp_region
*inner
;
76 /* Next peer region. */
77 struct omp_region
*next
;
79 /* Block containing the omp directive as its last stmt. */
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
91 vec
<tree
, va_gc
> *ws_args
;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type
;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind
;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers
;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel
;
105 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
107 gomp_ordered
*ord_stmt
;
110 static struct omp_region
*root_omp_region
;
111 static bool omp_any_child_fn_dumped
;
113 static void expand_omp_build_assign (gimple_stmt_iterator
*, tree
, tree
,
115 static gphi
*find_phi_with_arg_on_edge (tree
, edge
);
116 static void expand_omp (struct omp_region
*region
);
118 /* Return true if REGION is a combined parallel+workshare region. */
121 is_combined_parallel (struct omp_region
*region
)
123 return region
->is_combined_parallel
;
126 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
127 is the immediate dominator of PAR_ENTRY_BB, return true if there
128 are no data dependencies that would prevent expanding the parallel
129 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
131 When expanding a combined parallel+workshare region, the call to
132 the child function may need additional arguments in the case of
133 GIMPLE_OMP_FOR regions. In some cases, these arguments are
134 computed out of variables passed in from the parent to the child
135 via 'struct .omp_data_s'. For instance:
137 #pragma omp parallel for schedule (guided, i * 4)
142 # BLOCK 2 (PAR_ENTRY_BB)
144 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
146 # BLOCK 3 (WS_ENTRY_BB)
147 .omp_data_i = &.omp_data_o;
148 D.1667 = .omp_data_i->i;
150 #pragma omp for schedule (guided, D.1598)
152 When we outline the parallel region, the call to the child function
153 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
154 that value is computed *after* the call site. So, in principle we
155 cannot do the transformation.
157 To see whether the code in WS_ENTRY_BB blocks the combined
158 parallel+workshare call, we collect all the variables used in the
159 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
160 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 FIXME. If we had the SSA form built at this point, we could merely
164 hoist the code in block 3 into block 2 and be done with it. But at
165 this point we don't have dataflow information and though we could
166 hack something up here, it is really not worth the aggravation. */
169 workshare_safe_to_combine_p (basic_block ws_entry_bb
)
171 struct omp_for_data fd
;
172 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
174 if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
177 gcc_assert (gimple_code (ws_stmt
) == GIMPLE_OMP_FOR
);
179 omp_extract_for_data (as_a
<gomp_for
*> (ws_stmt
), &fd
, NULL
);
181 if (fd
.collapse
> 1 && TREE_CODE (fd
.loop
.n2
) != INTEGER_CST
)
183 if (fd
.iter_type
!= long_integer_type_node
)
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
191 if (!is_gimple_min_invariant (fd
.loop
.n1
)
192 || !is_gimple_min_invariant (fd
.loop
.n2
)
193 || !is_gimple_min_invariant (fd
.loop
.step
)
194 || (fd
.chunk_size
&& !is_gimple_min_invariant (fd
.chunk_size
)))
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
204 omp_adjust_chunk_size (tree chunk_size
, bool simd_schedule
)
209 int vf
= omp_max_vf ();
213 tree type
= TREE_TYPE (chunk_size
);
214 chunk_size
= fold_build2 (PLUS_EXPR
, type
, chunk_size
,
215 build_int_cst (type
, vf
- 1));
216 return fold_build2 (BIT_AND_EXPR
, type
, chunk_size
,
217 build_int_cst (type
, -vf
));
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
224 static vec
<tree
, va_gc
> *
225 get_ws_args_for (gimple
*par_stmt
, gimple
*ws_stmt
)
228 location_t loc
= gimple_location (ws_stmt
);
229 vec
<tree
, va_gc
> *ws_args
;
231 if (gomp_for
*for_stmt
= dyn_cast
<gomp_for
*> (ws_stmt
))
233 struct omp_for_data fd
;
236 omp_extract_for_data (for_stmt
, &fd
, NULL
);
240 if (gimple_omp_for_combined_into_p (for_stmt
))
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt
),
244 OMP_CLAUSE__LOOPTEMP_
);
246 n1
= OMP_CLAUSE_DECL (innerc
);
247 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
248 OMP_CLAUSE__LOOPTEMP_
);
250 n2
= OMP_CLAUSE_DECL (innerc
);
253 vec_alloc (ws_args
, 3 + (fd
.chunk_size
!= 0));
255 t
= fold_convert_loc (loc
, long_integer_type_node
, n1
);
256 ws_args
->quick_push (t
);
258 t
= fold_convert_loc (loc
, long_integer_type_node
, n2
);
259 ws_args
->quick_push (t
);
261 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.loop
.step
);
262 ws_args
->quick_push (t
);
266 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.chunk_size
);
267 t
= omp_adjust_chunk_size (t
, fd
.simd_schedule
);
268 ws_args
->quick_push (t
);
273 else if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb
= single_succ (gimple_bb (ws_stmt
));
279 t
= build_int_cst (unsigned_type_node
, EDGE_COUNT (bb
->succs
) - 1);
280 vec_alloc (ws_args
, 1);
281 ws_args
->quick_push (t
);
288 /* Discover whether REGION is a combined parallel+workshare region. */
291 determine_parallel_type (struct omp_region
*region
)
293 basic_block par_entry_bb
, par_exit_bb
;
294 basic_block ws_entry_bb
, ws_exit_bb
;
296 if (region
== NULL
|| region
->inner
== NULL
297 || region
->exit
== NULL
|| region
->inner
->exit
== NULL
298 || region
->inner
->cont
== NULL
)
301 /* We only support parallel+for and parallel+sections. */
302 if (region
->type
!= GIMPLE_OMP_PARALLEL
303 || (region
->inner
->type
!= GIMPLE_OMP_FOR
304 && region
->inner
->type
!= GIMPLE_OMP_SECTIONS
))
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb
= region
->entry
;
310 par_exit_bb
= region
->exit
;
311 ws_entry_bb
= region
->inner
->entry
;
312 ws_exit_bb
= region
->inner
->exit
;
314 if (single_succ (par_entry_bb
) == ws_entry_bb
315 && single_succ (ws_exit_bb
) == par_exit_bb
316 && workshare_safe_to_combine_p (ws_entry_bb
)
317 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb
))
318 || (last_and_only_stmt (ws_entry_bb
)
319 && last_and_only_stmt (par_exit_bb
))))
321 gimple
*par_stmt
= last_stmt (par_entry_bb
);
322 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
324 if (region
->inner
->type
== GIMPLE_OMP_FOR
)
326 /* If this is a combined parallel loop, we need to determine
327 whether or not to use the combined library calls. There
328 are two cases where we do not apply the transformation:
329 static loops and any kind of ordered loop. In the first
330 case, we already open code the loop so there is no need
331 to do anything else. In the latter case, the combined
332 parallel loop call would still need extra synchronization
333 to implement ordered semantics, so there would not be any
334 gain in using the combined call. */
335 tree clauses
= gimple_omp_for_clauses (ws_stmt
);
336 tree c
= omp_find_clause (clauses
, OMP_CLAUSE_SCHEDULE
);
338 || ((OMP_CLAUSE_SCHEDULE_KIND (c
) & OMP_CLAUSE_SCHEDULE_MASK
)
339 == OMP_CLAUSE_SCHEDULE_STATIC
)
340 || omp_find_clause (clauses
, OMP_CLAUSE_ORDERED
))
342 region
->is_combined_parallel
= false;
343 region
->inner
->is_combined_parallel
= false;
348 region
->is_combined_parallel
= true;
349 region
->inner
->is_combined_parallel
= true;
350 region
->ws_args
= get_ws_args_for (par_stmt
, ws_stmt
);
354 /* Debugging dumps for parallel regions. */
355 void dump_omp_region (FILE *, struct omp_region
*, int);
356 void debug_omp_region (struct omp_region
*);
357 void debug_all_omp_regions (void);
359 /* Dump the parallel region tree rooted at REGION. */
362 dump_omp_region (FILE *file
, struct omp_region
*region
, int indent
)
364 fprintf (file
, "%*sbb %d: %s\n", indent
, "", region
->entry
->index
,
365 gimple_code_name
[region
->type
]);
368 dump_omp_region (file
, region
->inner
, indent
+ 4);
372 fprintf (file
, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent
, "",
373 region
->cont
->index
);
377 fprintf (file
, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent
, "",
378 region
->exit
->index
);
380 fprintf (file
, "%*s[no exit marker]\n", indent
, "");
383 dump_omp_region (file
, region
->next
, indent
);
387 debug_omp_region (struct omp_region
*region
)
389 dump_omp_region (stderr
, region
, 0);
393 debug_all_omp_regions (void)
395 dump_omp_region (stderr
, root_omp_region
, 0);
398 /* Create a new parallel region starting at STMT inside region PARENT. */
400 static struct omp_region
*
401 new_omp_region (basic_block bb
, enum gimple_code type
,
402 struct omp_region
*parent
)
404 struct omp_region
*region
= XCNEW (struct omp_region
);
406 region
->outer
= parent
;
412 /* This is a nested region. Add it to the list of inner
413 regions in PARENT. */
414 region
->next
= parent
->inner
;
415 parent
->inner
= region
;
419 /* This is a toplevel region. Add it to the list of toplevel
420 regions in ROOT_OMP_REGION. */
421 region
->next
= root_omp_region
;
422 root_omp_region
= region
;
428 /* Release the memory associated with the region tree rooted at REGION. */
431 free_omp_region_1 (struct omp_region
*region
)
433 struct omp_region
*i
, *n
;
435 for (i
= region
->inner
; i
; i
= n
)
438 free_omp_region_1 (i
);
444 /* Release the memory for the entire omp region tree. */
447 omp_free_regions (void)
449 struct omp_region
*r
, *n
;
450 for (r
= root_omp_region
; r
; r
= n
)
453 free_omp_region_1 (r
);
455 root_omp_region
= NULL
;
458 /* A convenience function to build an empty GIMPLE_COND with just the
462 gimple_build_cond_empty (tree cond
)
464 enum tree_code pred_code
;
467 gimple_cond_get_ops_from_tree (cond
, &pred_code
, &lhs
, &rhs
);
468 return gimple_build_cond (pred_code
, lhs
, rhs
, NULL_TREE
, NULL_TREE
);
471 /* Return true if a parallel REGION is within a declare target function or
472 within a target region and is not a part of a gridified target. */
475 parallel_needs_hsa_kernel_p (struct omp_region
*region
)
477 bool indirect
= false;
478 for (region
= region
->outer
; region
; region
= region
->outer
)
480 if (region
->type
== GIMPLE_OMP_PARALLEL
)
482 else if (region
->type
== GIMPLE_OMP_TARGET
)
484 gomp_target
*tgt_stmt
485 = as_a
<gomp_target
*> (last_stmt (region
->entry
));
487 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
488 OMP_CLAUSE__GRIDDIM_
))
495 if (lookup_attribute ("omp declare target",
496 DECL_ATTRIBUTES (current_function_decl
)))
502 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
503 Add CHILD_FNDECL to decl chain of the supercontext of the block
504 ENTRY_BLOCK - this is the block which originally contained the
505 code from which CHILD_FNDECL was created.
507 Together, these actions ensure that the debug info for the outlined
508 function will be emitted with the correct lexical scope. */
511 adjust_context_and_scope (tree entry_block
, tree child_fndecl
)
513 if (entry_block
!= NULL_TREE
&& TREE_CODE (entry_block
) == BLOCK
)
515 tree b
= BLOCK_SUPERCONTEXT (entry_block
);
517 if (TREE_CODE (b
) == BLOCK
)
521 /* Follow supercontext chain until the parent fndecl
523 for (parent_fndecl
= BLOCK_SUPERCONTEXT (b
);
524 TREE_CODE (parent_fndecl
) == BLOCK
;
525 parent_fndecl
= BLOCK_SUPERCONTEXT (parent_fndecl
))
528 gcc_assert (TREE_CODE (parent_fndecl
) == FUNCTION_DECL
);
530 DECL_CONTEXT (child_fndecl
) = parent_fndecl
;
532 DECL_CHAIN (child_fndecl
) = BLOCK_VARS (b
);
533 BLOCK_VARS (b
) = child_fndecl
;
538 /* Build the function calls to GOMP_parallel_start etc to actually
539 generate the parallel operation. REGION is the parallel region
540 being expanded. BB is the block where to insert the code. WS_ARGS
541 will be set if this is a call to a combined parallel+workshare
542 construct, it contains the list of additional arguments needed by
543 the workshare construct. */
546 expand_parallel_call (struct omp_region
*region
, basic_block bb
,
547 gomp_parallel
*entry_stmt
,
548 vec
<tree
, va_gc
> *ws_args
)
550 tree t
, t1
, t2
, val
, cond
, c
, clauses
, flags
;
551 gimple_stmt_iterator gsi
;
553 enum built_in_function start_ix
;
555 location_t clause_loc
;
556 vec
<tree
, va_gc
> *args
;
558 clauses
= gimple_omp_parallel_clauses (entry_stmt
);
560 /* Determine what flavor of GOMP_parallel we will be
562 start_ix
= BUILT_IN_GOMP_PARALLEL
;
563 if (is_combined_parallel (region
))
565 switch (region
->inner
->type
)
568 gcc_assert (region
->inner
->sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
569 switch (region
->inner
->sched_kind
)
571 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
574 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
575 case OMP_CLAUSE_SCHEDULE_GUIDED
:
576 if (region
->inner
->sched_modifiers
577 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC
)
579 start_ix2
= 3 + region
->inner
->sched_kind
;
584 start_ix2
= region
->inner
->sched_kind
;
587 start_ix2
+= (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
;
588 start_ix
= (enum built_in_function
) start_ix2
;
590 case GIMPLE_OMP_SECTIONS
:
591 start_ix
= BUILT_IN_GOMP_PARALLEL_SECTIONS
;
598 /* By default, the value of NUM_THREADS is zero (selected at run time)
599 and there is no conditional. */
601 val
= build_int_cst (unsigned_type_node
, 0);
602 flags
= build_int_cst (unsigned_type_node
, 0);
604 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
606 cond
= OMP_CLAUSE_IF_EXPR (c
);
608 c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_THREADS
);
611 val
= OMP_CLAUSE_NUM_THREADS_EXPR (c
);
612 clause_loc
= OMP_CLAUSE_LOCATION (c
);
615 clause_loc
= gimple_location (entry_stmt
);
617 c
= omp_find_clause (clauses
, OMP_CLAUSE_PROC_BIND
);
619 flags
= build_int_cst (unsigned_type_node
, OMP_CLAUSE_PROC_BIND_KIND (c
));
621 /* Ensure 'val' is of the correct type. */
622 val
= fold_convert_loc (clause_loc
, unsigned_type_node
, val
);
624 /* If we found the clause 'if (cond)', build either
625 (cond != 0) or (cond ? val : 1u). */
628 cond
= gimple_boolify (cond
);
630 if (integer_zerop (val
))
631 val
= fold_build2_loc (clause_loc
,
632 EQ_EXPR
, unsigned_type_node
, cond
,
633 build_int_cst (TREE_TYPE (cond
), 0));
636 basic_block cond_bb
, then_bb
, else_bb
;
637 edge e
, e_then
, e_else
;
638 tree tmp_then
, tmp_else
, tmp_join
, tmp_var
;
640 tmp_var
= create_tmp_var (TREE_TYPE (val
));
641 if (gimple_in_ssa_p (cfun
))
643 tmp_then
= make_ssa_name (tmp_var
);
644 tmp_else
= make_ssa_name (tmp_var
);
645 tmp_join
= make_ssa_name (tmp_var
);
654 e
= split_block_after_labels (bb
);
659 then_bb
= create_empty_bb (cond_bb
);
660 else_bb
= create_empty_bb (then_bb
);
661 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
662 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
664 stmt
= gimple_build_cond_empty (cond
);
665 gsi
= gsi_start_bb (cond_bb
);
666 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
668 gsi
= gsi_start_bb (then_bb
);
669 expand_omp_build_assign (&gsi
, tmp_then
, val
, true);
671 gsi
= gsi_start_bb (else_bb
);
672 expand_omp_build_assign (&gsi
, tmp_else
,
673 build_int_cst (unsigned_type_node
, 1),
676 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
677 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
678 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
679 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
680 e_then
= make_edge (then_bb
, bb
, EDGE_FALLTHRU
);
681 e_else
= make_edge (else_bb
, bb
, EDGE_FALLTHRU
);
683 if (gimple_in_ssa_p (cfun
))
685 gphi
*phi
= create_phi_node (tmp_join
, bb
);
686 add_phi_arg (phi
, tmp_then
, e_then
, UNKNOWN_LOCATION
);
687 add_phi_arg (phi
, tmp_else
, e_else
, UNKNOWN_LOCATION
);
693 gsi
= gsi_start_bb (bb
);
694 val
= force_gimple_operand_gsi (&gsi
, val
, true, NULL_TREE
,
695 false, GSI_CONTINUE_LINKING
);
698 gsi
= gsi_last_bb (bb
);
699 t
= gimple_omp_parallel_data_arg (entry_stmt
);
701 t1
= null_pointer_node
;
703 t1
= build_fold_addr_expr (t
);
704 tree child_fndecl
= gimple_omp_parallel_child_fn (entry_stmt
);
705 t2
= build_fold_addr_expr (child_fndecl
);
707 adjust_context_and_scope (gimple_block (entry_stmt
), child_fndecl
);
709 vec_alloc (args
, 4 + vec_safe_length (ws_args
));
710 args
->quick_push (t2
);
711 args
->quick_push (t1
);
712 args
->quick_push (val
);
714 args
->splice (*ws_args
);
715 args
->quick_push (flags
);
717 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
718 builtin_decl_explicit (start_ix
), args
);
720 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
721 false, GSI_CONTINUE_LINKING
);
723 if (hsa_gen_requested_p ()
724 && parallel_needs_hsa_kernel_p (region
))
726 cgraph_node
*child_cnode
= cgraph_node::get (child_fndecl
);
727 hsa_register_kernel (child_cnode
);
731 /* Insert a function call whose name is FUNC_NAME with the information from
732 ENTRY_STMT into the basic_block BB. */
735 expand_cilk_for_call (basic_block bb
, gomp_parallel
*entry_stmt
,
736 vec
<tree
, va_gc
> *ws_args
)
739 gimple_stmt_iterator gsi
;
740 vec
<tree
, va_gc
> *args
;
742 gcc_assert (vec_safe_length (ws_args
) == 2);
743 tree func_name
= (*ws_args
)[0];
744 tree grain
= (*ws_args
)[1];
746 tree clauses
= gimple_omp_parallel_clauses (entry_stmt
);
747 tree count
= omp_find_clause (clauses
, OMP_CLAUSE__CILK_FOR_COUNT_
);
748 gcc_assert (count
!= NULL_TREE
);
749 count
= OMP_CLAUSE_OPERAND (count
, 0);
751 gsi
= gsi_last_bb (bb
);
752 t
= gimple_omp_parallel_data_arg (entry_stmt
);
754 t1
= null_pointer_node
;
756 t1
= build_fold_addr_expr (t
);
757 t2
= build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt
));
760 args
->quick_push (t2
);
761 args
->quick_push (t1
);
762 args
->quick_push (count
);
763 args
->quick_push (grain
);
764 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
, func_name
, args
);
766 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, false,
767 GSI_CONTINUE_LINKING
);
770 /* Build the function call to GOMP_task to actually
771 generate the task operation. BB is the block where to insert the code. */
774 expand_task_call (struct omp_region
*region
, basic_block bb
,
775 gomp_task
*entry_stmt
)
778 gimple_stmt_iterator gsi
;
779 location_t loc
= gimple_location (entry_stmt
);
781 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
783 tree ifc
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
784 tree untied
= omp_find_clause (clauses
, OMP_CLAUSE_UNTIED
);
785 tree mergeable
= omp_find_clause (clauses
, OMP_CLAUSE_MERGEABLE
);
786 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
787 tree finalc
= omp_find_clause (clauses
, OMP_CLAUSE_FINAL
);
788 tree priority
= omp_find_clause (clauses
, OMP_CLAUSE_PRIORITY
);
791 = (untied
? GOMP_TASK_FLAG_UNTIED
: 0)
792 | (mergeable
? GOMP_TASK_FLAG_MERGEABLE
: 0)
793 | (depend
? GOMP_TASK_FLAG_DEPEND
: 0);
795 bool taskloop_p
= gimple_omp_task_taskloop_p (entry_stmt
);
796 tree startvar
= NULL_TREE
, endvar
= NULL_TREE
, step
= NULL_TREE
;
797 tree num_tasks
= NULL_TREE
;
801 gimple
*g
= last_stmt (region
->outer
->entry
);
802 gcc_assert (gimple_code (g
) == GIMPLE_OMP_FOR
803 && gimple_omp_for_kind (g
) == GF_OMP_FOR_KIND_TASKLOOP
);
804 struct omp_for_data fd
;
805 omp_extract_for_data (as_a
<gomp_for
*> (g
), &fd
, NULL
);
806 startvar
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
807 endvar
= omp_find_clause (OMP_CLAUSE_CHAIN (startvar
),
808 OMP_CLAUSE__LOOPTEMP_
);
809 startvar
= OMP_CLAUSE_DECL (startvar
);
810 endvar
= OMP_CLAUSE_DECL (endvar
);
811 step
= fold_convert_loc (loc
, fd
.iter_type
, fd
.loop
.step
);
812 if (fd
.loop
.cond_code
== LT_EXPR
)
813 iflags
|= GOMP_TASK_FLAG_UP
;
814 tree tclauses
= gimple_omp_for_clauses (g
);
815 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_NUM_TASKS
);
817 num_tasks
= OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks
);
820 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_GRAINSIZE
);
823 iflags
|= GOMP_TASK_FLAG_GRAINSIZE
;
824 num_tasks
= OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks
);
827 num_tasks
= integer_zero_node
;
829 num_tasks
= fold_convert_loc (loc
, long_integer_type_node
, num_tasks
);
830 if (ifc
== NULL_TREE
)
831 iflags
|= GOMP_TASK_FLAG_IF
;
832 if (omp_find_clause (tclauses
, OMP_CLAUSE_NOGROUP
))
833 iflags
|= GOMP_TASK_FLAG_NOGROUP
;
834 ull
= fd
.iter_type
== long_long_unsigned_type_node
;
837 iflags
|= GOMP_TASK_FLAG_PRIORITY
;
839 tree flags
= build_int_cst (unsigned_type_node
, iflags
);
841 tree cond
= boolean_true_node
;
846 tree t
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
847 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
848 build_int_cst (unsigned_type_node
,
850 build_int_cst (unsigned_type_node
, 0));
851 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
,
855 cond
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
860 tree t
= gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc
));
861 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
862 build_int_cst (unsigned_type_node
,
863 GOMP_TASK_FLAG_FINAL
),
864 build_int_cst (unsigned_type_node
, 0));
865 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
, flags
, t
);
868 depend
= OMP_CLAUSE_DECL (depend
);
870 depend
= build_int_cst (ptr_type_node
, 0);
872 priority
= fold_convert (integer_type_node
,
873 OMP_CLAUSE_PRIORITY_EXPR (priority
));
875 priority
= integer_zero_node
;
877 gsi
= gsi_last_bb (bb
);
878 tree t
= gimple_omp_task_data_arg (entry_stmt
);
880 t2
= null_pointer_node
;
882 t2
= build_fold_addr_expr_loc (loc
, t
);
883 t1
= build_fold_addr_expr_loc (loc
, gimple_omp_task_child_fn (entry_stmt
));
884 t
= gimple_omp_task_copy_fn (entry_stmt
);
886 t3
= null_pointer_node
;
888 t3
= build_fold_addr_expr_loc (loc
, t
);
891 t
= build_call_expr (ull
892 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL
)
893 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP
),
895 gimple_omp_task_arg_size (entry_stmt
),
896 gimple_omp_task_arg_align (entry_stmt
), flags
,
897 num_tasks
, priority
, startvar
, endvar
, step
);
899 t
= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK
),
901 gimple_omp_task_arg_size (entry_stmt
),
902 gimple_omp_task_arg_align (entry_stmt
), cond
, flags
,
905 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
906 false, GSI_CONTINUE_LINKING
);
909 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
912 vec2chain (vec
<tree
, va_gc
> *v
)
914 tree chain
= NULL_TREE
, t
;
917 FOR_EACH_VEC_SAFE_ELT_REVERSE (v
, ix
, t
)
919 DECL_CHAIN (t
) = chain
;
926 /* Remove barriers in REGION->EXIT's block. Note that this is only
927 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
928 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
929 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
933 remove_exit_barrier (struct omp_region
*region
)
935 gimple_stmt_iterator gsi
;
940 int any_addressable_vars
= -1;
942 exit_bb
= region
->exit
;
944 /* If the parallel region doesn't return, we don't have REGION->EXIT
949 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
950 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
951 statements that can appear in between are extremely limited -- no
952 memory operations at all. Here, we allow nothing at all, so the
953 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
954 gsi
= gsi_last_bb (exit_bb
);
955 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
957 if (!gsi_end_p (gsi
) && gimple_code (gsi_stmt (gsi
)) != GIMPLE_LABEL
)
960 FOR_EACH_EDGE (e
, ei
, exit_bb
->preds
)
962 gsi
= gsi_last_bb (e
->src
);
965 stmt
= gsi_stmt (gsi
);
966 if (gimple_code (stmt
) == GIMPLE_OMP_RETURN
967 && !gimple_omp_return_nowait_p (stmt
))
969 /* OpenMP 3.0 tasks unfortunately prevent this optimization
970 in many cases. If there could be tasks queued, the barrier
971 might be needed to let the tasks run before some local
972 variable of the parallel that the task uses as shared
973 runs out of scope. The task can be spawned either
974 from within current function (this would be easy to check)
975 or from some function it calls and gets passed an address
976 of such a variable. */
977 if (any_addressable_vars
< 0)
979 gomp_parallel
*parallel_stmt
980 = as_a
<gomp_parallel
*> (last_stmt (region
->entry
));
981 tree child_fun
= gimple_omp_parallel_child_fn (parallel_stmt
);
982 tree local_decls
, block
, decl
;
985 any_addressable_vars
= 0;
986 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun
), ix
, decl
)
987 if (TREE_ADDRESSABLE (decl
))
989 any_addressable_vars
= 1;
992 for (block
= gimple_block (stmt
);
993 !any_addressable_vars
995 && TREE_CODE (block
) == BLOCK
;
996 block
= BLOCK_SUPERCONTEXT (block
))
998 for (local_decls
= BLOCK_VARS (block
);
1000 local_decls
= DECL_CHAIN (local_decls
))
1001 if (TREE_ADDRESSABLE (local_decls
))
1003 any_addressable_vars
= 1;
1006 if (block
== gimple_block (parallel_stmt
))
1010 if (!any_addressable_vars
)
1011 gimple_omp_return_set_nowait (stmt
);
1017 remove_exit_barriers (struct omp_region
*region
)
1019 if (region
->type
== GIMPLE_OMP_PARALLEL
)
1020 remove_exit_barrier (region
);
1024 region
= region
->inner
;
1025 remove_exit_barriers (region
);
1026 while (region
->next
)
1028 region
= region
->next
;
1029 remove_exit_barriers (region
);
1034 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1035 calls. These can't be declared as const functions, but
1036 within one parallel body they are constant, so they can be
1037 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1038 which are declared const. Similarly for task body, except
1039 that in untied task omp_get_thread_num () can change at any task
1040 scheduling point. */
1043 optimize_omp_library_calls (gimple
*entry_stmt
)
1046 gimple_stmt_iterator gsi
;
1047 tree thr_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1048 tree thr_num_id
= DECL_ASSEMBLER_NAME (thr_num_tree
);
1049 tree num_thr_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1050 tree num_thr_id
= DECL_ASSEMBLER_NAME (num_thr_tree
);
1051 bool untied_task
= (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1052 && omp_find_clause (gimple_omp_task_clauses (entry_stmt
),
1053 OMP_CLAUSE_UNTIED
) != NULL
);
1055 FOR_EACH_BB_FN (bb
, cfun
)
1056 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1058 gimple
*call
= gsi_stmt (gsi
);
1061 if (is_gimple_call (call
)
1062 && (decl
= gimple_call_fndecl (call
))
1063 && DECL_EXTERNAL (decl
)
1064 && TREE_PUBLIC (decl
)
1065 && DECL_INITIAL (decl
) == NULL
)
1069 if (DECL_NAME (decl
) == thr_num_id
)
1071 /* In #pragma omp task untied omp_get_thread_num () can change
1072 during the execution of the task region. */
1075 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1077 else if (DECL_NAME (decl
) == num_thr_id
)
1078 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1082 if (DECL_ASSEMBLER_NAME (decl
) != DECL_ASSEMBLER_NAME (built_in
)
1083 || gimple_call_num_args (call
) != 0)
1086 if (flag_exceptions
&& !TREE_NOTHROW (decl
))
1089 if (TREE_CODE (TREE_TYPE (decl
)) != FUNCTION_TYPE
1090 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl
)),
1091 TREE_TYPE (TREE_TYPE (built_in
))))
1094 gimple_call_set_fndecl (call
, built_in
);
1099 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1103 expand_omp_regimplify_p (tree
*tp
, int *walk_subtrees
, void *)
1107 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1108 if (VAR_P (t
) && DECL_HAS_VALUE_EXPR_P (t
))
1111 if (TREE_CODE (t
) == ADDR_EXPR
)
1112 recompute_tree_invariant_for_addr_expr (t
);
1114 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
1118 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1121 expand_omp_build_assign (gimple_stmt_iterator
*gsi_p
, tree to
, tree from
,
1124 bool simple_p
= DECL_P (to
) && TREE_ADDRESSABLE (to
);
1125 from
= force_gimple_operand_gsi (gsi_p
, from
, simple_p
, NULL_TREE
,
1126 !after
, after
? GSI_CONTINUE_LINKING
1128 gimple
*stmt
= gimple_build_assign (to
, from
);
1130 gsi_insert_after (gsi_p
, stmt
, GSI_CONTINUE_LINKING
);
1132 gsi_insert_before (gsi_p
, stmt
, GSI_SAME_STMT
);
1133 if (walk_tree (&from
, expand_omp_regimplify_p
, NULL
, NULL
)
1134 || walk_tree (&to
, expand_omp_regimplify_p
, NULL
, NULL
))
1136 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1137 gimple_regimplify_operands (stmt
, &gsi
);
1141 /* Expand the OpenMP parallel or task directive starting at REGION. */
1144 expand_omp_taskreg (struct omp_region
*region
)
1146 basic_block entry_bb
, exit_bb
, new_bb
;
1147 struct function
*child_cfun
;
1148 tree child_fn
, block
, t
;
1149 gimple_stmt_iterator gsi
;
1150 gimple
*entry_stmt
, *stmt
;
1152 vec
<tree
, va_gc
> *ws_args
;
1154 entry_stmt
= last_stmt (region
->entry
);
1155 child_fn
= gimple_omp_taskreg_child_fn (entry_stmt
);
1156 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
1158 entry_bb
= region
->entry
;
1159 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
)
1160 exit_bb
= region
->cont
;
1162 exit_bb
= region
->exit
;
1166 && gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
1167 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt
),
1168 OMP_CLAUSE__CILK_FOR_COUNT_
) != NULL_TREE
);
1171 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1172 and the inner statement contains the name of the built-in function
1174 ws_args
= region
->inner
->ws_args
;
1175 else if (is_combined_parallel (region
))
1176 ws_args
= region
->ws_args
;
1180 if (child_cfun
->cfg
)
1182 /* Due to inlining, it may happen that we have already outlined
1183 the region, in which case all we need to do is make the
1184 sub-graph unreachable and emit the parallel call. */
1185 edge entry_succ_e
, exit_succ_e
;
1187 entry_succ_e
= single_succ_edge (entry_bb
);
1189 gsi
= gsi_last_bb (entry_bb
);
1190 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_PARALLEL
1191 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
);
1192 gsi_remove (&gsi
, true);
1197 exit_succ_e
= single_succ_edge (exit_bb
);
1198 make_edge (new_bb
, exit_succ_e
->dest
, EDGE_FALLTHRU
);
1200 remove_edge_and_dominated_blocks (entry_succ_e
);
1204 unsigned srcidx
, dstidx
, num
;
1206 /* If the parallel region needs data sent from the parent
1207 function, then the very first statement (except possible
1208 tree profile counter updates) of the parallel body
1209 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1210 &.OMP_DATA_O is passed as an argument to the child function,
1211 we need to replace it with the argument as seen by the child
1214 In most cases, this will end up being the identity assignment
1215 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1216 a function call that has been inlined, the original PARM_DECL
1217 .OMP_DATA_I may have been converted into a different local
1218 variable. In which case, we need to keep the assignment. */
1219 if (gimple_omp_taskreg_data_arg (entry_stmt
))
1221 basic_block entry_succ_bb
1222 = single_succ_p (entry_bb
) ? single_succ (entry_bb
)
1223 : FALLTHRU_EDGE (entry_bb
)->dest
;
1225 gimple
*parcopy_stmt
= NULL
;
1227 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
1231 gcc_assert (!gsi_end_p (gsi
));
1232 stmt
= gsi_stmt (gsi
);
1233 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1236 if (gimple_num_ops (stmt
) == 2)
1238 tree arg
= gimple_assign_rhs1 (stmt
);
1240 /* We're ignore the subcode because we're
1241 effectively doing a STRIP_NOPS. */
1243 if (TREE_CODE (arg
) == ADDR_EXPR
1244 && TREE_OPERAND (arg
, 0)
1245 == gimple_omp_taskreg_data_arg (entry_stmt
))
1247 parcopy_stmt
= stmt
;
1253 gcc_assert (parcopy_stmt
!= NULL
);
1254 arg
= DECL_ARGUMENTS (child_fn
);
1256 if (!gimple_in_ssa_p (cfun
))
1258 if (gimple_assign_lhs (parcopy_stmt
) == arg
)
1259 gsi_remove (&gsi
, true);
1262 /* ?? Is setting the subcode really necessary ?? */
1263 gimple_omp_set_subcode (parcopy_stmt
, TREE_CODE (arg
));
1264 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1269 tree lhs
= gimple_assign_lhs (parcopy_stmt
);
1270 gcc_assert (SSA_NAME_VAR (lhs
) == arg
);
1271 /* We'd like to set the rhs to the default def in the child_fn,
1272 but it's too early to create ssa names in the child_fn.
1273 Instead, we set the rhs to the parm. In
1274 move_sese_region_to_fn, we introduce a default def for the
1275 parm, map the parm to it's default def, and once we encounter
1276 this stmt, replace the parm with the default def. */
1277 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1278 update_stmt (parcopy_stmt
);
1282 /* Declare local variables needed in CHILD_CFUN. */
1283 block
= DECL_INITIAL (child_fn
);
1284 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
1285 /* The gimplifier could record temporaries in parallel/task block
1286 rather than in containing function's local_decls chain,
1287 which would mean cgraph missed finalizing them. Do it now. */
1288 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
1289 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
1290 varpool_node::finalize_decl (t
);
1291 DECL_SAVED_TREE (child_fn
) = NULL
;
1292 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1293 gimple_set_body (child_fn
, NULL
);
1294 TREE_USED (block
) = 1;
1296 /* Reset DECL_CONTEXT on function arguments. */
1297 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
1298 DECL_CONTEXT (t
) = child_fn
;
1300 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1301 so that it can be moved to the child function. */
1302 gsi
= gsi_last_bb (entry_bb
);
1303 stmt
= gsi_stmt (gsi
);
1304 gcc_assert (stmt
&& (gimple_code (stmt
) == GIMPLE_OMP_PARALLEL
1305 || gimple_code (stmt
) == GIMPLE_OMP_TASK
));
1306 e
= split_block (entry_bb
, stmt
);
1307 gsi_remove (&gsi
, true);
1310 if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1311 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
1314 e2
= make_edge (e
->src
, BRANCH_EDGE (entry_bb
)->dest
, EDGE_ABNORMAL
);
1315 gcc_assert (e2
->dest
== region
->exit
);
1316 remove_edge (BRANCH_EDGE (entry_bb
));
1317 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e
->src
);
1318 gsi
= gsi_last_bb (region
->exit
);
1319 gcc_assert (!gsi_end_p (gsi
)
1320 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1321 gsi_remove (&gsi
, true);
1324 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1327 gsi
= gsi_last_bb (exit_bb
);
1328 gcc_assert (!gsi_end_p (gsi
)
1329 && (gimple_code (gsi_stmt (gsi
))
1330 == (e2
? GIMPLE_OMP_CONTINUE
: GIMPLE_OMP_RETURN
)));
1331 stmt
= gimple_build_return (NULL
);
1332 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1333 gsi_remove (&gsi
, true);
1336 /* Move the parallel region into CHILD_CFUN. */
1338 if (gimple_in_ssa_p (cfun
))
1340 init_tree_ssa (child_cfun
);
1341 init_ssa_operands (child_cfun
);
1342 child_cfun
->gimple_df
->in_ssa_p
= true;
1346 block
= gimple_block (entry_stmt
);
1348 /* Make sure to generate early debug for the function before
1349 outlining anything. */
1350 if (! gimple_in_ssa_p (cfun
))
1351 (*debug_hooks
->early_global_decl
) (cfun
->decl
);
1353 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
1355 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
1358 basic_block dest_bb
= e2
->dest
;
1360 make_edge (new_bb
, dest_bb
, EDGE_FALLTHRU
);
1362 set_immediate_dominator (CDI_DOMINATORS
, dest_bb
, new_bb
);
1364 /* When the OMP expansion process cannot guarantee an up-to-date
1365 loop tree arrange for the child function to fixup loops. */
1366 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1367 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
1369 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1370 num
= vec_safe_length (child_cfun
->local_decls
);
1371 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
1373 t
= (*child_cfun
->local_decls
)[srcidx
];
1374 if (DECL_CONTEXT (t
) == cfun
->decl
)
1376 if (srcidx
!= dstidx
)
1377 (*child_cfun
->local_decls
)[dstidx
] = t
;
1381 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
1383 /* Inform the callgraph about the new function. */
1384 child_cfun
->curr_properties
= cfun
->curr_properties
;
1385 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
1386 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
1387 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
1388 node
->parallelized_function
= 1;
1389 cgraph_node::add_new_function (child_fn
, true);
1391 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
1392 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
1394 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1395 fixed in a following pass. */
1396 push_cfun (child_cfun
);
1398 assign_assembler_name_if_needed (child_fn
);
1401 optimize_omp_library_calls (entry_stmt
);
1402 cgraph_edge::rebuild_edges ();
1404 /* Some EH regions might become dead, see PR34608. If
1405 pass_cleanup_cfg isn't the first pass to happen with the
1406 new child, these dead EH edges might cause problems.
1407 Clean them up now. */
1408 if (flag_exceptions
)
1411 bool changed
= false;
1413 FOR_EACH_BB_FN (bb
, cfun
)
1414 changed
|= gimple_purge_dead_eh_edges (bb
);
1416 cleanup_tree_cfg ();
1418 if (gimple_in_ssa_p (cfun
))
1419 update_ssa (TODO_update_ssa
);
1420 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1421 verify_loop_structure ();
1424 if (dump_file
&& !gimple_in_ssa_p (cfun
))
1426 omp_any_child_fn_dumped
= true;
1427 dump_function_header (dump_file
, child_fn
, dump_flags
);
1428 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
1432 /* Emit a library call to launch the children threads. */
1434 expand_cilk_for_call (new_bb
,
1435 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1436 else if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1437 expand_parallel_call (region
, new_bb
,
1438 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1440 expand_task_call (region
, new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1441 if (gimple_in_ssa_p (cfun
))
1442 update_ssa (TODO_update_ssa_only_virtuals
);
1445 /* Information about members of an OpenACC collapsed loop nest. */
1447 struct oacc_collapse
1449 tree base
; /* Base value. */
1450 tree iters
; /* Number of steps. */
1451 tree step
; /* Step size. */
1452 tree tile
; /* Tile increment (if tiled). */
1453 tree outer
; /* Tile iterator var. */
1456 /* Helper for expand_oacc_for. Determine collapsed loop information.
1457 Fill in COUNTS array. Emit any initialization code before GSI.
1458 Return the calculated outer loop bound of BOUND_TYPE. */
1461 expand_oacc_collapse_init (const struct omp_for_data
*fd
,
1462 gimple_stmt_iterator
*gsi
,
1463 oacc_collapse
*counts
, tree bound_type
,
1466 tree tiling
= fd
->tiling
;
1467 tree total
= build_int_cst (bound_type
, 1);
1470 gcc_assert (integer_onep (fd
->loop
.step
));
1471 gcc_assert (integer_zerop (fd
->loop
.n1
));
1473 /* When tiling, the first operand of the tile clause applies to the
1474 innermost loop, and we work outwards from there. Seems
1475 backwards, but whatever. */
1476 for (ix
= fd
->collapse
; ix
--;)
1478 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1480 tree iter_type
= TREE_TYPE (loop
->v
);
1481 tree diff_type
= iter_type
;
1482 tree plus_type
= iter_type
;
1484 gcc_assert (loop
->cond_code
== fd
->loop
.cond_code
);
1486 if (POINTER_TYPE_P (iter_type
))
1487 plus_type
= sizetype
;
1488 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
1489 diff_type
= signed_type_for (diff_type
);
1493 tree num
= build_int_cst (integer_type_node
, fd
->collapse
);
1494 tree loop_no
= build_int_cst (integer_type_node
, ix
);
1495 tree tile
= TREE_VALUE (tiling
);
1497 = gimple_build_call_internal (IFN_GOACC_TILE
, 5, num
, loop_no
, tile
,
1498 /* gwv-outer=*/integer_zero_node
,
1499 /* gwv-inner=*/integer_zero_node
);
1501 counts
[ix
].outer
= create_tmp_var (iter_type
, ".outer");
1502 counts
[ix
].tile
= create_tmp_var (diff_type
, ".tile");
1503 gimple_call_set_lhs (call
, counts
[ix
].tile
);
1504 gimple_set_location (call
, loc
);
1505 gsi_insert_before (gsi
, call
, GSI_SAME_STMT
);
1507 tiling
= TREE_CHAIN (tiling
);
1511 counts
[ix
].tile
= NULL
;
1512 counts
[ix
].outer
= loop
->v
;
1517 tree s
= loop
->step
;
1518 bool up
= loop
->cond_code
== LT_EXPR
;
1519 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
1523 b
= force_gimple_operand_gsi (gsi
, b
, true, NULL_TREE
,
1524 true, GSI_SAME_STMT
);
1525 e
= force_gimple_operand_gsi (gsi
, e
, true, NULL_TREE
,
1526 true, GSI_SAME_STMT
);
1528 /* Convert the step, avoiding possible unsigned->signed overflow. */
1529 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
1531 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
1532 s
= fold_convert (diff_type
, s
);
1534 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
1535 s
= force_gimple_operand_gsi (gsi
, s
, true, NULL_TREE
,
1536 true, GSI_SAME_STMT
);
1538 /* Determine the range, avoiding possible unsigned->signed overflow. */
1539 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
1540 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
1541 fold_convert (plus_type
, negating
? b
: e
),
1542 fold_convert (plus_type
, negating
? e
: b
));
1543 expr
= fold_convert (diff_type
, expr
);
1545 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
1546 tree range
= force_gimple_operand_gsi
1547 (gsi
, expr
, true, NULL_TREE
, true, GSI_SAME_STMT
);
1549 /* Determine number of iterations. */
1550 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
1551 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
1552 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
1554 tree iters
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
,
1555 true, GSI_SAME_STMT
);
1557 counts
[ix
].base
= b
;
1558 counts
[ix
].iters
= iters
;
1559 counts
[ix
].step
= s
;
1561 total
= fold_build2 (MULT_EXPR
, bound_type
, total
,
1562 fold_convert (bound_type
, iters
));
1568 /* Emit initializers for collapsed loop members. INNER is true if
1569 this is for the element loop of a TILE. IVAR is the outer
1570 loop iteration variable, from which collapsed loop iteration values
1571 are calculated. COUNTS array has been initialized by
1572 expand_oacc_collapse_inits. */
1575 expand_oacc_collapse_vars (const struct omp_for_data
*fd
, bool inner
,
1576 gimple_stmt_iterator
*gsi
,
1577 const oacc_collapse
*counts
, tree ivar
)
1579 tree ivar_type
= TREE_TYPE (ivar
);
1581 /* The most rapidly changing iteration variable is the innermost
1583 for (int ix
= fd
->collapse
; ix
--;)
1585 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1586 const oacc_collapse
*collapse
= &counts
[ix
];
1587 tree v
= inner
? loop
->v
: collapse
->outer
;
1588 tree iter_type
= TREE_TYPE (v
);
1589 tree diff_type
= TREE_TYPE (collapse
->step
);
1590 tree plus_type
= iter_type
;
1591 enum tree_code plus_code
= PLUS_EXPR
;
1594 if (POINTER_TYPE_P (iter_type
))
1596 plus_code
= POINTER_PLUS_EXPR
;
1597 plus_type
= sizetype
;
1603 tree mod
= fold_convert (ivar_type
, collapse
->iters
);
1604 ivar
= fold_build2 (TRUNC_DIV_EXPR
, ivar_type
, expr
, mod
);
1605 expr
= fold_build2 (TRUNC_MOD_EXPR
, ivar_type
, expr
, mod
);
1606 ivar
= force_gimple_operand_gsi (gsi
, ivar
, true, NULL_TREE
,
1607 true, GSI_SAME_STMT
);
1610 expr
= fold_build2 (MULT_EXPR
, diff_type
, fold_convert (diff_type
, expr
),
1612 expr
= fold_build2 (plus_code
, iter_type
,
1613 inner
? collapse
->outer
: collapse
->base
,
1614 fold_convert (plus_type
, expr
));
1615 expr
= force_gimple_operand_gsi (gsi
, expr
, false, NULL_TREE
,
1616 true, GSI_SAME_STMT
);
1617 gassign
*ass
= gimple_build_assign (v
, expr
);
1618 gsi_insert_before (gsi
, ass
, GSI_SAME_STMT
);
1622 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1623 of the combined collapse > 1 loop constructs, generate code like:
1624 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1629 count3 = (adj + N32 - N31) / STEP3;
1630 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1635 count2 = (adj + N22 - N21) / STEP2;
1636 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1641 count1 = (adj + N12 - N11) / STEP1;
1642 count = count1 * count2 * count3;
1643 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1645 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1646 of the combined loop constructs, just initialize COUNTS array
1647 from the _looptemp_ clauses. */
1649 /* NOTE: It *could* be better to moosh all of the BBs together,
1650 creating one larger BB with all the computation and the unexpected
1651 jump at the end. I.e.
1653 bool zero3, zero2, zero1, zero;
1656 count3 = (N32 - N31) /[cl] STEP3;
1658 count2 = (N22 - N21) /[cl] STEP2;
1660 count1 = (N12 - N11) /[cl] STEP1;
1661 zero = zero3 || zero2 || zero1;
1662 count = count1 * count2 * count3;
1663 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1665 After all, we expect the zero=false, and thus we expect to have to
1666 evaluate all of the comparison expressions, so short-circuiting
1667 oughtn't be a win. Since the condition isn't protecting a
1668 denominator, we're not concerned about divide-by-zero, so we can
1669 fully evaluate count even if a numerator turned out to be wrong.
1671 It seems like putting this all together would create much better
1672 scheduling opportunities, and less pressure on the chip's branch
1676 expand_omp_for_init_counts (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1677 basic_block
&entry_bb
, tree
*counts
,
1678 basic_block
&zero_iter1_bb
, int &first_zero_iter1
,
1679 basic_block
&zero_iter2_bb
, int &first_zero_iter2
,
1680 basic_block
&l2_dom_bb
)
1682 tree t
, type
= TREE_TYPE (fd
->loop
.v
);
1686 /* Collapsed loops need work for expansion into SSA form. */
1687 gcc_assert (!gimple_in_ssa_p (cfun
));
1689 if (gimple_omp_for_combined_into_p (fd
->for_stmt
)
1690 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
1692 gcc_assert (fd
->ordered
== 0);
1693 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1694 isn't supposed to be handled, as the inner loop doesn't
1696 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
1697 OMP_CLAUSE__LOOPTEMP_
);
1698 gcc_assert (innerc
);
1699 for (i
= 0; i
< fd
->collapse
; i
++)
1701 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1702 OMP_CLAUSE__LOOPTEMP_
);
1703 gcc_assert (innerc
);
1705 counts
[i
] = OMP_CLAUSE_DECL (innerc
);
1707 counts
[0] = NULL_TREE
;
1712 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1714 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1715 counts
[i
] = NULL_TREE
;
1716 t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1717 fold_convert (itype
, fd
->loops
[i
].n1
),
1718 fold_convert (itype
, fd
->loops
[i
].n2
));
1719 if (t
&& integer_zerop (t
))
1721 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1722 counts
[i
] = build_int_cst (type
, 0);
1726 for (i
= 0; i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
1728 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1730 if (i
>= fd
->collapse
&& counts
[i
])
1732 if ((SSA_VAR_P (fd
->loop
.n2
) || i
>= fd
->collapse
)
1733 && ((t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1734 fold_convert (itype
, fd
->loops
[i
].n1
),
1735 fold_convert (itype
, fd
->loops
[i
].n2
)))
1736 == NULL_TREE
|| !integer_onep (t
)))
1740 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
1741 n1
= force_gimple_operand_gsi (gsi
, n1
, true, NULL_TREE
,
1742 true, GSI_SAME_STMT
);
1743 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
1744 n2
= force_gimple_operand_gsi (gsi
, n2
, true, NULL_TREE
,
1745 true, GSI_SAME_STMT
);
1746 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
1747 NULL_TREE
, NULL_TREE
);
1748 gsi_insert_before (gsi
, cond_stmt
, GSI_SAME_STMT
);
1749 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
1750 expand_omp_regimplify_p
, NULL
, NULL
)
1751 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
1752 expand_omp_regimplify_p
, NULL
, NULL
))
1754 *gsi
= gsi_for_stmt (cond_stmt
);
1755 gimple_regimplify_operands (cond_stmt
, gsi
);
1757 e
= split_block (entry_bb
, cond_stmt
);
1758 basic_block
&zero_iter_bb
1759 = i
< fd
->collapse
? zero_iter1_bb
: zero_iter2_bb
;
1760 int &first_zero_iter
1761 = i
< fd
->collapse
? first_zero_iter1
: first_zero_iter2
;
1762 if (zero_iter_bb
== NULL
)
1764 gassign
*assign_stmt
;
1765 first_zero_iter
= i
;
1766 zero_iter_bb
= create_empty_bb (entry_bb
);
1767 add_bb_to_loop (zero_iter_bb
, entry_bb
->loop_father
);
1768 *gsi
= gsi_after_labels (zero_iter_bb
);
1769 if (i
< fd
->collapse
)
1770 assign_stmt
= gimple_build_assign (fd
->loop
.n2
,
1771 build_zero_cst (type
));
1774 counts
[i
] = create_tmp_reg (type
, ".count");
1776 = gimple_build_assign (counts
[i
], build_zero_cst (type
));
1778 gsi_insert_before (gsi
, assign_stmt
, GSI_SAME_STMT
);
1779 set_immediate_dominator (CDI_DOMINATORS
, zero_iter_bb
,
1782 ne
= make_edge (entry_bb
, zero_iter_bb
, EDGE_FALSE_VALUE
);
1783 ne
->probability
= profile_probability::very_unlikely ();
1784 e
->flags
= EDGE_TRUE_VALUE
;
1785 e
->probability
= ne
->probability
.invert ();
1786 if (l2_dom_bb
== NULL
)
1787 l2_dom_bb
= entry_bb
;
1789 *gsi
= gsi_last_bb (entry_bb
);
1792 if (POINTER_TYPE_P (itype
))
1793 itype
= signed_type_for (itype
);
1794 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
1796 t
= fold_build2 (PLUS_EXPR
, itype
,
1797 fold_convert (itype
, fd
->loops
[i
].step
), t
);
1798 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
1799 fold_convert (itype
, fd
->loops
[i
].n2
));
1800 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
1801 fold_convert (itype
, fd
->loops
[i
].n1
));
1802 /* ?? We could probably use CEIL_DIV_EXPR instead of
1803 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1804 generate the same code in the end because generically we
1805 don't know that the values involved must be negative for
1807 if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
1808 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1809 fold_build1 (NEGATE_EXPR
, itype
, t
),
1810 fold_build1 (NEGATE_EXPR
, itype
,
1811 fold_convert (itype
,
1812 fd
->loops
[i
].step
)));
1814 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
1815 fold_convert (itype
, fd
->loops
[i
].step
));
1816 t
= fold_convert (type
, t
);
1817 if (TREE_CODE (t
) == INTEGER_CST
)
1821 if (i
< fd
->collapse
|| i
!= first_zero_iter2
)
1822 counts
[i
] = create_tmp_reg (type
, ".count");
1823 expand_omp_build_assign (gsi
, counts
[i
], t
);
1825 if (SSA_VAR_P (fd
->loop
.n2
) && i
< fd
->collapse
)
1830 t
= fold_build2 (MULT_EXPR
, type
, fd
->loop
.n2
, counts
[i
]);
1831 expand_omp_build_assign (gsi
, fd
->loop
.n2
, t
);
1836 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1838 V3 = N31 + (T % count3) * STEP3;
1840 V2 = N21 + (T % count2) * STEP2;
1842 V1 = N11 + T * STEP1;
1843 if this loop doesn't have an inner loop construct combined with it.
1844 If it does have an inner loop construct combined with it and the
1845 iteration count isn't known constant, store values from counts array
1846 into its _looptemp_ temporaries instead. */
1849 expand_omp_for_init_vars (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1850 tree
*counts
, gimple
*inner_stmt
, tree startvar
)
1853 if (gimple_omp_for_combined_p (fd
->for_stmt
))
1855 /* If fd->loop.n2 is constant, then no propagation of the counts
1856 is needed, they are constant. */
1857 if (TREE_CODE (fd
->loop
.n2
) == INTEGER_CST
)
1860 tree clauses
= gimple_code (inner_stmt
) != GIMPLE_OMP_FOR
1861 ? gimple_omp_taskreg_clauses (inner_stmt
)
1862 : gimple_omp_for_clauses (inner_stmt
);
1863 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1864 isn't supposed to be handled, as the inner loop doesn't
1866 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
1867 gcc_assert (innerc
);
1868 for (i
= 0; i
< fd
->collapse
; i
++)
1870 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1871 OMP_CLAUSE__LOOPTEMP_
);
1872 gcc_assert (innerc
);
1875 tree tem
= OMP_CLAUSE_DECL (innerc
);
1876 tree t
= fold_convert (TREE_TYPE (tem
), counts
[i
]);
1877 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1878 false, GSI_CONTINUE_LINKING
);
1879 gassign
*stmt
= gimple_build_assign (tem
, t
);
1880 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1886 tree type
= TREE_TYPE (fd
->loop
.v
);
1887 tree tem
= create_tmp_reg (type
, ".tem");
1888 gassign
*stmt
= gimple_build_assign (tem
, startvar
);
1889 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1891 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1893 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
), itype
, t
;
1895 if (POINTER_TYPE_P (vtype
))
1896 itype
= signed_type_for (vtype
);
1898 t
= fold_build2 (TRUNC_MOD_EXPR
, type
, tem
, counts
[i
]);
1901 t
= fold_convert (itype
, t
);
1902 t
= fold_build2 (MULT_EXPR
, itype
, t
,
1903 fold_convert (itype
, fd
->loops
[i
].step
));
1904 if (POINTER_TYPE_P (vtype
))
1905 t
= fold_build_pointer_plus (fd
->loops
[i
].n1
, t
);
1907 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
].n1
, t
);
1908 t
= force_gimple_operand_gsi (gsi
, t
,
1909 DECL_P (fd
->loops
[i
].v
)
1910 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1912 GSI_CONTINUE_LINKING
);
1913 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1914 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1917 t
= fold_build2 (TRUNC_DIV_EXPR
, type
, tem
, counts
[i
]);
1918 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1919 false, GSI_CONTINUE_LINKING
);
1920 stmt
= gimple_build_assign (tem
, t
);
1921 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1926 /* Helper function for expand_omp_for_*. Generate code like:
1929 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1933 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1940 extract_omp_for_update_vars (struct omp_for_data
*fd
, basic_block cont_bb
,
1941 basic_block body_bb
)
1943 basic_block last_bb
, bb
, collapse_bb
= NULL
;
1945 gimple_stmt_iterator gsi
;
1951 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1953 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
);
1955 bb
= create_empty_bb (last_bb
);
1956 add_bb_to_loop (bb
, last_bb
->loop_father
);
1957 gsi
= gsi_start_bb (bb
);
1959 if (i
< fd
->collapse
- 1)
1961 e
= make_edge (last_bb
, bb
, EDGE_FALSE_VALUE
);
1962 e
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
1964 t
= fd
->loops
[i
+ 1].n1
;
1965 t
= force_gimple_operand_gsi (&gsi
, t
,
1966 DECL_P (fd
->loops
[i
+ 1].v
)
1967 && TREE_ADDRESSABLE (fd
->loops
[i
1970 GSI_CONTINUE_LINKING
);
1971 stmt
= gimple_build_assign (fd
->loops
[i
+ 1].v
, t
);
1972 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1977 set_immediate_dominator (CDI_DOMINATORS
, bb
, last_bb
);
1979 if (POINTER_TYPE_P (vtype
))
1980 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, fd
->loops
[i
].step
);
1982 t
= fold_build2 (PLUS_EXPR
, vtype
, fd
->loops
[i
].v
, fd
->loops
[i
].step
);
1983 t
= force_gimple_operand_gsi (&gsi
, t
,
1984 DECL_P (fd
->loops
[i
].v
)
1985 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1986 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
1987 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1988 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1992 t
= fd
->loops
[i
].n2
;
1993 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
1994 false, GSI_CONTINUE_LINKING
);
1995 tree v
= fd
->loops
[i
].v
;
1996 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
1997 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
1998 false, GSI_CONTINUE_LINKING
);
1999 t
= fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
2000 stmt
= gimple_build_cond_empty (t
);
2001 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
2002 e
= make_edge (bb
, body_bb
, EDGE_TRUE_VALUE
);
2003 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
2006 make_edge (bb
, body_bb
, EDGE_FALLTHRU
);
2013 /* Expand #pragma omp ordered depend(source). */
2016 expand_omp_ordered_source (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
2017 tree
*counts
, location_t loc
)
2019 enum built_in_function source_ix
2020 = fd
->iter_type
== long_integer_type_node
2021 ? BUILT_IN_GOMP_DOACROSS_POST
: BUILT_IN_GOMP_DOACROSS_ULL_POST
;
2023 = gimple_build_call (builtin_decl_explicit (source_ix
), 1,
2024 build_fold_addr_expr (counts
[fd
->ordered
]));
2025 gimple_set_location (g
, loc
);
2026 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2029 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2032 expand_omp_ordered_sink (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
2033 tree
*counts
, tree c
, location_t loc
)
2035 auto_vec
<tree
, 10> args
;
2036 enum built_in_function sink_ix
2037 = fd
->iter_type
== long_integer_type_node
2038 ? BUILT_IN_GOMP_DOACROSS_WAIT
: BUILT_IN_GOMP_DOACROSS_ULL_WAIT
;
2039 tree t
, off
, coff
= NULL_TREE
, deps
= OMP_CLAUSE_DECL (c
), cond
= NULL_TREE
;
2041 gimple_stmt_iterator gsi2
= *gsi
;
2042 bool warned_step
= false;
2044 for (i
= 0; i
< fd
->ordered
; i
++)
2046 tree step
= NULL_TREE
;
2047 off
= TREE_PURPOSE (deps
);
2048 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
2050 step
= TREE_OPERAND (off
, 1);
2051 off
= TREE_OPERAND (off
, 0);
2053 if (!integer_zerop (off
))
2055 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
2056 || fd
->loops
[i
].cond_code
== GT_EXPR
);
2057 bool forward
= fd
->loops
[i
].cond_code
== LT_EXPR
;
2060 /* Non-simple Fortran DO loops. If step is variable,
2061 we don't know at compile even the direction, so can't
2063 if (TREE_CODE (step
) != INTEGER_CST
)
2065 forward
= tree_int_cst_sgn (step
) != -1;
2067 if (forward
^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2068 warning_at (loc
, 0, "%<depend(sink)%> clause waiting for "
2069 "lexically later iteration");
2072 deps
= TREE_CHAIN (deps
);
2074 /* If all offsets corresponding to the collapsed loops are zero,
2075 this depend clause can be ignored. FIXME: but there is still a
2076 flush needed. We need to emit one __sync_synchronize () for it
2077 though (perhaps conditionally)? Solve this together with the
2078 conservative dependence folding optimization.
2079 if (i >= fd->collapse)
2082 deps
= OMP_CLAUSE_DECL (c
);
2084 edge e1
= split_block (gsi_bb (gsi2
), gsi_stmt (gsi2
));
2085 edge e2
= split_block_after_labels (e1
->dest
);
2087 gsi2
= gsi_after_labels (e1
->dest
);
2088 *gsi
= gsi_last_bb (e1
->src
);
2089 for (i
= 0; i
< fd
->ordered
; i
++)
2091 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
2092 tree step
= NULL_TREE
;
2093 tree orig_off
= NULL_TREE
;
2094 if (POINTER_TYPE_P (itype
))
2097 deps
= TREE_CHAIN (deps
);
2098 off
= TREE_PURPOSE (deps
);
2099 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
2101 step
= TREE_OPERAND (off
, 1);
2102 off
= TREE_OPERAND (off
, 0);
2103 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
2104 && integer_onep (fd
->loops
[i
].step
)
2105 && !POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)));
2107 tree s
= fold_convert_loc (loc
, itype
, step
? step
: fd
->loops
[i
].step
);
2110 off
= fold_convert_loc (loc
, itype
, off
);
2112 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
2115 if (integer_zerop (off
))
2116 t
= boolean_true_node
;
2120 tree co
= fold_convert_loc (loc
, itype
, off
);
2121 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
2123 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2124 co
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, co
);
2125 a
= fold_build2_loc (loc
, POINTER_PLUS_EXPR
,
2126 TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].v
,
2129 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2130 a
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2131 fd
->loops
[i
].v
, co
);
2133 a
= fold_build2_loc (loc
, PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2134 fd
->loops
[i
].v
, co
);
2138 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2139 t1
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2142 t1
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2144 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2145 t2
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2148 t2
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2150 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
,
2151 step
, build_int_cst (TREE_TYPE (step
), 0));
2152 if (TREE_CODE (step
) != INTEGER_CST
)
2154 t1
= unshare_expr (t1
);
2155 t1
= force_gimple_operand_gsi (gsi
, t1
, true, NULL_TREE
,
2156 false, GSI_CONTINUE_LINKING
);
2157 t2
= unshare_expr (t2
);
2158 t2
= force_gimple_operand_gsi (gsi
, t2
, true, NULL_TREE
,
2159 false, GSI_CONTINUE_LINKING
);
2161 t
= fold_build3_loc (loc
, COND_EXPR
, boolean_type_node
,
2164 else if (fd
->loops
[i
].cond_code
== LT_EXPR
)
2166 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2167 t
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2170 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2173 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2174 t
= fold_build2_loc (loc
, GT_EXPR
, boolean_type_node
, a
,
2177 t
= fold_build2_loc (loc
, LE_EXPR
, boolean_type_node
, a
,
2181 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
, cond
, t
);
2185 off
= fold_convert_loc (loc
, itype
, off
);
2188 || (fd
->loops
[i
].cond_code
== LT_EXPR
2189 ? !integer_onep (fd
->loops
[i
].step
)
2190 : !integer_minus_onep (fd
->loops
[i
].step
)))
2192 if (step
== NULL_TREE
2193 && TYPE_UNSIGNED (itype
)
2194 && fd
->loops
[i
].cond_code
== GT_EXPR
)
2195 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
, off
,
2196 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
2199 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
,
2200 orig_off
? orig_off
: off
, s
);
2201 t
= fold_build2_loc (loc
, EQ_EXPR
, boolean_type_node
, t
,
2202 build_int_cst (itype
, 0));
2203 if (integer_zerop (t
) && !warned_step
)
2205 warning_at (loc
, 0, "%<depend(sink)%> refers to iteration never "
2206 "in the iteration space");
2209 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
,
2213 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
2219 t
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2220 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
2221 t
= fold_convert_loc (loc
, fd
->iter_type
, t
);
2224 /* We have divided off by step already earlier. */;
2225 else if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
2226 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
,
2227 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
2230 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
2231 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2232 off
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, off
);
2233 off
= fold_convert_loc (loc
, fd
->iter_type
, off
);
2234 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
2237 off
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, coff
,
2239 if (i
< fd
->collapse
- 1)
2241 coff
= fold_build2_loc (loc
, MULT_EXPR
, fd
->iter_type
, off
,
2246 off
= unshare_expr (off
);
2247 t
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, t
, off
);
2248 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2249 true, GSI_SAME_STMT
);
2252 gimple
*g
= gimple_build_call_vec (builtin_decl_explicit (sink_ix
), args
);
2253 gimple_set_location (g
, loc
);
2254 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
2256 cond
= unshare_expr (cond
);
2257 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
, false,
2258 GSI_CONTINUE_LINKING
);
2259 gsi_insert_after (gsi
, gimple_build_cond_empty (cond
), GSI_NEW_STMT
);
2260 edge e3
= make_edge (e1
->src
, e2
->dest
, EDGE_FALSE_VALUE
);
2261 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2262 e1
->probability
= e3
->probability
.invert ();
2263 e1
->flags
= EDGE_TRUE_VALUE
;
2264 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e1
->src
);
2266 *gsi
= gsi_after_labels (e2
->dest
);
2269 /* Expand all #pragma omp ordered depend(source) and
2270 #pragma omp ordered depend(sink:...) constructs in the current
2271 #pragma omp for ordered(n) region. */
2274 expand_omp_ordered_source_sink (struct omp_region
*region
,
2275 struct omp_for_data
*fd
, tree
*counts
,
2276 basic_block cont_bb
)
2278 struct omp_region
*inner
;
2280 for (i
= fd
->collapse
- 1; i
< fd
->ordered
; i
++)
2281 if (i
== fd
->collapse
- 1 && fd
->collapse
> 1)
2282 counts
[i
] = NULL_TREE
;
2283 else if (i
>= fd
->collapse
&& !cont_bb
)
2284 counts
[i
] = build_zero_cst (fd
->iter_type
);
2285 else if (!POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
))
2286 && integer_onep (fd
->loops
[i
].step
))
2287 counts
[i
] = NULL_TREE
;
2289 counts
[i
] = create_tmp_var (fd
->iter_type
, ".orditer");
2291 = build_array_type_nelts (fd
->iter_type
, fd
->ordered
- fd
->collapse
+ 1);
2292 counts
[fd
->ordered
] = create_tmp_var (atype
, ".orditera");
2293 TREE_ADDRESSABLE (counts
[fd
->ordered
]) = 1;
2295 for (inner
= region
->inner
; inner
; inner
= inner
->next
)
2296 if (inner
->type
== GIMPLE_OMP_ORDERED
)
2298 gomp_ordered
*ord_stmt
= inner
->ord_stmt
;
2299 gimple_stmt_iterator gsi
= gsi_for_stmt (ord_stmt
);
2300 location_t loc
= gimple_location (ord_stmt
);
2302 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
2303 c
; c
= OMP_CLAUSE_CHAIN (c
))
2304 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SOURCE
)
2307 expand_omp_ordered_source (&gsi
, fd
, counts
, loc
);
2308 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
2309 c
; c
= OMP_CLAUSE_CHAIN (c
))
2310 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SINK
)
2311 expand_omp_ordered_sink (&gsi
, fd
, counts
, c
, loc
);
2312 gsi_remove (&gsi
, true);
2316 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2320 expand_omp_for_ordered_loops (struct omp_for_data
*fd
, tree
*counts
,
2321 basic_block cont_bb
, basic_block body_bb
,
2322 bool ordered_lastprivate
)
2324 if (fd
->ordered
== fd
->collapse
)
2329 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2330 for (int i
= fd
->collapse
; i
< fd
->ordered
; i
++)
2332 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2333 tree n1
= fold_convert (type
, fd
->loops
[i
].n1
);
2334 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, n1
);
2335 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2336 size_int (i
- fd
->collapse
+ 1),
2337 NULL_TREE
, NULL_TREE
);
2338 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
2343 for (int i
= fd
->ordered
- 1; i
>= fd
->collapse
; i
--)
2345 tree t
, type
= TREE_TYPE (fd
->loops
[i
].v
);
2346 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2347 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
2348 fold_convert (type
, fd
->loops
[i
].n1
));
2350 expand_omp_build_assign (&gsi
, counts
[i
],
2351 build_zero_cst (fd
->iter_type
));
2352 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2353 size_int (i
- fd
->collapse
+ 1),
2354 NULL_TREE
, NULL_TREE
);
2355 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
2356 if (!gsi_end_p (gsi
))
2359 gsi
= gsi_last_bb (body_bb
);
2360 edge e1
= split_block (body_bb
, gsi_stmt (gsi
));
2361 basic_block new_body
= e1
->dest
;
2362 if (body_bb
== cont_bb
)
2365 basic_block new_header
;
2366 if (EDGE_COUNT (cont_bb
->preds
) > 0)
2368 gsi
= gsi_last_bb (cont_bb
);
2369 if (POINTER_TYPE_P (type
))
2370 t
= fold_build_pointer_plus (fd
->loops
[i
].v
,
2371 fold_convert (sizetype
,
2372 fd
->loops
[i
].step
));
2374 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loops
[i
].v
,
2375 fold_convert (type
, fd
->loops
[i
].step
));
2376 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
2379 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[i
],
2380 build_int_cst (fd
->iter_type
, 1));
2381 expand_omp_build_assign (&gsi
, counts
[i
], t
);
2386 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2387 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
2388 t
= fold_convert (fd
->iter_type
, t
);
2389 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2390 true, GSI_SAME_STMT
);
2392 aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2393 size_int (i
- fd
->collapse
+ 1),
2394 NULL_TREE
, NULL_TREE
);
2395 expand_omp_build_assign (&gsi
, aref
, t
);
2397 e2
= split_block (cont_bb
, gsi_stmt (gsi
));
2398 new_header
= e2
->dest
;
2401 new_header
= cont_bb
;
2402 gsi
= gsi_after_labels (new_header
);
2403 tree v
= force_gimple_operand_gsi (&gsi
, fd
->loops
[i
].v
, true, NULL_TREE
,
2404 true, GSI_SAME_STMT
);
2406 = force_gimple_operand_gsi (&gsi
, fold_convert (type
, fd
->loops
[i
].n2
),
2407 true, NULL_TREE
, true, GSI_SAME_STMT
);
2408 t
= build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, n2
);
2409 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_NEW_STMT
);
2410 edge e3
= split_block (new_header
, gsi_stmt (gsi
));
2413 make_edge (body_bb
, new_header
, EDGE_FALLTHRU
);
2414 e3
->flags
= EDGE_FALSE_VALUE
;
2415 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2416 e1
= make_edge (new_header
, new_body
, EDGE_TRUE_VALUE
);
2417 e1
->probability
= e3
->probability
.invert ();
2419 set_immediate_dominator (CDI_DOMINATORS
, new_header
, body_bb
);
2420 set_immediate_dominator (CDI_DOMINATORS
, new_body
, new_header
);
2424 struct loop
*loop
= alloc_loop ();
2425 loop
->header
= new_header
;
2426 loop
->latch
= e2
->src
;
2427 add_loop (loop
, body_bb
->loop_father
);
2431 /* If there are any lastprivate clauses and it is possible some loops
2432 might have zero iterations, ensure all the decls are initialized,
2433 otherwise we could crash evaluating C++ class iterators with lastprivate
2435 bool need_inits
= false;
2436 for (int i
= fd
->collapse
; ordered_lastprivate
&& i
< fd
->ordered
; i
++)
2439 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2440 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2441 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
2442 fold_convert (type
, fd
->loops
[i
].n1
));
2446 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2447 tree this_cond
= fold_build2 (fd
->loops
[i
].cond_code
,
2449 fold_convert (type
, fd
->loops
[i
].n1
),
2450 fold_convert (type
, fd
->loops
[i
].n2
));
2451 if (!integer_onep (this_cond
))
2458 /* A subroutine of expand_omp_for. Generate code for a parallel
2459 loop with any schedule. Given parameters:
2461 for (V = N1; V cond N2; V += STEP) BODY;
2463 where COND is "<" or ">", we generate pseudocode
2465 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2466 if (more) goto L0; else goto L3;
2473 if (V cond iend) goto L1; else goto L2;
2475 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2478 If this is a combined omp parallel loop, instead of the call to
2479 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2480 If this is gimple_omp_for_combined_p loop, then instead of assigning
2481 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2482 inner GIMPLE_OMP_FOR and V += STEP; and
2483 if (V cond iend) goto L1; else goto L2; are removed.
2485 For collapsed loops, given parameters:
2487 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2488 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2489 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2492 we generate pseudocode
2494 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2499 count3 = (adj + N32 - N31) / STEP3;
2500 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2505 count2 = (adj + N22 - N21) / STEP2;
2506 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2511 count1 = (adj + N12 - N11) / STEP1;
2512 count = count1 * count2 * count3;
2517 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2518 if (more) goto L0; else goto L3;
2522 V3 = N31 + (T % count3) * STEP3;
2524 V2 = N21 + (T % count2) * STEP2;
2526 V1 = N11 + T * STEP1;
2531 if (V < iend) goto L10; else goto L2;
2534 if (V3 cond3 N32) goto L1; else goto L11;
2538 if (V2 cond2 N22) goto L1; else goto L12;
2544 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2550 expand_omp_for_generic (struct omp_region
*region
,
2551 struct omp_for_data
*fd
,
2552 enum built_in_function start_fn
,
2553 enum built_in_function next_fn
,
2556 tree type
, istart0
, iend0
, iend
;
2557 tree t
, vmain
, vback
, bias
= NULL_TREE
;
2558 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, collapse_bb
;
2559 basic_block l2_bb
= NULL
, l3_bb
= NULL
;
2560 gimple_stmt_iterator gsi
;
2561 gassign
*assign_stmt
;
2562 bool in_combined_parallel
= is_combined_parallel (region
);
2563 bool broken_loop
= region
->cont
== NULL
;
2565 tree
*counts
= NULL
;
2567 bool ordered_lastprivate
= false;
2569 gcc_assert (!broken_loop
|| !in_combined_parallel
);
2570 gcc_assert (fd
->iter_type
== long_integer_type_node
2571 || !in_combined_parallel
);
2573 entry_bb
= region
->entry
;
2574 cont_bb
= region
->cont
;
2576 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
2577 gcc_assert (broken_loop
2578 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
2579 l0_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
2580 l1_bb
= single_succ (l0_bb
);
2583 l2_bb
= create_empty_bb (cont_bb
);
2584 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l1_bb
2585 || (single_succ_edge (BRANCH_EDGE (cont_bb
)->dest
)->dest
2587 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
2591 l3_bb
= BRANCH_EDGE (entry_bb
)->dest
;
2592 exit_bb
= region
->exit
;
2594 gsi
= gsi_last_bb (entry_bb
);
2596 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
2598 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi
)),
2599 OMP_CLAUSE_LASTPRIVATE
))
2600 ordered_lastprivate
= false;
2601 if (fd
->collapse
> 1 || fd
->ordered
)
2603 int first_zero_iter1
= -1, first_zero_iter2
= -1;
2604 basic_block zero_iter1_bb
= NULL
, zero_iter2_bb
= NULL
, l2_dom_bb
= NULL
;
2606 counts
= XALLOCAVEC (tree
, fd
->ordered
? fd
->ordered
+ 1 : fd
->collapse
);
2607 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
2608 zero_iter1_bb
, first_zero_iter1
,
2609 zero_iter2_bb
, first_zero_iter2
, l2_dom_bb
);
2613 /* Some counts[i] vars might be uninitialized if
2614 some loop has zero iterations. But the body shouldn't
2615 be executed in that case, so just avoid uninit warnings. */
2616 for (i
= first_zero_iter1
;
2617 i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
2618 if (SSA_VAR_P (counts
[i
]))
2619 TREE_NO_WARNING (counts
[i
]) = 1;
2621 e
= split_block (entry_bb
, gsi_stmt (gsi
));
2623 make_edge (zero_iter1_bb
, entry_bb
, EDGE_FALLTHRU
);
2624 gsi
= gsi_last_bb (entry_bb
);
2625 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
2626 get_immediate_dominator (CDI_DOMINATORS
,
2631 /* Some counts[i] vars might be uninitialized if
2632 some loop has zero iterations. But the body shouldn't
2633 be executed in that case, so just avoid uninit warnings. */
2634 for (i
= first_zero_iter2
; i
< fd
->ordered
; i
++)
2635 if (SSA_VAR_P (counts
[i
]))
2636 TREE_NO_WARNING (counts
[i
]) = 1;
2638 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
2642 e
= split_block (entry_bb
, gsi_stmt (gsi
));
2644 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
2645 gsi
= gsi_last_bb (entry_bb
);
2646 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
2647 get_immediate_dominator
2648 (CDI_DOMINATORS
, zero_iter2_bb
));
2651 if (fd
->collapse
== 1)
2653 counts
[0] = fd
->loop
.n2
;
2654 fd
->loop
= fd
->loops
[0];
2658 type
= TREE_TYPE (fd
->loop
.v
);
2659 istart0
= create_tmp_var (fd
->iter_type
, ".istart0");
2660 iend0
= create_tmp_var (fd
->iter_type
, ".iend0");
2661 TREE_ADDRESSABLE (istart0
) = 1;
2662 TREE_ADDRESSABLE (iend0
) = 1;
2664 /* See if we need to bias by LLONG_MIN. */
2665 if (fd
->iter_type
== long_long_unsigned_type_node
2666 && TREE_CODE (type
) == INTEGER_TYPE
2667 && !TYPE_UNSIGNED (type
)
2668 && fd
->ordered
== 0)
2672 if (fd
->loop
.cond_code
== LT_EXPR
)
2675 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
2679 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
2682 if (TREE_CODE (n1
) != INTEGER_CST
2683 || TREE_CODE (n2
) != INTEGER_CST
2684 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
2685 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
2688 gimple_stmt_iterator gsif
= gsi
;
2691 tree arr
= NULL_TREE
;
2692 if (in_combined_parallel
)
2694 gcc_assert (fd
->ordered
== 0);
2695 /* In a combined parallel loop, emit a call to
2696 GOMP_loop_foo_next. */
2697 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
2698 build_fold_addr_expr (istart0
),
2699 build_fold_addr_expr (iend0
));
2703 tree t0
, t1
, t2
, t3
, t4
;
2704 /* If this is not a combined parallel loop, emit a call to
2705 GOMP_loop_foo_start in ENTRY_BB. */
2706 t4
= build_fold_addr_expr (iend0
);
2707 t3
= build_fold_addr_expr (istart0
);
2710 t0
= build_int_cst (unsigned_type_node
,
2711 fd
->ordered
- fd
->collapse
+ 1);
2712 arr
= create_tmp_var (build_array_type_nelts (fd
->iter_type
,
2714 - fd
->collapse
+ 1),
2716 DECL_NAMELESS (arr
) = 1;
2717 TREE_ADDRESSABLE (arr
) = 1;
2718 TREE_STATIC (arr
) = 1;
2719 vec
<constructor_elt
, va_gc
> *v
;
2720 vec_alloc (v
, fd
->ordered
- fd
->collapse
+ 1);
2723 for (idx
= 0; idx
< fd
->ordered
- fd
->collapse
+ 1; idx
++)
2726 if (idx
== 0 && fd
->collapse
> 1)
2729 c
= counts
[idx
+ fd
->collapse
- 1];
2730 tree purpose
= size_int (idx
);
2731 CONSTRUCTOR_APPEND_ELT (v
, purpose
, c
);
2732 if (TREE_CODE (c
) != INTEGER_CST
)
2733 TREE_STATIC (arr
) = 0;
2736 DECL_INITIAL (arr
) = build_constructor (TREE_TYPE (arr
), v
);
2737 if (!TREE_STATIC (arr
))
2738 force_gimple_operand_gsi (&gsi
, build1 (DECL_EXPR
,
2739 void_type_node
, arr
),
2740 true, NULL_TREE
, true, GSI_SAME_STMT
);
2741 t1
= build_fold_addr_expr (arr
);
2746 t2
= fold_convert (fd
->iter_type
, fd
->loop
.step
);
2749 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
2752 = omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
2753 OMP_CLAUSE__LOOPTEMP_
);
2754 gcc_assert (innerc
);
2755 t0
= OMP_CLAUSE_DECL (innerc
);
2756 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2757 OMP_CLAUSE__LOOPTEMP_
);
2758 gcc_assert (innerc
);
2759 t1
= OMP_CLAUSE_DECL (innerc
);
2761 if (POINTER_TYPE_P (TREE_TYPE (t0
))
2762 && TYPE_PRECISION (TREE_TYPE (t0
))
2763 != TYPE_PRECISION (fd
->iter_type
))
2765 /* Avoid casting pointers to integer of a different size. */
2766 tree itype
= signed_type_for (type
);
2767 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
2768 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
2772 t1
= fold_convert (fd
->iter_type
, t1
);
2773 t0
= fold_convert (fd
->iter_type
, t0
);
2777 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
2778 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
2781 if (fd
->iter_type
== long_integer_type_node
|| fd
->ordered
)
2785 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
2786 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
2788 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2789 5, t0
, t1
, t
, t3
, t4
);
2791 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2792 6, t0
, t1
, t2
, t
, t3
, t4
);
2794 else if (fd
->ordered
)
2795 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2798 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2799 5, t0
, t1
, t2
, t3
, t4
);
2807 /* The GOMP_loop_ull_*start functions have additional boolean
2808 argument, true for < loops and false for > loops.
2809 In Fortran, the C bool type can be different from
2810 boolean_type_node. */
2811 bfn_decl
= builtin_decl_explicit (start_fn
);
2812 c_bool_type
= TREE_TYPE (TREE_TYPE (bfn_decl
));
2813 t5
= build_int_cst (c_bool_type
,
2814 fd
->loop
.cond_code
== LT_EXPR
? 1 : 0);
2817 tree bfn_decl
= builtin_decl_explicit (start_fn
);
2818 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
2819 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
2820 t
= build_call_expr (bfn_decl
, 7, t5
, t0
, t1
, t2
, t
, t3
, t4
);
2823 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2824 6, t5
, t0
, t1
, t2
, t3
, t4
);
2827 if (TREE_TYPE (t
) != boolean_type_node
)
2828 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
2829 t
, build_int_cst (TREE_TYPE (t
), 0));
2830 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2831 true, GSI_SAME_STMT
);
2832 if (arr
&& !TREE_STATIC (arr
))
2834 tree clobber
= build_constructor (TREE_TYPE (arr
), NULL
);
2835 TREE_THIS_VOLATILE (clobber
) = 1;
2836 gsi_insert_before (&gsi
, gimple_build_assign (arr
, clobber
),
2839 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
2841 /* Remove the GIMPLE_OMP_FOR statement. */
2842 gsi_remove (&gsi
, true);
2844 if (gsi_end_p (gsif
))
2845 gsif
= gsi_after_labels (gsi_bb (gsif
));
2848 /* Iteration setup for sequential loop goes in L0_BB. */
2849 tree startvar
= fd
->loop
.v
;
2850 tree endvar
= NULL_TREE
;
2852 if (gimple_omp_for_combined_p (fd
->for_stmt
))
2854 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_FOR
2855 && gimple_omp_for_kind (inner_stmt
)
2856 == GF_OMP_FOR_KIND_SIMD
);
2857 tree innerc
= omp_find_clause (gimple_omp_for_clauses (inner_stmt
),
2858 OMP_CLAUSE__LOOPTEMP_
);
2859 gcc_assert (innerc
);
2860 startvar
= OMP_CLAUSE_DECL (innerc
);
2861 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2862 OMP_CLAUSE__LOOPTEMP_
);
2863 gcc_assert (innerc
);
2864 endvar
= OMP_CLAUSE_DECL (innerc
);
2867 gsi
= gsi_start_bb (l0_bb
);
2869 if (fd
->ordered
&& fd
->collapse
== 1)
2870 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
2871 fold_convert (fd
->iter_type
, fd
->loop
.step
));
2873 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
2874 if (fd
->ordered
&& fd
->collapse
== 1)
2876 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2877 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
2878 fd
->loop
.n1
, fold_convert (sizetype
, t
));
2881 t
= fold_convert (TREE_TYPE (startvar
), t
);
2882 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
2888 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2889 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
2890 t
= fold_convert (TREE_TYPE (startvar
), t
);
2892 t
= force_gimple_operand_gsi (&gsi
, t
,
2894 && TREE_ADDRESSABLE (startvar
),
2895 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
2896 assign_stmt
= gimple_build_assign (startvar
, t
);
2897 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2900 if (fd
->ordered
&& fd
->collapse
== 1)
2901 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
2902 fold_convert (fd
->iter_type
, fd
->loop
.step
));
2904 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
2905 if (fd
->ordered
&& fd
->collapse
== 1)
2907 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2908 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
2909 fd
->loop
.n1
, fold_convert (sizetype
, t
));
2912 t
= fold_convert (TREE_TYPE (startvar
), t
);
2913 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
2919 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
2920 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
2921 t
= fold_convert (TREE_TYPE (startvar
), t
);
2923 iend
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2924 false, GSI_CONTINUE_LINKING
);
2927 assign_stmt
= gimple_build_assign (endvar
, iend
);
2928 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2929 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (iend
)))
2930 assign_stmt
= gimple_build_assign (fd
->loop
.v
, iend
);
2932 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, iend
);
2933 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2935 /* Handle linear clause adjustments. */
2936 tree itercnt
= NULL_TREE
;
2937 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
2938 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
2939 c
; c
= OMP_CLAUSE_CHAIN (c
))
2940 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
2941 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
2943 tree d
= OMP_CLAUSE_DECL (c
);
2944 bool is_ref
= omp_is_reference (d
);
2945 tree t
= d
, a
, dest
;
2947 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
2948 tree type
= TREE_TYPE (t
);
2949 if (POINTER_TYPE_P (type
))
2951 dest
= unshare_expr (t
);
2952 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
2953 expand_omp_build_assign (&gsif
, v
, t
);
2954 if (itercnt
== NULL_TREE
)
2957 tree n1
= fd
->loop
.n1
;
2958 if (POINTER_TYPE_P (TREE_TYPE (itercnt
)))
2961 = fold_convert (signed_type_for (TREE_TYPE (itercnt
)),
2963 n1
= fold_convert (TREE_TYPE (itercnt
), n1
);
2965 itercnt
= fold_build2 (MINUS_EXPR
, TREE_TYPE (itercnt
),
2967 itercnt
= fold_build2 (EXACT_DIV_EXPR
, TREE_TYPE (itercnt
),
2968 itercnt
, fd
->loop
.step
);
2969 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
2971 GSI_CONTINUE_LINKING
);
2973 a
= fold_build2 (MULT_EXPR
, type
,
2974 fold_convert (type
, itercnt
),
2975 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
2976 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
2977 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
2978 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2979 false, GSI_CONTINUE_LINKING
);
2980 assign_stmt
= gimple_build_assign (dest
, t
);
2981 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
2983 if (fd
->collapse
> 1)
2984 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
2988 /* Until now, counts array contained number of iterations or
2989 variable containing it for ith loop. From now on, we need
2990 those counts only for collapsed loops, and only for the 2nd
2991 till the last collapsed one. Move those one element earlier,
2992 we'll use counts[fd->collapse - 1] for the first source/sink
2993 iteration counter and so on and counts[fd->ordered]
2994 as the array holding the current counter values for
2996 if (fd
->collapse
> 1)
2997 memmove (counts
, counts
+ 1, (fd
->collapse
- 1) * sizeof (counts
[0]));
3001 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
3003 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
3005 = fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
3006 fold_convert (type
, fd
->loops
[i
].n1
),
3007 fold_convert (type
, fd
->loops
[i
].n2
));
3008 if (!integer_onep (this_cond
))
3011 if (i
< fd
->ordered
)
3014 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
);
3015 add_bb_to_loop (cont_bb
, l1_bb
->loop_father
);
3016 gimple_stmt_iterator gsi
= gsi_after_labels (cont_bb
);
3017 gimple
*g
= gimple_build_omp_continue (fd
->loop
.v
, fd
->loop
.v
);
3018 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3019 make_edge (cont_bb
, l3_bb
, EDGE_FALLTHRU
);
3020 make_edge (cont_bb
, l1_bb
, 0);
3021 l2_bb
= create_empty_bb (cont_bb
);
3022 broken_loop
= false;
3025 expand_omp_ordered_source_sink (region
, fd
, counts
, cont_bb
);
3026 cont_bb
= expand_omp_for_ordered_loops (fd
, counts
, cont_bb
, l1_bb
,
3027 ordered_lastprivate
);
3028 if (counts
[fd
->collapse
- 1])
3030 gcc_assert (fd
->collapse
== 1);
3031 gsi
= gsi_last_bb (l0_bb
);
3032 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1],
3034 gsi
= gsi_last_bb (cont_bb
);
3035 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[fd
->collapse
- 1],
3036 build_int_cst (fd
->iter_type
, 1));
3037 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1], t
);
3038 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3039 size_zero_node
, NULL_TREE
, NULL_TREE
);
3040 expand_omp_build_assign (&gsi
, aref
, counts
[fd
->collapse
- 1]);
3041 t
= counts
[fd
->collapse
- 1];
3043 else if (fd
->collapse
> 1)
3047 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
3048 fd
->loops
[0].v
, fd
->loops
[0].n1
);
3049 t
= fold_convert (fd
->iter_type
, t
);
3051 gsi
= gsi_last_bb (l0_bb
);
3052 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3053 size_zero_node
, NULL_TREE
, NULL_TREE
);
3054 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3055 false, GSI_CONTINUE_LINKING
);
3056 expand_omp_build_assign (&gsi
, aref
, t
, true);
3061 /* Code to control the increment and predicate for the sequential
3062 loop goes in the CONT_BB. */
3063 gsi
= gsi_last_bb (cont_bb
);
3064 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
3065 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
3066 vmain
= gimple_omp_continue_control_use (cont_stmt
);
3067 vback
= gimple_omp_continue_control_def (cont_stmt
);
3069 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
3071 if (POINTER_TYPE_P (type
))
3072 t
= fold_build_pointer_plus (vmain
, fd
->loop
.step
);
3074 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, fd
->loop
.step
);
3075 t
= force_gimple_operand_gsi (&gsi
, t
,
3077 && TREE_ADDRESSABLE (vback
),
3078 NULL_TREE
, true, GSI_SAME_STMT
);
3079 assign_stmt
= gimple_build_assign (vback
, t
);
3080 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3082 if (fd
->ordered
&& counts
[fd
->collapse
- 1] == NULL_TREE
)
3084 if (fd
->collapse
> 1)
3088 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
3089 fd
->loops
[0].v
, fd
->loops
[0].n1
);
3090 t
= fold_convert (fd
->iter_type
, t
);
3092 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
,
3093 counts
[fd
->ordered
], size_zero_node
,
3094 NULL_TREE
, NULL_TREE
);
3095 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3096 true, GSI_SAME_STMT
);
3097 expand_omp_build_assign (&gsi
, aref
, t
);
3100 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
3101 DECL_P (vback
) && TREE_ADDRESSABLE (vback
) ? t
: vback
,
3103 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3104 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3107 /* Remove GIMPLE_OMP_CONTINUE. */
3108 gsi_remove (&gsi
, true);
3110 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3111 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, l1_bb
);
3113 /* Emit code to get the next parallel iteration in L2_BB. */
3114 gsi
= gsi_start_bb (l2_bb
);
3116 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
3117 build_fold_addr_expr (istart0
),
3118 build_fold_addr_expr (iend0
));
3119 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3120 false, GSI_CONTINUE_LINKING
);
3121 if (TREE_TYPE (t
) != boolean_type_node
)
3122 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
3123 t
, build_int_cst (TREE_TYPE (t
), 0));
3124 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3125 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
3128 /* Add the loop cleanup function. */
3129 gsi
= gsi_last_bb (exit_bb
);
3130 if (gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
3131 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT
);
3132 else if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
3133 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
3135 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
3136 gcall
*call_stmt
= gimple_build_call (t
, 0);
3137 if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
3138 gimple_call_set_lhs (call_stmt
, gimple_omp_return_lhs (gsi_stmt (gsi
)));
3139 gsi_insert_after (&gsi
, call_stmt
, GSI_SAME_STMT
);
3142 tree arr
= counts
[fd
->ordered
];
3143 tree clobber
= build_constructor (TREE_TYPE (arr
), NULL
);
3144 TREE_THIS_VOLATILE (clobber
) = 1;
3145 gsi_insert_after (&gsi
, gimple_build_assign (arr
, clobber
),
3148 gsi_remove (&gsi
, true);
3150 /* Connect the new blocks. */
3151 find_edge (entry_bb
, l0_bb
)->flags
= EDGE_TRUE_VALUE
;
3152 find_edge (entry_bb
, l3_bb
)->flags
= EDGE_FALSE_VALUE
;
3158 e
= find_edge (cont_bb
, l3_bb
);
3159 ne
= make_edge (l2_bb
, l3_bb
, EDGE_FALSE_VALUE
);
3161 phis
= phi_nodes (l3_bb
);
3162 for (gsi
= gsi_start (phis
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3164 gimple
*phi
= gsi_stmt (gsi
);
3165 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, ne
),
3166 PHI_ARG_DEF_FROM_EDGE (phi
, e
));
3170 make_edge (cont_bb
, l2_bb
, EDGE_FALSE_VALUE
);
3171 e
= find_edge (cont_bb
, l1_bb
);
3174 e
= BRANCH_EDGE (cont_bb
);
3175 gcc_assert (single_succ (e
->dest
) == l1_bb
);
3177 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3182 else if (fd
->collapse
> 1)
3185 e
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
3188 e
->flags
= EDGE_TRUE_VALUE
;
3191 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
3192 find_edge (cont_bb
, l2_bb
)->probability
= e
->probability
.invert ();
3196 e
= find_edge (cont_bb
, l2_bb
);
3197 e
->flags
= EDGE_FALLTHRU
;
3199 make_edge (l2_bb
, l0_bb
, EDGE_TRUE_VALUE
);
3201 if (gimple_in_ssa_p (cfun
))
3203 /* Add phis to the outer loop that connect to the phis in the inner,
3204 original loop, and move the loop entry value of the inner phi to
3205 the loop entry value of the outer phi. */
3207 for (psi
= gsi_start_phis (l3_bb
); !gsi_end_p (psi
); gsi_next (&psi
))
3209 source_location locus
;
3211 gphi
*exit_phi
= psi
.phi ();
3213 edge l2_to_l3
= find_edge (l2_bb
, l3_bb
);
3214 tree exit_res
= PHI_ARG_DEF_FROM_EDGE (exit_phi
, l2_to_l3
);
3216 basic_block latch
= BRANCH_EDGE (cont_bb
)->dest
;
3217 edge latch_to_l1
= find_edge (latch
, l1_bb
);
3219 = find_phi_with_arg_on_edge (exit_res
, latch_to_l1
);
3221 tree t
= gimple_phi_result (exit_phi
);
3222 tree new_res
= copy_ssa_name (t
, NULL
);
3223 nphi
= create_phi_node (new_res
, l0_bb
);
3225 edge l0_to_l1
= find_edge (l0_bb
, l1_bb
);
3226 t
= PHI_ARG_DEF_FROM_EDGE (inner_phi
, l0_to_l1
);
3227 locus
= gimple_phi_arg_location_from_edge (inner_phi
, l0_to_l1
);
3228 edge entry_to_l0
= find_edge (entry_bb
, l0_bb
);
3229 add_phi_arg (nphi
, t
, entry_to_l0
, locus
);
3231 edge l2_to_l0
= find_edge (l2_bb
, l0_bb
);
3232 add_phi_arg (nphi
, exit_res
, l2_to_l0
, UNKNOWN_LOCATION
);
3234 add_phi_arg (inner_phi
, new_res
, l0_to_l1
, UNKNOWN_LOCATION
);
3238 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
,
3239 recompute_dominator (CDI_DOMINATORS
, l2_bb
));
3240 set_immediate_dominator (CDI_DOMINATORS
, l3_bb
,
3241 recompute_dominator (CDI_DOMINATORS
, l3_bb
));
3242 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
,
3243 recompute_dominator (CDI_DOMINATORS
, l0_bb
));
3244 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
,
3245 recompute_dominator (CDI_DOMINATORS
, l1_bb
));
3247 /* We enter expand_omp_for_generic with a loop. This original loop may
3248 have its own loop struct, or it may be part of an outer loop struct
3249 (which may be the fake loop). */
3250 struct loop
*outer_loop
= entry_bb
->loop_father
;
3251 bool orig_loop_has_loop_struct
= l1_bb
->loop_father
!= outer_loop
;
3253 add_bb_to_loop (l2_bb
, outer_loop
);
3255 /* We've added a new loop around the original loop. Allocate the
3256 corresponding loop struct. */
3257 struct loop
*new_loop
= alloc_loop ();
3258 new_loop
->header
= l0_bb
;
3259 new_loop
->latch
= l2_bb
;
3260 add_loop (new_loop
, outer_loop
);
3262 /* Allocate a loop structure for the original loop unless we already
3264 if (!orig_loop_has_loop_struct
3265 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3267 struct loop
*orig_loop
= alloc_loop ();
3268 orig_loop
->header
= l1_bb
;
3269 /* The loop may have multiple latches. */
3270 add_loop (orig_loop
, new_loop
);
3275 /* A subroutine of expand_omp_for. Generate code for a parallel
3276 loop with static schedule and no specified chunk size. Given
3279 for (V = N1; V cond N2; V += STEP) BODY;
3281 where COND is "<" or ">", we generate pseudocode
3283 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3288 if ((__typeof (V)) -1 > 0 && cond is >)
3289 n = -(adj + N2 - N1) / -STEP;
3291 n = (adj + N2 - N1) / STEP;
3294 if (threadid < tt) goto L3; else goto L4;
3299 s0 = q * threadid + tt;
3302 if (s0 >= e0) goto L2; else goto L0;
3308 if (V cond e) goto L1;
3313 expand_omp_for_static_nochunk (struct omp_region
*region
,
3314 struct omp_for_data
*fd
,
3317 tree n
, q
, s0
, e0
, e
, t
, tt
, nthreads
, threadid
;
3318 tree type
, itype
, vmain
, vback
;
3319 basic_block entry_bb
, second_bb
, third_bb
, exit_bb
, seq_start_bb
;
3320 basic_block body_bb
, cont_bb
, collapse_bb
= NULL
;
3322 gimple_stmt_iterator gsi
;
3324 bool broken_loop
= region
->cont
== NULL
;
3325 tree
*counts
= NULL
;
3328 itype
= type
= TREE_TYPE (fd
->loop
.v
);
3329 if (POINTER_TYPE_P (type
))
3330 itype
= signed_type_for (type
);
3332 entry_bb
= region
->entry
;
3333 cont_bb
= region
->cont
;
3334 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
3335 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
3336 gcc_assert (broken_loop
3337 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
3338 seq_start_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
3339 body_bb
= single_succ (seq_start_bb
);
3342 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
3343 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
3344 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
3346 exit_bb
= region
->exit
;
3348 /* Iteration space partitioning goes in ENTRY_BB. */
3349 gsi
= gsi_last_bb (entry_bb
);
3350 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3352 if (fd
->collapse
> 1)
3354 int first_zero_iter
= -1, dummy
= -1;
3355 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
3357 counts
= XALLOCAVEC (tree
, fd
->collapse
);
3358 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3359 fin_bb
, first_zero_iter
,
3360 dummy_bb
, dummy
, l2_dom_bb
);
3363 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3364 t
= integer_one_node
;
3366 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
3367 fold_convert (type
, fd
->loop
.n1
),
3368 fold_convert (type
, fd
->loop
.n2
));
3369 if (fd
->collapse
== 1
3370 && TYPE_UNSIGNED (type
)
3371 && (t
== NULL_TREE
|| !integer_onep (t
)))
3373 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
3374 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
3375 true, GSI_SAME_STMT
);
3376 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
3377 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
3378 true, GSI_SAME_STMT
);
3379 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
3380 NULL_TREE
, NULL_TREE
);
3381 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3382 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
3383 expand_omp_regimplify_p
, NULL
, NULL
)
3384 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
3385 expand_omp_regimplify_p
, NULL
, NULL
))
3387 gsi
= gsi_for_stmt (cond_stmt
);
3388 gimple_regimplify_operands (cond_stmt
, &gsi
);
3390 ep
= split_block (entry_bb
, cond_stmt
);
3391 ep
->flags
= EDGE_TRUE_VALUE
;
3392 entry_bb
= ep
->dest
;
3393 ep
->probability
= profile_probability::very_likely ();
3394 ep
= make_edge (ep
->src
, fin_bb
, EDGE_FALSE_VALUE
);
3395 ep
->probability
= profile_probability::very_unlikely ();
3396 if (gimple_in_ssa_p (cfun
))
3398 int dest_idx
= find_edge (entry_bb
, fin_bb
)->dest_idx
;
3399 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
3400 !gsi_end_p (gpi
); gsi_next (&gpi
))
3402 gphi
*phi
= gpi
.phi ();
3403 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
3404 ep
, UNKNOWN_LOCATION
);
3407 gsi
= gsi_last_bb (entry_bb
);
3410 switch (gimple_omp_for_kind (fd
->for_stmt
))
3412 case GF_OMP_FOR_KIND_FOR
:
3413 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
3414 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
3416 case GF_OMP_FOR_KIND_DISTRIBUTE
:
3417 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
3418 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
3423 nthreads
= build_call_expr (nthreads
, 0);
3424 nthreads
= fold_convert (itype
, nthreads
);
3425 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
3426 true, GSI_SAME_STMT
);
3427 threadid
= build_call_expr (threadid
, 0);
3428 threadid
= fold_convert (itype
, threadid
);
3429 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
3430 true, GSI_SAME_STMT
);
3434 step
= fd
->loop
.step
;
3435 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3437 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3438 OMP_CLAUSE__LOOPTEMP_
);
3439 gcc_assert (innerc
);
3440 n1
= OMP_CLAUSE_DECL (innerc
);
3441 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3442 OMP_CLAUSE__LOOPTEMP_
);
3443 gcc_assert (innerc
);
3444 n2
= OMP_CLAUSE_DECL (innerc
);
3446 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
3447 true, NULL_TREE
, true, GSI_SAME_STMT
);
3448 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
3449 true, NULL_TREE
, true, GSI_SAME_STMT
);
3450 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
3451 true, NULL_TREE
, true, GSI_SAME_STMT
);
3453 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
3454 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
3455 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
3456 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
3457 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
3458 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
3459 fold_build1 (NEGATE_EXPR
, itype
, t
),
3460 fold_build1 (NEGATE_EXPR
, itype
, step
));
3462 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
3463 t
= fold_convert (itype
, t
);
3464 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3466 q
= create_tmp_reg (itype
, "q");
3467 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, n
, nthreads
);
3468 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
3469 gsi_insert_before (&gsi
, gimple_build_assign (q
, t
), GSI_SAME_STMT
);
3471 tt
= create_tmp_reg (itype
, "tt");
3472 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
, n
, nthreads
);
3473 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
3474 gsi_insert_before (&gsi
, gimple_build_assign (tt
, t
), GSI_SAME_STMT
);
3476 t
= build2 (LT_EXPR
, boolean_type_node
, threadid
, tt
);
3477 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3478 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3480 second_bb
= split_block (entry_bb
, cond_stmt
)->dest
;
3481 gsi
= gsi_last_bb (second_bb
);
3482 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3484 gsi_insert_before (&gsi
, gimple_build_assign (tt
, build_int_cst (itype
, 0)),
3486 gassign
*assign_stmt
3487 = gimple_build_assign (q
, PLUS_EXPR
, q
, build_int_cst (itype
, 1));
3488 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3490 third_bb
= split_block (second_bb
, assign_stmt
)->dest
;
3491 gsi
= gsi_last_bb (third_bb
);
3492 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3494 t
= build2 (MULT_EXPR
, itype
, q
, threadid
);
3495 t
= build2 (PLUS_EXPR
, itype
, t
, tt
);
3496 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3498 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, q
);
3499 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3501 t
= build2 (GE_EXPR
, boolean_type_node
, s0
, e0
);
3502 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
3504 /* Remove the GIMPLE_OMP_FOR statement. */
3505 gsi_remove (&gsi
, true);
3507 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3508 gsi
= gsi_start_bb (seq_start_bb
);
3510 tree startvar
= fd
->loop
.v
;
3511 tree endvar
= NULL_TREE
;
3513 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3515 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
3516 ? gimple_omp_parallel_clauses (inner_stmt
)
3517 : gimple_omp_for_clauses (inner_stmt
);
3518 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
3519 gcc_assert (innerc
);
3520 startvar
= OMP_CLAUSE_DECL (innerc
);
3521 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3522 OMP_CLAUSE__LOOPTEMP_
);
3523 gcc_assert (innerc
);
3524 endvar
= OMP_CLAUSE_DECL (innerc
);
3525 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
3526 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
3529 for (i
= 1; i
< fd
->collapse
; i
++)
3531 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3532 OMP_CLAUSE__LOOPTEMP_
);
3533 gcc_assert (innerc
);
3535 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3536 OMP_CLAUSE__LOOPTEMP_
);
3539 /* If needed (distribute parallel for with lastprivate),
3540 propagate down the total number of iterations. */
3541 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
3543 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
3544 GSI_CONTINUE_LINKING
);
3545 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
3546 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3550 t
= fold_convert (itype
, s0
);
3551 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3552 if (POINTER_TYPE_P (type
))
3553 t
= fold_build_pointer_plus (n1
, t
);
3555 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3556 t
= fold_convert (TREE_TYPE (startvar
), t
);
3557 t
= force_gimple_operand_gsi (&gsi
, t
,
3559 && TREE_ADDRESSABLE (startvar
),
3560 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
3561 assign_stmt
= gimple_build_assign (startvar
, t
);
3562 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3564 t
= fold_convert (itype
, e0
);
3565 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3566 if (POINTER_TYPE_P (type
))
3567 t
= fold_build_pointer_plus (n1
, t
);
3569 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3570 t
= fold_convert (TREE_TYPE (startvar
), t
);
3571 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3572 false, GSI_CONTINUE_LINKING
);
3575 assign_stmt
= gimple_build_assign (endvar
, e
);
3576 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3577 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
3578 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
3580 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
3581 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3583 /* Handle linear clause adjustments. */
3584 tree itercnt
= NULL_TREE
;
3585 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
3586 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
3587 c
; c
= OMP_CLAUSE_CHAIN (c
))
3588 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
3589 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
3591 tree d
= OMP_CLAUSE_DECL (c
);
3592 bool is_ref
= omp_is_reference (d
);
3593 tree t
= d
, a
, dest
;
3595 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
3596 if (itercnt
== NULL_TREE
)
3598 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3600 itercnt
= fold_build2 (MINUS_EXPR
, itype
,
3601 fold_convert (itype
, n1
),
3602 fold_convert (itype
, fd
->loop
.n1
));
3603 itercnt
= fold_build2 (EXACT_DIV_EXPR
, itype
, itercnt
, step
);
3604 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercnt
, s0
);
3605 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
3607 GSI_CONTINUE_LINKING
);
3612 tree type
= TREE_TYPE (t
);
3613 if (POINTER_TYPE_P (type
))
3615 a
= fold_build2 (MULT_EXPR
, type
,
3616 fold_convert (type
, itercnt
),
3617 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
3618 dest
= unshare_expr (t
);
3619 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
3620 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
, a
);
3621 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3622 false, GSI_CONTINUE_LINKING
);
3623 assign_stmt
= gimple_build_assign (dest
, t
);
3624 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3626 if (fd
->collapse
> 1)
3627 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
3631 /* The code controlling the sequential loop replaces the
3632 GIMPLE_OMP_CONTINUE. */
3633 gsi
= gsi_last_bb (cont_bb
);
3634 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
3635 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
3636 vmain
= gimple_omp_continue_control_use (cont_stmt
);
3637 vback
= gimple_omp_continue_control_def (cont_stmt
);
3639 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
3641 if (POINTER_TYPE_P (type
))
3642 t
= fold_build_pointer_plus (vmain
, step
);
3644 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
3645 t
= force_gimple_operand_gsi (&gsi
, t
,
3647 && TREE_ADDRESSABLE (vback
),
3648 NULL_TREE
, true, GSI_SAME_STMT
);
3649 assign_stmt
= gimple_build_assign (vback
, t
);
3650 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3652 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
3653 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
3655 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
3658 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3659 gsi_remove (&gsi
, true);
3661 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3662 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
3665 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3666 gsi
= gsi_last_bb (exit_bb
);
3667 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
3669 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
3670 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
3672 gsi_remove (&gsi
, true);
3674 /* Connect all the blocks. */
3675 ep
= make_edge (entry_bb
, third_bb
, EDGE_FALSE_VALUE
);
3676 ep
->probability
= profile_probability::guessed_always ().apply_scale (3, 4);
3677 ep
= find_edge (entry_bb
, second_bb
);
3678 ep
->flags
= EDGE_TRUE_VALUE
;
3679 ep
->probability
= profile_probability::guessed_always ().apply_scale (1, 4);
3680 find_edge (third_bb
, seq_start_bb
)->flags
= EDGE_FALSE_VALUE
;
3681 find_edge (third_bb
, fin_bb
)->flags
= EDGE_TRUE_VALUE
;
3685 ep
= find_edge (cont_bb
, body_bb
);
3688 ep
= BRANCH_EDGE (cont_bb
);
3689 gcc_assert (single_succ (ep
->dest
) == body_bb
);
3691 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3696 else if (fd
->collapse
> 1)
3699 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
3702 ep
->flags
= EDGE_TRUE_VALUE
;
3703 find_edge (cont_bb
, fin_bb
)->flags
3704 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
3707 set_immediate_dominator (CDI_DOMINATORS
, second_bb
, entry_bb
);
3708 set_immediate_dominator (CDI_DOMINATORS
, third_bb
, entry_bb
);
3709 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
, third_bb
);
3711 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
3712 recompute_dominator (CDI_DOMINATORS
, body_bb
));
3713 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
3714 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
3716 struct loop
*loop
= body_bb
->loop_father
;
3717 if (loop
!= entry_bb
->loop_father
)
3719 gcc_assert (broken_loop
|| loop
->header
== body_bb
);
3720 gcc_assert (broken_loop
3721 || loop
->latch
== region
->cont
3722 || single_pred (loop
->latch
) == region
->cont
);
3726 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
3728 loop
= alloc_loop ();
3729 loop
->header
= body_bb
;
3730 if (collapse_bb
== NULL
)
3731 loop
->latch
= cont_bb
;
3732 add_loop (loop
, body_bb
->loop_father
);
3736 /* Return phi in E->DEST with ARG on edge E. */
/* NOTE(review): extraction damage — the loop's termination/increment
   clause and the function's return statements (return phi / return NULL
   in upstream GCC) were dropped from this chunk; tokens below are
   preserved verbatim.  The function scans the PHI nodes of E->DEST for
   one whose argument on edge E equals ARG.  */
3739 find_phi_with_arg_on_edge (tree arg
, edge e
)
3741 basic_block bb
= e
->dest
;
/* Iterate over every PHI node at the head of BB.  */
3743 for (gphi_iterator gpi
= gsi_start_phis (bb
);
3747 gphi
*phi
= gpi
.phi ();
/* Match when this PHI's incoming value on edge E is exactly ARG.  */
3748 if (PHI_ARG_DEF_FROM_EDGE (phi
, e
) == arg
)
3755 /* A subroutine of expand_omp_for. Generate code for a parallel
3756 loop with static schedule and a specified chunk size. Given
3759 for (V = N1; V cond N2; V += STEP) BODY;
3761 where COND is "<" or ">", we generate pseudocode
3763 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3768 if ((__typeof (V)) -1 > 0 && cond is >)
3769 n = -(adj + N2 - N1) / -STEP;
3771 n = (adj + N2 - N1) / STEP;
3773 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3774 here so that V is defined
3775 if the loop is not entered
3777 s0 = (trip * nthreads + threadid) * CHUNK;
3778 e0 = min (s0 + CHUNK, n);
3779 if (s0 < n) goto L1; else goto L4;
3786 if (V cond e) goto L2; else goto L3;
3794 expand_omp_for_static_chunk (struct omp_region
*region
,
3795 struct omp_for_data
*fd
, gimple
*inner_stmt
)
3797 tree n
, s0
, e0
, e
, t
;
3798 tree trip_var
, trip_init
, trip_main
, trip_back
, nthreads
, threadid
;
3799 tree type
, itype
, vmain
, vback
, vextra
;
3800 basic_block entry_bb
, exit_bb
, body_bb
, seq_start_bb
, iter_part_bb
;
3801 basic_block trip_update_bb
= NULL
, cont_bb
, collapse_bb
= NULL
, fin_bb
;
3802 gimple_stmt_iterator gsi
;
3804 bool broken_loop
= region
->cont
== NULL
;
3805 tree
*counts
= NULL
;
3808 itype
= type
= TREE_TYPE (fd
->loop
.v
);
3809 if (POINTER_TYPE_P (type
))
3810 itype
= signed_type_for (type
);
3812 entry_bb
= region
->entry
;
3813 se
= split_block (entry_bb
, last_stmt (entry_bb
));
3815 iter_part_bb
= se
->dest
;
3816 cont_bb
= region
->cont
;
3817 gcc_assert (EDGE_COUNT (iter_part_bb
->succs
) == 2);
3818 fin_bb
= BRANCH_EDGE (iter_part_bb
)->dest
;
3819 gcc_assert (broken_loop
3820 || fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
);
3821 seq_start_bb
= split_edge (FALLTHRU_EDGE (iter_part_bb
));
3822 body_bb
= single_succ (seq_start_bb
);
3825 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
3826 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
3827 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
3828 trip_update_bb
= split_edge (FALLTHRU_EDGE (cont_bb
));
3830 exit_bb
= region
->exit
;
3832 /* Trip and adjustment setup goes in ENTRY_BB. */
3833 gsi
= gsi_last_bb (entry_bb
);
3834 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3836 if (fd
->collapse
> 1)
3838 int first_zero_iter
= -1, dummy
= -1;
3839 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
3841 counts
= XALLOCAVEC (tree
, fd
->collapse
);
3842 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3843 fin_bb
, first_zero_iter
,
3844 dummy_bb
, dummy
, l2_dom_bb
);
3847 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3848 t
= integer_one_node
;
3850 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
3851 fold_convert (type
, fd
->loop
.n1
),
3852 fold_convert (type
, fd
->loop
.n2
));
3853 if (fd
->collapse
== 1
3854 && TYPE_UNSIGNED (type
)
3855 && (t
== NULL_TREE
|| !integer_onep (t
)))
3857 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
3858 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
3859 true, GSI_SAME_STMT
);
3860 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
3861 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
3862 true, GSI_SAME_STMT
);
3863 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
3864 NULL_TREE
, NULL_TREE
);
3865 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3866 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
3867 expand_omp_regimplify_p
, NULL
, NULL
)
3868 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
3869 expand_omp_regimplify_p
, NULL
, NULL
))
3871 gsi
= gsi_for_stmt (cond_stmt
);
3872 gimple_regimplify_operands (cond_stmt
, &gsi
);
3874 se
= split_block (entry_bb
, cond_stmt
);
3875 se
->flags
= EDGE_TRUE_VALUE
;
3876 entry_bb
= se
->dest
;
3877 se
->probability
= profile_probability::very_likely ();
3878 se
= make_edge (se
->src
, fin_bb
, EDGE_FALSE_VALUE
);
3879 se
->probability
= profile_probability::very_unlikely ();
3880 if (gimple_in_ssa_p (cfun
))
3882 int dest_idx
= find_edge (iter_part_bb
, fin_bb
)->dest_idx
;
3883 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
3884 !gsi_end_p (gpi
); gsi_next (&gpi
))
3886 gphi
*phi
= gpi
.phi ();
3887 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
3888 se
, UNKNOWN_LOCATION
);
3891 gsi
= gsi_last_bb (entry_bb
);
3894 switch (gimple_omp_for_kind (fd
->for_stmt
))
3896 case GF_OMP_FOR_KIND_FOR
:
3897 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
3898 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
3900 case GF_OMP_FOR_KIND_DISTRIBUTE
:
3901 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
3902 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
3907 nthreads
= build_call_expr (nthreads
, 0);
3908 nthreads
= fold_convert (itype
, nthreads
);
3909 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
3910 true, GSI_SAME_STMT
);
3911 threadid
= build_call_expr (threadid
, 0);
3912 threadid
= fold_convert (itype
, threadid
);
3913 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
3914 true, GSI_SAME_STMT
);
3918 step
= fd
->loop
.step
;
3919 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3921 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3922 OMP_CLAUSE__LOOPTEMP_
);
3923 gcc_assert (innerc
);
3924 n1
= OMP_CLAUSE_DECL (innerc
);
3925 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3926 OMP_CLAUSE__LOOPTEMP_
);
3927 gcc_assert (innerc
);
3928 n2
= OMP_CLAUSE_DECL (innerc
);
3930 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
3931 true, NULL_TREE
, true, GSI_SAME_STMT
);
3932 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
3933 true, NULL_TREE
, true, GSI_SAME_STMT
);
3934 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
3935 true, NULL_TREE
, true, GSI_SAME_STMT
);
3936 tree chunk_size
= fold_convert (itype
, fd
->chunk_size
);
3937 chunk_size
= omp_adjust_chunk_size (chunk_size
, fd
->simd_schedule
);
3939 = force_gimple_operand_gsi (&gsi
, chunk_size
, true, NULL_TREE
, true,
3942 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
3943 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
3944 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
3945 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
3946 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
3947 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
3948 fold_build1 (NEGATE_EXPR
, itype
, t
),
3949 fold_build1 (NEGATE_EXPR
, itype
, step
));
3951 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
3952 t
= fold_convert (itype
, t
);
3953 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3954 true, GSI_SAME_STMT
);
3956 trip_var
= create_tmp_reg (itype
, ".trip");
3957 if (gimple_in_ssa_p (cfun
))
3959 trip_init
= make_ssa_name (trip_var
);
3960 trip_main
= make_ssa_name (trip_var
);
3961 trip_back
= make_ssa_name (trip_var
);
3965 trip_init
= trip_var
;
3966 trip_main
= trip_var
;
3967 trip_back
= trip_var
;
3970 gassign
*assign_stmt
3971 = gimple_build_assign (trip_init
, build_int_cst (itype
, 0));
3972 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3974 t
= fold_build2 (MULT_EXPR
, itype
, threadid
, chunk_size
);
3975 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3976 if (POINTER_TYPE_P (type
))
3977 t
= fold_build_pointer_plus (n1
, t
);
3979 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3980 vextra
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3981 true, GSI_SAME_STMT
);
3983 /* Remove the GIMPLE_OMP_FOR. */
3984 gsi_remove (&gsi
, true);
3986 gimple_stmt_iterator gsif
= gsi
;
3988 /* Iteration space partitioning goes in ITER_PART_BB. */
3989 gsi
= gsi_last_bb (iter_part_bb
);
3991 t
= fold_build2 (MULT_EXPR
, itype
, trip_main
, nthreads
);
3992 t
= fold_build2 (PLUS_EXPR
, itype
, t
, threadid
);
3993 t
= fold_build2 (MULT_EXPR
, itype
, t
, chunk_size
);
3994 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3995 false, GSI_CONTINUE_LINKING
);
3997 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, chunk_size
);
3998 t
= fold_build2 (MIN_EXPR
, itype
, t
, n
);
3999 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4000 false, GSI_CONTINUE_LINKING
);
4002 t
= build2 (LT_EXPR
, boolean_type_node
, s0
, n
);
4003 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_CONTINUE_LINKING
);
4005 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4006 gsi
= gsi_start_bb (seq_start_bb
);
4008 tree startvar
= fd
->loop
.v
;
4009 tree endvar
= NULL_TREE
;
4011 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4013 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
4014 ? gimple_omp_parallel_clauses (inner_stmt
)
4015 : gimple_omp_for_clauses (inner_stmt
);
4016 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
4017 gcc_assert (innerc
);
4018 startvar
= OMP_CLAUSE_DECL (innerc
);
4019 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4020 OMP_CLAUSE__LOOPTEMP_
);
4021 gcc_assert (innerc
);
4022 endvar
= OMP_CLAUSE_DECL (innerc
);
4023 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
4024 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
4027 for (i
= 1; i
< fd
->collapse
; i
++)
4029 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4030 OMP_CLAUSE__LOOPTEMP_
);
4031 gcc_assert (innerc
);
4033 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4034 OMP_CLAUSE__LOOPTEMP_
);
4037 /* If needed (distribute parallel for with lastprivate),
4038 propagate down the total number of iterations. */
4039 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
4041 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
4042 GSI_CONTINUE_LINKING
);
4043 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
4044 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4049 t
= fold_convert (itype
, s0
);
4050 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4051 if (POINTER_TYPE_P (type
))
4052 t
= fold_build_pointer_plus (n1
, t
);
4054 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4055 t
= fold_convert (TREE_TYPE (startvar
), t
);
4056 t
= force_gimple_operand_gsi (&gsi
, t
,
4058 && TREE_ADDRESSABLE (startvar
),
4059 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
4060 assign_stmt
= gimple_build_assign (startvar
, t
);
4061 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4063 t
= fold_convert (itype
, e0
);
4064 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4065 if (POINTER_TYPE_P (type
))
4066 t
= fold_build_pointer_plus (n1
, t
);
4068 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4069 t
= fold_convert (TREE_TYPE (startvar
), t
);
4070 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4071 false, GSI_CONTINUE_LINKING
);
4074 assign_stmt
= gimple_build_assign (endvar
, e
);
4075 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4076 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
4077 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
4079 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
4080 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4082 /* Handle linear clause adjustments. */
4083 tree itercnt
= NULL_TREE
, itercntbias
= NULL_TREE
;
4084 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
4085 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
4086 c
; c
= OMP_CLAUSE_CHAIN (c
))
4087 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
4088 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
4090 tree d
= OMP_CLAUSE_DECL (c
);
4091 bool is_ref
= omp_is_reference (d
);
4092 tree t
= d
, a
, dest
;
4094 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
4095 tree type
= TREE_TYPE (t
);
4096 if (POINTER_TYPE_P (type
))
4098 dest
= unshare_expr (t
);
4099 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
4100 expand_omp_build_assign (&gsif
, v
, t
);
4101 if (itercnt
== NULL_TREE
)
4103 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4106 = fold_build2 (MINUS_EXPR
, itype
, fold_convert (itype
, n1
),
4107 fold_convert (itype
, fd
->loop
.n1
));
4108 itercntbias
= fold_build2 (EXACT_DIV_EXPR
, itype
,
4111 = force_gimple_operand_gsi (&gsif
, itercntbias
, true,
4114 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercntbias
, s0
);
4115 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
4117 GSI_CONTINUE_LINKING
);
4122 a
= fold_build2 (MULT_EXPR
, type
,
4123 fold_convert (type
, itercnt
),
4124 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
4125 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
4126 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
4127 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4128 false, GSI_CONTINUE_LINKING
);
4129 assign_stmt
= gimple_build_assign (dest
, t
);
4130 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4132 if (fd
->collapse
> 1)
4133 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
4137 /* The code controlling the sequential loop goes in CONT_BB,
4138 replacing the GIMPLE_OMP_CONTINUE. */
4139 gsi
= gsi_last_bb (cont_bb
);
4140 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
4141 vmain
= gimple_omp_continue_control_use (cont_stmt
);
4142 vback
= gimple_omp_continue_control_def (cont_stmt
);
4144 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4146 if (POINTER_TYPE_P (type
))
4147 t
= fold_build_pointer_plus (vmain
, step
);
4149 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
4150 if (DECL_P (vback
) && TREE_ADDRESSABLE (vback
))
4151 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4152 true, GSI_SAME_STMT
);
4153 assign_stmt
= gimple_build_assign (vback
, t
);
4154 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4156 if (tree_int_cst_equal (fd
->chunk_size
, integer_one_node
))
4157 t
= build2 (EQ_EXPR
, boolean_type_node
,
4158 build_int_cst (itype
, 0),
4159 build_int_cst (itype
, 1));
4161 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
4162 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
4164 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
4167 /* Remove GIMPLE_OMP_CONTINUE. */
4168 gsi_remove (&gsi
, true);
4170 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
4171 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
4173 /* Trip update code goes into TRIP_UPDATE_BB. */
4174 gsi
= gsi_start_bb (trip_update_bb
);
4176 t
= build_int_cst (itype
, 1);
4177 t
= build2 (PLUS_EXPR
, itype
, trip_main
, t
);
4178 assign_stmt
= gimple_build_assign (trip_back
, t
);
4179 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4182 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4183 gsi
= gsi_last_bb (exit_bb
);
4184 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
4186 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
4187 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
4189 gsi_remove (&gsi
, true);
4191 /* Connect the new blocks. */
4192 find_edge (iter_part_bb
, seq_start_bb
)->flags
= EDGE_TRUE_VALUE
;
4193 find_edge (iter_part_bb
, fin_bb
)->flags
= EDGE_FALSE_VALUE
;
4197 se
= find_edge (cont_bb
, body_bb
);
4200 se
= BRANCH_EDGE (cont_bb
);
4201 gcc_assert (single_succ (se
->dest
) == body_bb
);
4203 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4208 else if (fd
->collapse
> 1)
4211 se
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
4214 se
->flags
= EDGE_TRUE_VALUE
;
4215 find_edge (cont_bb
, trip_update_bb
)->flags
4216 = se
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
4218 redirect_edge_and_branch (single_succ_edge (trip_update_bb
),
4222 if (gimple_in_ssa_p (cfun
))
4230 gcc_assert (fd
->collapse
== 1 && !broken_loop
);
4232 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4233 remove arguments of the phi nodes in fin_bb. We need to create
4234 appropriate phi nodes in iter_part_bb instead. */
4235 se
= find_edge (iter_part_bb
, fin_bb
);
4236 re
= single_succ_edge (trip_update_bb
);
4237 vec
<edge_var_map
> *head
= redirect_edge_var_map_vector (re
);
4238 ene
= single_succ_edge (entry_bb
);
4240 psi
= gsi_start_phis (fin_bb
);
4241 for (i
= 0; !gsi_end_p (psi
) && head
->iterate (i
, &vm
);
4242 gsi_next (&psi
), ++i
)
4245 source_location locus
;
4248 if (operand_equal_p (gimple_phi_arg_def (phi
, 0),
4249 redirect_edge_var_map_def (vm
), 0))
4252 t
= gimple_phi_result (phi
);
4253 gcc_assert (t
== redirect_edge_var_map_result (vm
));
4255 if (!single_pred_p (fin_bb
))
4256 t
= copy_ssa_name (t
, phi
);
4258 nphi
= create_phi_node (t
, iter_part_bb
);
4260 t
= PHI_ARG_DEF_FROM_EDGE (phi
, se
);
4261 locus
= gimple_phi_arg_location_from_edge (phi
, se
);
4263 /* A special case -- fd->loop.v is not yet computed in
4264 iter_part_bb, we need to use vextra instead. */
4265 if (t
== fd
->loop
.v
)
4267 add_phi_arg (nphi
, t
, ene
, locus
);
4268 locus
= redirect_edge_var_map_location (vm
);
4269 tree back_arg
= redirect_edge_var_map_def (vm
);
4270 add_phi_arg (nphi
, back_arg
, re
, locus
);
4271 edge ce
= find_edge (cont_bb
, body_bb
);
4274 ce
= BRANCH_EDGE (cont_bb
);
4275 gcc_assert (single_succ (ce
->dest
) == body_bb
);
4276 ce
= single_succ_edge (ce
->dest
);
4278 gphi
*inner_loop_phi
= find_phi_with_arg_on_edge (back_arg
, ce
);
4279 gcc_assert (inner_loop_phi
!= NULL
);
4280 add_phi_arg (inner_loop_phi
, gimple_phi_result (nphi
),
4281 find_edge (seq_start_bb
, body_bb
), locus
);
4283 if (!single_pred_p (fin_bb
))
4284 add_phi_arg (phi
, gimple_phi_result (nphi
), se
, locus
);
4286 gcc_assert (gsi_end_p (psi
) && (head
== NULL
|| i
== head
->length ()));
4287 redirect_edge_var_map_clear (re
);
4288 if (single_pred_p (fin_bb
))
4291 psi
= gsi_start_phis (fin_bb
);
4292 if (gsi_end_p (psi
))
4294 remove_phi_node (&psi
, false);
4297 /* Make phi node for trip. */
4298 phi
= create_phi_node (trip_main
, iter_part_bb
);
4299 add_phi_arg (phi
, trip_back
, single_succ_edge (trip_update_bb
),
4301 add_phi_arg (phi
, trip_init
, single_succ_edge (entry_bb
),
4306 set_immediate_dominator (CDI_DOMINATORS
, trip_update_bb
, cont_bb
);
4307 set_immediate_dominator (CDI_DOMINATORS
, iter_part_bb
,
4308 recompute_dominator (CDI_DOMINATORS
, iter_part_bb
));
4309 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
4310 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
4311 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
,
4312 recompute_dominator (CDI_DOMINATORS
, seq_start_bb
));
4313 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
4314 recompute_dominator (CDI_DOMINATORS
, body_bb
));
4318 struct loop
*loop
= body_bb
->loop_father
;
4319 struct loop
*trip_loop
= alloc_loop ();
4320 trip_loop
->header
= iter_part_bb
;
4321 trip_loop
->latch
= trip_update_bb
;
4322 add_loop (trip_loop
, iter_part_bb
->loop_father
);
4324 if (loop
!= entry_bb
->loop_father
)
4326 gcc_assert (loop
->header
== body_bb
);
4327 gcc_assert (loop
->latch
== region
->cont
4328 || single_pred (loop
->latch
) == region
->cont
);
4329 trip_loop
->inner
= loop
;
4333 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4335 loop
= alloc_loop ();
4336 loop
->header
= body_bb
;
4337 if (collapse_bb
== NULL
)
4338 loop
->latch
= cont_bb
;
4339 add_loop (loop
, trip_loop
);
4344 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4346 for (V = N1; V cond N2; V += STEP) BODY;
4348 where COND is "<" or ">" or "!=", we generate pseudocode
4350 for (ind_var = low; ind_var < high; ind_var++)
4352 V = n1 + (ind_var * STEP)
4357 In the above pseudocode, low and high are function parameters of the
4358 child function. In the function below, we are inserting a temp.
4359 variable that will be making a call to two OMP functions that will not be
4360 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4361 with _Cilk_for). These functions are replaced with low and high
4362 by the function that handles taskreg. */
4366 expand_cilk_for (struct omp_region
*region
, struct omp_for_data
*fd
)
4368 bool broken_loop
= region
->cont
== NULL
;
4369 basic_block entry_bb
= region
->entry
;
4370 basic_block cont_bb
= region
->cont
;
4372 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4373 gcc_assert (broken_loop
4374 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4375 basic_block l0_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
4376 basic_block l1_bb
, l2_bb
;
4380 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l0_bb
);
4381 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4382 l1_bb
= split_block (cont_bb
, last_stmt (cont_bb
))->dest
;
4383 l2_bb
= BRANCH_EDGE (entry_bb
)->dest
;
4387 BRANCH_EDGE (entry_bb
)->flags
&= ~EDGE_ABNORMAL
;
4388 l1_bb
= split_edge (BRANCH_EDGE (entry_bb
));
4389 l2_bb
= single_succ (l1_bb
);
4391 basic_block exit_bb
= region
->exit
;
4392 basic_block l2_dom_bb
= NULL
;
4394 gimple_stmt_iterator gsi
= gsi_last_bb (entry_bb
);
4396 /* Below statements until the "tree high_val = ..." are pseudo statements
4397 used to pass information to be used by expand_omp_taskreg.
4398 low_val and high_val will be replaced by the __low and __high
4399 parameter from the child function.
4401 The call_exprs part is a place-holder, it is mainly used
4402 to distinctly identify to the top-level part that this is
4403 where we should put low and high (reasoning given in header
4406 gomp_parallel
*par_stmt
4407 = as_a
<gomp_parallel
*> (last_stmt (region
->outer
->entry
));
4408 tree child_fndecl
= gimple_omp_parallel_child_fn (par_stmt
);
4409 tree t
, low_val
= NULL_TREE
, high_val
= NULL_TREE
;
4410 for (t
= DECL_ARGUMENTS (child_fndecl
); t
; t
= TREE_CHAIN (t
))
4412 if (id_equal (DECL_NAME (t
), "__high"))
4414 else if (id_equal (DECL_NAME (t
), "__low"))
4417 gcc_assert (low_val
&& high_val
);
4419 tree type
= TREE_TYPE (low_val
);
4420 tree ind_var
= create_tmp_reg (type
, "__cilk_ind_var");
4421 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4423 /* Not needed in SSA form right now. */
4424 gcc_assert (!gimple_in_ssa_p (cfun
));
4425 if (l2_dom_bb
== NULL
)
4431 gimple
*stmt
= gimple_build_assign (ind_var
, n1
);
4433 /* Replace the GIMPLE_OMP_FOR statement. */
4434 gsi_replace (&gsi
, stmt
, true);
4438 /* Code to control the increment goes in the CONT_BB. */
4439 gsi
= gsi_last_bb (cont_bb
);
4440 stmt
= gsi_stmt (gsi
);
4441 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_CONTINUE
);
4442 stmt
= gimple_build_assign (ind_var
, PLUS_EXPR
, ind_var
,
4443 build_one_cst (type
));
4445 /* Replace GIMPLE_OMP_CONTINUE. */
4446 gsi_replace (&gsi
, stmt
, true);
4449 /* Emit the condition in L1_BB. */
4450 gsi
= gsi_after_labels (l1_bb
);
4451 t
= fold_build2 (MULT_EXPR
, TREE_TYPE (fd
->loop
.step
),
4452 fold_convert (TREE_TYPE (fd
->loop
.step
), ind_var
),
4454 if (POINTER_TYPE_P (TREE_TYPE (fd
->loop
.n1
)))
4455 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (fd
->loop
.n1
),
4456 fd
->loop
.n1
, fold_convert (sizetype
, t
));
4458 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (fd
->loop
.n1
),
4459 fd
->loop
.n1
, fold_convert (TREE_TYPE (fd
->loop
.n1
), t
));
4460 t
= fold_convert (TREE_TYPE (fd
->loop
.v
), t
);
4461 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
4463 /* The condition is always '<' since the runtime will fill in the low
4465 stmt
= gimple_build_cond (LT_EXPR
, ind_var
, n2
, NULL_TREE
, NULL_TREE
);
4466 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4468 /* Remove GIMPLE_OMP_RETURN. */
4469 gsi
= gsi_last_bb (exit_bb
);
4470 gsi_remove (&gsi
, true);
4472 /* Connect the new blocks. */
4473 remove_edge (FALLTHRU_EDGE (entry_bb
));
4478 remove_edge (BRANCH_EDGE (entry_bb
));
4479 make_edge (entry_bb
, l1_bb
, EDGE_FALLTHRU
);
4481 e
= BRANCH_EDGE (l1_bb
);
4482 ne
= FALLTHRU_EDGE (l1_bb
);
4483 e
->flags
= EDGE_TRUE_VALUE
;
4487 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
4489 ne
= single_succ_edge (l1_bb
);
4490 e
= make_edge (l1_bb
, l0_bb
, EDGE_TRUE_VALUE
);
4493 ne
->flags
= EDGE_FALSE_VALUE
;
4494 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
4495 ne
->probability
= e
->probability
.invert ();
4497 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
, entry_bb
);
4498 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
, l2_dom_bb
);
4499 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
, l1_bb
);
4503 struct loop
*loop
= alloc_loop ();
4504 loop
->header
= l1_bb
;
4505 loop
->latch
= cont_bb
;
4506 add_loop (loop
, l1_bb
->loop_father
);
4507 loop
->safelen
= INT_MAX
;
4510 /* Pick the correct library function based on the precision of the
4511 induction variable type. */
4512 tree lib_fun
= NULL_TREE
;
4513 if (TYPE_PRECISION (type
) == 32)
4514 lib_fun
= cilk_for_32_fndecl
;
4515 else if (TYPE_PRECISION (type
) == 64)
4516 lib_fun
= cilk_for_64_fndecl
;
4520 gcc_assert (fd
->sched_kind
== OMP_CLAUSE_SCHEDULE_CILKFOR
);
4522 /* WS_ARGS contains the library function flavor to call:
4523 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4524 user-defined grain value. If the user does not define one, then zero
4525 is passed in by the parser. */
4526 vec_alloc (region
->ws_args
, 2);
4527 region
->ws_args
->quick_push (lib_fun
);
4528 region
->ws_args
->quick_push (fd
->chunk_size
);
4531 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4532 loop. Given parameters:
4534 for (V = N1; V cond N2; V += STEP) BODY;
4536 where COND is "<" or ">", we generate pseudocode
4544 if (V cond N2) goto L0; else goto L2;
4547 For collapsed loops, given parameters:
4549 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4550 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4551 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4554 we generate pseudocode
4560 count3 = (adj + N32 - N31) / STEP3;
4565 count2 = (adj + N22 - N21) / STEP2;
4570 count1 = (adj + N12 - N11) / STEP1;
4571 count = count1 * count2 * count3;
4581 V2 += (V3 cond3 N32) ? 0 : STEP2;
4582 V3 = (V3 cond3 N32) ? V3 : N31;
4583 V1 += (V2 cond2 N22) ? 0 : STEP1;
4584 V2 = (V2 cond2 N22) ? V2 : N21;
4586 if (V < count) goto L0; else goto L2;
4592 expand_omp_simd (struct omp_region
*region
, struct omp_for_data
*fd
)
4595 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, l2_bb
, l2_dom_bb
;
4596 gimple_stmt_iterator gsi
;
4599 bool broken_loop
= region
->cont
== NULL
;
4601 tree
*counts
= NULL
;
4603 int safelen_int
= INT_MAX
;
4604 tree safelen
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4605 OMP_CLAUSE_SAFELEN
);
4606 tree simduid
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4607 OMP_CLAUSE__SIMDUID_
);
4612 safelen
= OMP_CLAUSE_SAFELEN_EXPR (safelen
);
4613 if (TREE_CODE (safelen
) != INTEGER_CST
)
4615 else if (tree_fits_uhwi_p (safelen
) && tree_to_uhwi (safelen
) < INT_MAX
)
4616 safelen_int
= tree_to_uhwi (safelen
);
4617 if (safelen_int
== 1)
4620 type
= TREE_TYPE (fd
->loop
.v
);
4621 entry_bb
= region
->entry
;
4622 cont_bb
= region
->cont
;
4623 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4624 gcc_assert (broken_loop
4625 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4626 l0_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
4629 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l0_bb
);
4630 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4631 l1_bb
= split_block (cont_bb
, last_stmt (cont_bb
))->dest
;
4632 l2_bb
= BRANCH_EDGE (entry_bb
)->dest
;
4636 BRANCH_EDGE (entry_bb
)->flags
&= ~EDGE_ABNORMAL
;
4637 l1_bb
= split_edge (BRANCH_EDGE (entry_bb
));
4638 l2_bb
= single_succ (l1_bb
);
4640 exit_bb
= region
->exit
;
4643 gsi
= gsi_last_bb (entry_bb
);
4645 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4646 /* Not needed in SSA form right now. */
4647 gcc_assert (!gimple_in_ssa_p (cfun
));
4648 if (fd
->collapse
> 1)
4650 int first_zero_iter
= -1, dummy
= -1;
4651 basic_block zero_iter_bb
= l2_bb
, dummy_bb
= NULL
;
4653 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4654 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4655 zero_iter_bb
, first_zero_iter
,
4656 dummy_bb
, dummy
, l2_dom_bb
);
4658 if (l2_dom_bb
== NULL
)
4663 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4665 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4666 OMP_CLAUSE__LOOPTEMP_
);
4667 gcc_assert (innerc
);
4668 n1
= OMP_CLAUSE_DECL (innerc
);
4669 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4670 OMP_CLAUSE__LOOPTEMP_
);
4671 gcc_assert (innerc
);
4672 n2
= OMP_CLAUSE_DECL (innerc
);
4674 tree step
= fd
->loop
.step
;
4676 bool is_simt
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4680 cfun
->curr_properties
&= ~PROP_gimple_lomp_dev
;
4681 is_simt
= safelen_int
> 1;
4683 tree simt_lane
= NULL_TREE
, simt_maxlane
= NULL_TREE
;
4686 simt_lane
= create_tmp_var (unsigned_type_node
);
4687 gimple
*g
= gimple_build_call_internal (IFN_GOMP_SIMT_LANE
, 0);
4688 gimple_call_set_lhs (g
, simt_lane
);
4689 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4690 tree offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
,
4691 fold_convert (TREE_TYPE (step
), simt_lane
));
4692 n1
= fold_convert (type
, n1
);
4693 if (POINTER_TYPE_P (type
))
4694 n1
= fold_build_pointer_plus (n1
, offset
);
4696 n1
= fold_build2 (PLUS_EXPR
, type
, n1
, fold_convert (type
, offset
));
4698 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4699 if (fd
->collapse
> 1)
4700 simt_maxlane
= build_one_cst (unsigned_type_node
);
4701 else if (safelen_int
< omp_max_simt_vf ())
4702 simt_maxlane
= build_int_cst (unsigned_type_node
, safelen_int
);
4704 = build_call_expr_internal_loc (UNKNOWN_LOCATION
, IFN_GOMP_SIMT_VF
,
4705 unsigned_type_node
, 0);
4707 vf
= fold_build2 (MIN_EXPR
, unsigned_type_node
, vf
, simt_maxlane
);
4708 vf
= fold_convert (TREE_TYPE (step
), vf
);
4709 step
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
, vf
);
4712 expand_omp_build_assign (&gsi
, fd
->loop
.v
, fold_convert (type
, n1
));
4713 if (fd
->collapse
> 1)
4715 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4718 expand_omp_for_init_vars (fd
, &gsi
, counts
, NULL
, n1
);
4722 for (i
= 0; i
< fd
->collapse
; i
++)
4724 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
4725 if (POINTER_TYPE_P (itype
))
4726 itype
= signed_type_for (itype
);
4727 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].n1
);
4728 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4732 /* Remove the GIMPLE_OMP_FOR statement. */
4733 gsi_remove (&gsi
, true);
4737 /* Code to control the increment goes in the CONT_BB. */
4738 gsi
= gsi_last_bb (cont_bb
);
4739 stmt
= gsi_stmt (gsi
);
4740 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_CONTINUE
);
4742 if (POINTER_TYPE_P (type
))
4743 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
4745 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
4746 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
4748 if (fd
->collapse
> 1)
4750 i
= fd
->collapse
- 1;
4751 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
4753 t
= fold_convert (sizetype
, fd
->loops
[i
].step
);
4754 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, t
);
4758 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
),
4760 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
4763 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4765 for (i
= fd
->collapse
- 1; i
> 0; i
--)
4767 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
4768 tree itype2
= TREE_TYPE (fd
->loops
[i
- 1].v
);
4769 if (POINTER_TYPE_P (itype2
))
4770 itype2
= signed_type_for (itype2
);
4771 t
= fold_convert (itype2
, fd
->loops
[i
- 1].step
);
4772 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true,
4774 t
= build3 (COND_EXPR
, itype2
,
4775 build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4777 fold_convert (itype
, fd
->loops
[i
].n2
)),
4778 build_int_cst (itype2
, 0), t
);
4779 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
- 1].v
)))
4780 t
= fold_build_pointer_plus (fd
->loops
[i
- 1].v
, t
);
4782 t
= fold_build2 (PLUS_EXPR
, itype2
, fd
->loops
[i
- 1].v
, t
);
4783 expand_omp_build_assign (&gsi
, fd
->loops
[i
- 1].v
, t
);
4785 t
= fold_convert (itype
, fd
->loops
[i
].n1
);
4786 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true,
4788 t
= build3 (COND_EXPR
, itype
,
4789 build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4791 fold_convert (itype
, fd
->loops
[i
].n2
)),
4793 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4797 /* Remove GIMPLE_OMP_CONTINUE. */
4798 gsi_remove (&gsi
, true);
4801 /* Emit the condition in L1_BB. */
4802 gsi
= gsi_start_bb (l1_bb
);
4804 t
= fold_convert (type
, n2
);
4805 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4806 false, GSI_CONTINUE_LINKING
);
4807 tree v
= fd
->loop
.v
;
4808 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
4809 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
4810 false, GSI_CONTINUE_LINKING
);
4811 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
, v
, t
);
4812 cond_stmt
= gimple_build_cond_empty (t
);
4813 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
4814 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
4816 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
4819 gsi
= gsi_for_stmt (cond_stmt
);
4820 gimple_regimplify_operands (cond_stmt
, &gsi
);
4823 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4826 gsi
= gsi_start_bb (l2_bb
);
4827 step
= fold_build2 (MINUS_EXPR
, TREE_TYPE (step
), fd
->loop
.step
, step
);
4828 if (POINTER_TYPE_P (type
))
4829 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
4831 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
4832 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
4835 /* Remove GIMPLE_OMP_RETURN. */
4836 gsi
= gsi_last_bb (exit_bb
);
4837 gsi_remove (&gsi
, true);
4839 /* Connect the new blocks. */
4840 remove_edge (FALLTHRU_EDGE (entry_bb
));
4844 remove_edge (BRANCH_EDGE (entry_bb
));
4845 make_edge (entry_bb
, l1_bb
, EDGE_FALLTHRU
);
4847 e
= BRANCH_EDGE (l1_bb
);
4848 ne
= FALLTHRU_EDGE (l1_bb
);
4849 e
->flags
= EDGE_TRUE_VALUE
;
4853 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
4855 ne
= single_succ_edge (l1_bb
);
4856 e
= make_edge (l1_bb
, l0_bb
, EDGE_TRUE_VALUE
);
4859 ne
->flags
= EDGE_FALSE_VALUE
;
4860 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
4861 ne
->probability
= e
->probability
.invert ();
4863 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
, entry_bb
);
4864 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
, l1_bb
);
4868 cond_stmt
= gimple_build_cond (LT_EXPR
, simt_lane
, simt_maxlane
,
4869 NULL_TREE
, NULL_TREE
);
4870 gsi
= gsi_last_bb (entry_bb
);
4871 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
4872 make_edge (entry_bb
, l2_bb
, EDGE_FALSE_VALUE
);
4873 FALLTHRU_EDGE (entry_bb
)->flags
= EDGE_TRUE_VALUE
;
4874 FALLTHRU_EDGE (entry_bb
)->probability
4875 = profile_probability::guessed_always ().apply_scale (7, 8);
4876 BRANCH_EDGE (entry_bb
)->probability
4877 = FALLTHRU_EDGE (entry_bb
)->probability
.invert ();
4878 l2_dom_bb
= entry_bb
;
4880 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
, l2_dom_bb
);
4884 struct loop
*loop
= alloc_loop ();
4885 loop
->header
= l1_bb
;
4886 loop
->latch
= cont_bb
;
4887 add_loop (loop
, l1_bb
->loop_father
);
4888 loop
->safelen
= safelen_int
;
4891 loop
->simduid
= OMP_CLAUSE__SIMDUID__DECL (simduid
);
4892 cfun
->has_simduid_loops
= true;
4894 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4896 if ((flag_tree_loop_vectorize
4897 || !global_options_set
.x_flag_tree_loop_vectorize
)
4898 && flag_tree_loop_optimize
4899 && loop
->safelen
> 1)
4901 loop
->force_vectorize
= true;
4902 cfun
->has_force_vectorize_loops
= true;
4906 cfun
->has_simduid_loops
= true;
4909 /* Taskloop construct is represented after gimplification with
4910 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4911 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4912 which should just compute all the needed loop temporaries
4913 for GIMPLE_OMP_TASK. */
4916 expand_omp_taskloop_for_outer (struct omp_region
*region
,
4917 struct omp_for_data
*fd
,
4920 tree type
, bias
= NULL_TREE
;
4921 basic_block entry_bb
, cont_bb
, exit_bb
;
4922 gimple_stmt_iterator gsi
;
4923 gassign
*assign_stmt
;
4924 tree
*counts
= NULL
;
4927 gcc_assert (inner_stmt
);
4928 gcc_assert (region
->cont
);
4929 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_TASK
4930 && gimple_omp_task_taskloop_p (inner_stmt
));
4931 type
= TREE_TYPE (fd
->loop
.v
);
4933 /* See if we need to bias by LLONG_MIN. */
4934 if (fd
->iter_type
== long_long_unsigned_type_node
4935 && TREE_CODE (type
) == INTEGER_TYPE
4936 && !TYPE_UNSIGNED (type
))
4940 if (fd
->loop
.cond_code
== LT_EXPR
)
4943 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
4947 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
4950 if (TREE_CODE (n1
) != INTEGER_CST
4951 || TREE_CODE (n2
) != INTEGER_CST
4952 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
4953 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
4956 entry_bb
= region
->entry
;
4957 cont_bb
= region
->cont
;
4958 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4959 gcc_assert (BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4960 exit_bb
= region
->exit
;
4962 gsi
= gsi_last_bb (entry_bb
);
4963 gimple
*for_stmt
= gsi_stmt (gsi
);
4964 gcc_assert (gimple_code (for_stmt
) == GIMPLE_OMP_FOR
);
4965 if (fd
->collapse
> 1)
4967 int first_zero_iter
= -1, dummy
= -1;
4968 basic_block zero_iter_bb
= NULL
, dummy_bb
= NULL
, l2_dom_bb
= NULL
;
4970 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4971 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4972 zero_iter_bb
, first_zero_iter
,
4973 dummy_bb
, dummy
, l2_dom_bb
);
4977 /* Some counts[i] vars might be uninitialized if
4978 some loop has zero iterations. But the body shouldn't
4979 be executed in that case, so just avoid uninit warnings. */
4980 for (i
= first_zero_iter
; i
< fd
->collapse
; i
++)
4981 if (SSA_VAR_P (counts
[i
]))
4982 TREE_NO_WARNING (counts
[i
]) = 1;
4984 edge e
= split_block (entry_bb
, gsi_stmt (gsi
));
4986 make_edge (zero_iter_bb
, entry_bb
, EDGE_FALLTHRU
);
4987 gsi
= gsi_last_bb (entry_bb
);
4988 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
4989 get_immediate_dominator (CDI_DOMINATORS
,
4997 if (POINTER_TYPE_P (TREE_TYPE (t0
))
4998 && TYPE_PRECISION (TREE_TYPE (t0
))
4999 != TYPE_PRECISION (fd
->iter_type
))
5001 /* Avoid casting pointers to integer of a different size. */
5002 tree itype
= signed_type_for (type
);
5003 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
5004 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
5008 t1
= fold_convert (fd
->iter_type
, t1
);
5009 t0
= fold_convert (fd
->iter_type
, t0
);
5013 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
5014 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
5017 tree innerc
= omp_find_clause (gimple_omp_task_clauses (inner_stmt
),
5018 OMP_CLAUSE__LOOPTEMP_
);
5019 gcc_assert (innerc
);
5020 tree startvar
= OMP_CLAUSE_DECL (innerc
);
5021 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
5022 gcc_assert (innerc
);
5023 tree endvar
= OMP_CLAUSE_DECL (innerc
);
5024 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
5026 gcc_assert (innerc
);
5027 for (i
= 1; i
< fd
->collapse
; i
++)
5029 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5030 OMP_CLAUSE__LOOPTEMP_
);
5031 gcc_assert (innerc
);
5033 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5034 OMP_CLAUSE__LOOPTEMP_
);
5037 /* If needed (inner taskloop has lastprivate clause), propagate
5038 down the total number of iterations. */
5039 tree t
= force_gimple_operand_gsi (&gsi
, fd
->loop
.n2
, false,
5041 GSI_CONTINUE_LINKING
);
5042 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
5043 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5047 t0
= force_gimple_operand_gsi (&gsi
, t0
, false, NULL_TREE
, false,
5048 GSI_CONTINUE_LINKING
);
5049 assign_stmt
= gimple_build_assign (startvar
, t0
);
5050 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5052 t1
= force_gimple_operand_gsi (&gsi
, t1
, false, NULL_TREE
, false,
5053 GSI_CONTINUE_LINKING
);
5054 assign_stmt
= gimple_build_assign (endvar
, t1
);
5055 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5056 if (fd
->collapse
> 1)
5057 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
5059 /* Remove the GIMPLE_OMP_FOR statement. */
5060 gsi
= gsi_for_stmt (for_stmt
);
5061 gsi_remove (&gsi
, true);
5063 gsi
= gsi_last_bb (cont_bb
);
5064 gsi_remove (&gsi
, true);
5066 gsi
= gsi_last_bb (exit_bb
);
5067 gsi_remove (&gsi
, true);
5069 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
5070 remove_edge (BRANCH_EDGE (entry_bb
));
5071 FALLTHRU_EDGE (cont_bb
)->probability
= profile_probability::always ();
5072 remove_edge (BRANCH_EDGE (cont_bb
));
5073 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, cont_bb
);
5074 set_immediate_dominator (CDI_DOMINATORS
, region
->entry
,
5075 recompute_dominator (CDI_DOMINATORS
, region
->entry
));
5078 /* Taskloop construct is represented after gimplification with
5079 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5080 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5081 GOMP_taskloop{,_ull} function arranges for each task to be given just
5082 a single range of iterations. */
5085 expand_omp_taskloop_for_inner (struct omp_region
*region
,
5086 struct omp_for_data
*fd
,
5089 tree e
, t
, type
, itype
, vmain
, vback
, bias
= NULL_TREE
;
5090 basic_block entry_bb
, exit_bb
, body_bb
, cont_bb
, collapse_bb
= NULL
;
5092 gimple_stmt_iterator gsi
;
5094 bool broken_loop
= region
->cont
== NULL
;
5095 tree
*counts
= NULL
;
5098 itype
= type
= TREE_TYPE (fd
->loop
.v
);
5099 if (POINTER_TYPE_P (type
))
5100 itype
= signed_type_for (type
);
5102 /* See if we need to bias by LLONG_MIN. */
5103 if (fd
->iter_type
== long_long_unsigned_type_node
5104 && TREE_CODE (type
) == INTEGER_TYPE
5105 && !TYPE_UNSIGNED (type
))
5109 if (fd
->loop
.cond_code
== LT_EXPR
)
5112 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
5116 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
5119 if (TREE_CODE (n1
) != INTEGER_CST
5120 || TREE_CODE (n2
) != INTEGER_CST
5121 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
5122 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
5125 entry_bb
= region
->entry
;
5126 cont_bb
= region
->cont
;
5127 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
5128 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
5129 gcc_assert (broken_loop
5130 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
5131 body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
5134 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
);
5135 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
5137 exit_bb
= region
->exit
;
5139 /* Iteration space partitioning goes in ENTRY_BB. */
5140 gsi
= gsi_last_bb (entry_bb
);
5141 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
5143 if (fd
->collapse
> 1)
5145 int first_zero_iter
= -1, dummy
= -1;
5146 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
5148 counts
= XALLOCAVEC (tree
, fd
->collapse
);
5149 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
5150 fin_bb
, first_zero_iter
,
5151 dummy_bb
, dummy
, l2_dom_bb
);
5155 t
= integer_one_node
;
5157 step
= fd
->loop
.step
;
5158 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
5159 OMP_CLAUSE__LOOPTEMP_
);
5160 gcc_assert (innerc
);
5161 n1
= OMP_CLAUSE_DECL (innerc
);
5162 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
5163 gcc_assert (innerc
);
5164 n2
= OMP_CLAUSE_DECL (innerc
);
5167 n1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n1
, bias
);
5168 n2
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n2
, bias
);
5170 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
5171 true, NULL_TREE
, true, GSI_SAME_STMT
);
5172 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
5173 true, NULL_TREE
, true, GSI_SAME_STMT
);
5174 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
5175 true, NULL_TREE
, true, GSI_SAME_STMT
);
5177 tree startvar
= fd
->loop
.v
;
5178 tree endvar
= NULL_TREE
;
5180 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5182 tree clauses
= gimple_omp_for_clauses (inner_stmt
);
5183 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
5184 gcc_assert (innerc
);
5185 startvar
= OMP_CLAUSE_DECL (innerc
);
5186 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5187 OMP_CLAUSE__LOOPTEMP_
);
5188 gcc_assert (innerc
);
5189 endvar
= OMP_CLAUSE_DECL (innerc
);
5191 t
= fold_convert (TREE_TYPE (startvar
), n1
);
5192 t
= force_gimple_operand_gsi (&gsi
, t
,
5194 && TREE_ADDRESSABLE (startvar
),
5195 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
5196 gimple
*assign_stmt
= gimple_build_assign (startvar
, t
);
5197 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5199 t
= fold_convert (TREE_TYPE (startvar
), n2
);
5200 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5201 false, GSI_CONTINUE_LINKING
);
5204 assign_stmt
= gimple_build_assign (endvar
, e
);
5205 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5206 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
5207 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
5209 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
5210 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5212 if (fd
->collapse
> 1)
5213 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
5217 /* The code controlling the sequential loop replaces the
5218 GIMPLE_OMP_CONTINUE. */
5219 gsi
= gsi_last_bb (cont_bb
);
5220 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5221 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
5222 vmain
= gimple_omp_continue_control_use (cont_stmt
);
5223 vback
= gimple_omp_continue_control_def (cont_stmt
);
5225 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
5227 if (POINTER_TYPE_P (type
))
5228 t
= fold_build_pointer_plus (vmain
, step
);
5230 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
5231 t
= force_gimple_operand_gsi (&gsi
, t
,
5233 && TREE_ADDRESSABLE (vback
),
5234 NULL_TREE
, true, GSI_SAME_STMT
);
5235 assign_stmt
= gimple_build_assign (vback
, t
);
5236 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5238 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
5239 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
5241 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
5244 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5245 gsi_remove (&gsi
, true);
5247 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
5248 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
5251 /* Remove the GIMPLE_OMP_FOR statement. */
5252 gsi
= gsi_for_stmt (fd
->for_stmt
);
5253 gsi_remove (&gsi
, true);
5255 /* Remove the GIMPLE_OMP_RETURN statement. */
5256 gsi
= gsi_last_bb (exit_bb
);
5257 gsi_remove (&gsi
, true);
5259 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
5261 remove_edge (BRANCH_EDGE (entry_bb
));
5264 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb
));
5265 region
->outer
->cont
= NULL
;
5268 /* Connect all the blocks. */
5271 ep
= find_edge (cont_bb
, body_bb
);
5272 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5277 else if (fd
->collapse
> 1)
5280 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
5283 ep
->flags
= EDGE_TRUE_VALUE
;
5284 find_edge (cont_bb
, fin_bb
)->flags
5285 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
5288 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
5289 recompute_dominator (CDI_DOMINATORS
, body_bb
));
5291 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
5292 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
5294 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
5296 struct loop
*loop
= alloc_loop ();
5297 loop
->header
= body_bb
;
5298 if (collapse_bb
== NULL
)
5299 loop
->latch
= cont_bb
;
5300 add_loop (loop
, body_bb
->loop_father
);
5304 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5305 partitioned loop. The lowering here is abstracted, in that the
5306 loop parameters are passed through internal functions, which are
5307 further lowered by oacc_device_lower, once we get to the target
5308 compiler. The loop is of the form:
5310 for (V = B; V LTGT E; V += S) {BODY}
5312 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5313 (constant 0 for no chunking) and we will have a GWV partitioning
5314 mask, specifying dimensions over which the loop is to be
5315 partitioned (see note below). We generate code that looks like
5316 (this ignores tiling):
5318 <entry_bb> [incoming FALL->body, BRANCH->exit]
5319 typedef signedintify (typeof (V)) T; // underlying signed integral type
5322 T DIR = LTGT == '<' ? +1 : -1;
5323 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5324 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5326 <head_bb> [created by splitting end of entry_bb]
5327 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5328 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5329 if (!(offset LTGT bound)) goto bottom_bb;
5331 <body_bb> [incoming]
5335 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5337 if (offset LTGT bound) goto body_bb; [*]
5339 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5341 if (chunk < chunk_max) goto head_bb;
5343 <exit_bb> [incoming]
5344 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5346 [*] Needed if V live at end of loop. */
5349 expand_oacc_for (struct omp_region
*region
, struct omp_for_data
*fd
)
5351 tree v
= fd
->loop
.v
;
5352 enum tree_code cond_code
= fd
->loop
.cond_code
;
5353 enum tree_code plus_code
= PLUS_EXPR
;
5355 tree chunk_size
= integer_minus_one_node
;
5356 tree gwv
= integer_zero_node
;
5357 tree iter_type
= TREE_TYPE (v
);
5358 tree diff_type
= iter_type
;
5359 tree plus_type
= iter_type
;
5360 struct oacc_collapse
*counts
= NULL
;
5362 gcc_checking_assert (gimple_omp_for_kind (fd
->for_stmt
)
5363 == GF_OMP_FOR_KIND_OACC_LOOP
);
5364 gcc_assert (!gimple_omp_for_combined_into_p (fd
->for_stmt
));
5365 gcc_assert (cond_code
== LT_EXPR
|| cond_code
== GT_EXPR
);
5367 if (POINTER_TYPE_P (iter_type
))
5369 plus_code
= POINTER_PLUS_EXPR
;
5370 plus_type
= sizetype
;
5372 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
5373 diff_type
= signed_type_for (diff_type
);
5374 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
5375 diff_type
= integer_type_node
;
5377 basic_block entry_bb
= region
->entry
; /* BB ending in OMP_FOR */
5378 basic_block exit_bb
= region
->exit
; /* BB ending in OMP_RETURN */
5379 basic_block cont_bb
= region
->cont
; /* BB ending in OMP_CONTINUE */
5380 basic_block bottom_bb
= NULL
;
5382 /* entry_bb has two sucessors; the branch edge is to the exit
5383 block, fallthrough edge to body. */
5384 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2
5385 && BRANCH_EDGE (entry_bb
)->dest
== exit_bb
);
5387 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5388 body_bb, or to a block whose only successor is the body_bb. Its
5389 fallthrough successor is the final block (same as the branch
5390 successor of the entry_bb). */
5393 basic_block body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
5394 basic_block bed
= BRANCH_EDGE (cont_bb
)->dest
;
5396 gcc_assert (FALLTHRU_EDGE (cont_bb
)->dest
== exit_bb
);
5397 gcc_assert (bed
== body_bb
|| single_succ_edge (bed
)->dest
== body_bb
);
5400 gcc_assert (!gimple_in_ssa_p (cfun
));
5402 /* The exit block only has entry_bb and cont_bb as predecessors. */
5403 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1 + (cont_bb
!= NULL
));
5406 tree chunk_max
= NULL_TREE
;
5408 tree step
= create_tmp_var (diff_type
, ".step");
5409 bool up
= cond_code
== LT_EXPR
;
5410 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
5411 bool chunking
= !gimple_in_ssa_p (cfun
);
5415 tree tile_size
= NULL_TREE
;
5416 tree element_s
= NULL_TREE
;
5417 tree e_bound
= NULL_TREE
, e_offset
= NULL_TREE
, e_step
= NULL_TREE
;
5418 basic_block elem_body_bb
= NULL
;
5419 basic_block elem_cont_bb
= NULL
;
5421 /* SSA instances. */
5422 tree offset_incr
= NULL_TREE
;
5423 tree offset_init
= NULL_TREE
;
5425 gimple_stmt_iterator gsi
;
5431 edge split
, be
, fte
;
5433 /* Split the end of entry_bb to create head_bb. */
5434 split
= split_block (entry_bb
, last_stmt (entry_bb
));
5435 basic_block head_bb
= split
->dest
;
5436 entry_bb
= split
->src
;
5438 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5439 gsi
= gsi_last_bb (entry_bb
);
5440 gomp_for
*for_stmt
= as_a
<gomp_for
*> (gsi_stmt (gsi
));
5441 loc
= gimple_location (for_stmt
);
5443 if (gimple_in_ssa_p (cfun
))
5445 offset_init
= gimple_omp_for_index (for_stmt
, 0);
5446 gcc_assert (integer_zerop (fd
->loop
.n1
));
5447 /* The SSA parallelizer does gang parallelism. */
5448 gwv
= build_int_cst (integer_type_node
, GOMP_DIM_MASK (GOMP_DIM_GANG
));
5451 if (fd
->collapse
> 1 || fd
->tiling
)
5453 gcc_assert (!gimple_in_ssa_p (cfun
) && up
);
5454 counts
= XALLOCAVEC (struct oacc_collapse
, fd
->collapse
);
5455 tree total
= expand_oacc_collapse_init (fd
, &gsi
, counts
,
5456 TREE_TYPE (fd
->loop
.n2
), loc
);
5458 if (SSA_VAR_P (fd
->loop
.n2
))
5460 total
= force_gimple_operand_gsi (&gsi
, total
, false, NULL_TREE
,
5461 true, GSI_SAME_STMT
);
5462 ass
= gimple_build_assign (fd
->loop
.n2
, total
);
5463 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5467 tree b
= fd
->loop
.n1
;
5468 tree e
= fd
->loop
.n2
;
5469 tree s
= fd
->loop
.step
;
5471 b
= force_gimple_operand_gsi (&gsi
, b
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5472 e
= force_gimple_operand_gsi (&gsi
, e
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5474 /* Convert the step, avoiding possible unsigned->signed overflow. */
5475 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
5477 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
5478 s
= fold_convert (diff_type
, s
);
5480 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
5481 s
= force_gimple_operand_gsi (&gsi
, s
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5484 chunk_size
= integer_zero_node
;
5485 expr
= fold_convert (diff_type
, chunk_size
);
5486 chunk_size
= force_gimple_operand_gsi (&gsi
, expr
, true,
5487 NULL_TREE
, true, GSI_SAME_STMT
);
5491 /* Determine the tile size and element step,
5492 modify the outer loop step size. */
5493 tile_size
= create_tmp_var (diff_type
, ".tile_size");
5494 expr
= build_int_cst (diff_type
, 1);
5495 for (int ix
= 0; ix
< fd
->collapse
; ix
++)
5496 expr
= fold_build2 (MULT_EXPR
, diff_type
, counts
[ix
].tile
, expr
);
5497 expr
= force_gimple_operand_gsi (&gsi
, expr
, true,
5498 NULL_TREE
, true, GSI_SAME_STMT
);
5499 ass
= gimple_build_assign (tile_size
, expr
);
5500 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5502 element_s
= create_tmp_var (diff_type
, ".element_s");
5503 ass
= gimple_build_assign (element_s
, s
);
5504 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5506 expr
= fold_build2 (MULT_EXPR
, diff_type
, s
, tile_size
);
5507 s
= force_gimple_operand_gsi (&gsi
, expr
, true,
5508 NULL_TREE
, true, GSI_SAME_STMT
);
5511 /* Determine the range, avoiding possible unsigned->signed overflow. */
5512 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
5513 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
5514 fold_convert (plus_type
, negating
? b
: e
),
5515 fold_convert (plus_type
, negating
? e
: b
));
5516 expr
= fold_convert (diff_type
, expr
);
5518 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
5519 tree range
= force_gimple_operand_gsi (&gsi
, expr
, true,
5520 NULL_TREE
, true, GSI_SAME_STMT
);
5522 chunk_no
= build_int_cst (diff_type
, 0);
5525 gcc_assert (!gimple_in_ssa_p (cfun
));
5528 chunk_max
= create_tmp_var (diff_type
, ".chunk_max");
5529 chunk_no
= create_tmp_var (diff_type
, ".chunk_no");
5531 ass
= gimple_build_assign (chunk_no
, expr
);
5532 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5534 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5535 build_int_cst (integer_type_node
,
5536 IFN_GOACC_LOOP_CHUNKS
),
5537 dir
, range
, s
, chunk_size
, gwv
);
5538 gimple_call_set_lhs (call
, chunk_max
);
5539 gimple_set_location (call
, loc
);
5540 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5543 chunk_size
= chunk_no
;
5545 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5546 build_int_cst (integer_type_node
,
5547 IFN_GOACC_LOOP_STEP
),
5548 dir
, range
, s
, chunk_size
, gwv
);
5549 gimple_call_set_lhs (call
, step
);
5550 gimple_set_location (call
, loc
);
5551 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5553 /* Remove the GIMPLE_OMP_FOR. */
5554 gsi_remove (&gsi
, true);
5556 /* Fixup edges from head_bb. */
5557 be
= BRANCH_EDGE (head_bb
);
5558 fte
= FALLTHRU_EDGE (head_bb
);
5559 be
->flags
|= EDGE_FALSE_VALUE
;
5560 fte
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5562 basic_block body_bb
= fte
->dest
;
5564 if (gimple_in_ssa_p (cfun
))
5566 gsi
= gsi_last_bb (cont_bb
);
5567 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5569 offset
= gimple_omp_continue_control_use (cont_stmt
);
5570 offset_incr
= gimple_omp_continue_control_def (cont_stmt
);
5574 offset
= create_tmp_var (diff_type
, ".offset");
5575 offset_init
= offset_incr
= offset
;
5577 bound
= create_tmp_var (TREE_TYPE (offset
), ".bound");
5579 /* Loop offset & bound go into head_bb. */
5580 gsi
= gsi_start_bb (head_bb
);
5582 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5583 build_int_cst (integer_type_node
,
5584 IFN_GOACC_LOOP_OFFSET
),
5586 chunk_size
, gwv
, chunk_no
);
5587 gimple_call_set_lhs (call
, offset_init
);
5588 gimple_set_location (call
, loc
);
5589 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5591 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5592 build_int_cst (integer_type_node
,
5593 IFN_GOACC_LOOP_BOUND
),
5595 chunk_size
, gwv
, offset_init
);
5596 gimple_call_set_lhs (call
, bound
);
5597 gimple_set_location (call
, loc
);
5598 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5600 expr
= build2 (cond_code
, boolean_type_node
, offset_init
, bound
);
5601 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5602 GSI_CONTINUE_LINKING
);
5604 /* V assignment goes into body_bb. */
5605 if (!gimple_in_ssa_p (cfun
))
5607 gsi
= gsi_start_bb (body_bb
);
5609 expr
= build2 (plus_code
, iter_type
, b
,
5610 fold_convert (plus_type
, offset
));
5611 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5612 true, GSI_SAME_STMT
);
5613 ass
= gimple_build_assign (v
, expr
);
5614 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5616 if (fd
->collapse
> 1 || fd
->tiling
)
5617 expand_oacc_collapse_vars (fd
, false, &gsi
, counts
, v
);
5621 /* Determine the range of the element loop -- usually simply
5622 the tile_size, but could be smaller if the final
5623 iteration of the outer loop is a partial tile. */
5624 tree e_range
= create_tmp_var (diff_type
, ".e_range");
5626 expr
= build2 (MIN_EXPR
, diff_type
,
5627 build2 (MINUS_EXPR
, diff_type
, bound
, offset
),
5628 build2 (MULT_EXPR
, diff_type
, tile_size
,
5630 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5631 true, GSI_SAME_STMT
);
5632 ass
= gimple_build_assign (e_range
, expr
);
5633 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5635 /* Determine bound, offset & step of inner loop. */
5636 e_bound
= create_tmp_var (diff_type
, ".e_bound");
5637 e_offset
= create_tmp_var (diff_type
, ".e_offset");
5638 e_step
= create_tmp_var (diff_type
, ".e_step");
5640 /* Mark these as element loops. */
5641 tree t
, e_gwv
= integer_minus_one_node
;
5642 tree chunk
= build_int_cst (diff_type
, 0); /* Never chunked. */
5644 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_OFFSET
);
5645 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5646 element_s
, chunk
, e_gwv
, chunk
);
5647 gimple_call_set_lhs (call
, e_offset
);
5648 gimple_set_location (call
, loc
);
5649 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5651 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_BOUND
);
5652 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5653 element_s
, chunk
, e_gwv
, e_offset
);
5654 gimple_call_set_lhs (call
, e_bound
);
5655 gimple_set_location (call
, loc
);
5656 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5658 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_STEP
);
5659 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6, t
, dir
, e_range
,
5660 element_s
, chunk
, e_gwv
);
5661 gimple_call_set_lhs (call
, e_step
);
5662 gimple_set_location (call
, loc
);
5663 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5665 /* Add test and split block. */
5666 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5667 stmt
= gimple_build_cond_empty (expr
);
5668 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5669 split
= split_block (body_bb
, stmt
);
5670 elem_body_bb
= split
->dest
;
5671 if (cont_bb
== body_bb
)
5672 cont_bb
= elem_body_bb
;
5673 body_bb
= split
->src
;
5675 split
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5677 /* Initialize the user's loop vars. */
5678 gsi
= gsi_start_bb (elem_body_bb
);
5679 expand_oacc_collapse_vars (fd
, true, &gsi
, counts
, e_offset
);
5683 /* Loop increment goes into cont_bb. If this is not a loop, we
5684 will have spawned threads as if it was, and each one will
5685 execute one iteration. The specification is not explicit about
5686 whether such constructs are ill-formed or not, and they can
5687 occur, especially when noreturn routines are involved. */
5690 gsi
= gsi_last_bb (cont_bb
);
5691 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5692 loc
= gimple_location (cont_stmt
);
5696 /* Insert element loop increment and test. */
5697 expr
= build2 (PLUS_EXPR
, diff_type
, e_offset
, e_step
);
5698 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5699 true, GSI_SAME_STMT
);
5700 ass
= gimple_build_assign (e_offset
, expr
);
5701 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5702 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5704 stmt
= gimple_build_cond_empty (expr
);
5705 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5706 split
= split_block (cont_bb
, stmt
);
5707 elem_cont_bb
= split
->src
;
5708 cont_bb
= split
->dest
;
5710 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5711 split
->probability
= profile_probability::unlikely ().guessed ();
5713 = make_edge (elem_cont_bb
, elem_body_bb
, EDGE_TRUE_VALUE
);
5714 latch_edge
->probability
= profile_probability::likely ().guessed ();
5716 edge skip_edge
= make_edge (body_bb
, cont_bb
, EDGE_FALSE_VALUE
);
5717 skip_edge
->probability
= profile_probability::unlikely ().guessed ();
5718 edge loop_entry_edge
= EDGE_SUCC (body_bb
, 1 - skip_edge
->dest_idx
);
5719 loop_entry_edge
->probability
5720 = profile_probability::likely ().guessed ();
5722 gsi
= gsi_for_stmt (cont_stmt
);
5725 /* Increment offset. */
5726 if (gimple_in_ssa_p (cfun
))
5727 expr
= build2 (plus_code
, iter_type
, offset
,
5728 fold_convert (plus_type
, step
));
5730 expr
= build2 (PLUS_EXPR
, diff_type
, offset
, step
);
5731 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5732 true, GSI_SAME_STMT
);
5733 ass
= gimple_build_assign (offset_incr
, expr
);
5734 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5735 expr
= build2 (cond_code
, boolean_type_node
, offset_incr
, bound
);
5736 gsi_insert_before (&gsi
, gimple_build_cond_empty (expr
), GSI_SAME_STMT
);
5738 /* Remove the GIMPLE_OMP_CONTINUE. */
5739 gsi_remove (&gsi
, true);
5741 /* Fixup edges from cont_bb. */
5742 be
= BRANCH_EDGE (cont_bb
);
5743 fte
= FALLTHRU_EDGE (cont_bb
);
5744 be
->flags
|= EDGE_TRUE_VALUE
;
5745 fte
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5749 /* Split the beginning of exit_bb to make bottom_bb. We
5750 need to insert a nop at the start, because splitting is
5751 after a stmt, not before. */
5752 gsi
= gsi_start_bb (exit_bb
);
5753 stmt
= gimple_build_nop ();
5754 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5755 split
= split_block (exit_bb
, stmt
);
5756 bottom_bb
= split
->src
;
5757 exit_bb
= split
->dest
;
5758 gsi
= gsi_last_bb (bottom_bb
);
5760 /* Chunk increment and test goes into bottom_bb. */
5761 expr
= build2 (PLUS_EXPR
, diff_type
, chunk_no
,
5762 build_int_cst (diff_type
, 1));
5763 ass
= gimple_build_assign (chunk_no
, expr
);
5764 gsi_insert_after (&gsi
, ass
, GSI_CONTINUE_LINKING
);
5766 /* Chunk test at end of bottom_bb. */
5767 expr
= build2 (LT_EXPR
, boolean_type_node
, chunk_no
, chunk_max
);
5768 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5769 GSI_CONTINUE_LINKING
);
5771 /* Fixup edges from bottom_bb. */
5772 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5773 split
->probability
= profile_probability::unlikely ().guessed ();
5774 edge latch_edge
= make_edge (bottom_bb
, head_bb
, EDGE_TRUE_VALUE
);
5775 latch_edge
->probability
= profile_probability::likely ().guessed ();
5779 gsi
= gsi_last_bb (exit_bb
);
5780 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
5781 loc
= gimple_location (gsi_stmt (gsi
));
5783 if (!gimple_in_ssa_p (cfun
))
5785 /* Insert the final value of V, in case it is live. This is the
5786 value for the only thread that survives past the join. */
5787 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
5788 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
5789 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
5790 expr
= fold_build2 (MULT_EXPR
, diff_type
, expr
, s
);
5791 expr
= build2 (plus_code
, iter_type
, b
, fold_convert (plus_type
, expr
));
5792 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5793 true, GSI_SAME_STMT
);
5794 ass
= gimple_build_assign (v
, expr
);
5795 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5798 /* Remove the OMP_RETURN. */
5799 gsi_remove (&gsi
, true);
5803 /* We now have one, two or three nested loops. Update the loop
5805 struct loop
*parent
= entry_bb
->loop_father
;
5806 struct loop
*body
= body_bb
->loop_father
;
5810 struct loop
*chunk_loop
= alloc_loop ();
5811 chunk_loop
->header
= head_bb
;
5812 chunk_loop
->latch
= bottom_bb
;
5813 add_loop (chunk_loop
, parent
);
5814 parent
= chunk_loop
;
5816 else if (parent
!= body
)
5818 gcc_assert (body
->header
== body_bb
);
5819 gcc_assert (body
->latch
== cont_bb
5820 || single_pred (body
->latch
) == cont_bb
);
5826 struct loop
*body_loop
= alloc_loop ();
5827 body_loop
->header
= body_bb
;
5828 body_loop
->latch
= cont_bb
;
5829 add_loop (body_loop
, parent
);
5833 /* Insert tiling's element loop. */
5834 struct loop
*inner_loop
= alloc_loop ();
5835 inner_loop
->header
= elem_body_bb
;
5836 inner_loop
->latch
= elem_cont_bb
;
5837 add_loop (inner_loop
, body_loop
);
5843 /* Expand the OMP loop defined by REGION. */
5846 expand_omp_for (struct omp_region
*region
, gimple
*inner_stmt
)
5848 struct omp_for_data fd
;
5849 struct omp_for_data_loop
*loops
;
5852 = (struct omp_for_data_loop
*)
5853 alloca (gimple_omp_for_collapse (last_stmt (region
->entry
))
5854 * sizeof (struct omp_for_data_loop
));
5855 omp_extract_for_data (as_a
<gomp_for
*> (last_stmt (region
->entry
)),
5857 region
->sched_kind
= fd
.sched_kind
;
5858 region
->sched_modifiers
= fd
.sched_modifiers
;
5860 gcc_assert (EDGE_COUNT (region
->entry
->succs
) == 2);
5861 BRANCH_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
5862 FALLTHRU_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
5865 gcc_assert (EDGE_COUNT (region
->cont
->succs
) == 2);
5866 BRANCH_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
5867 FALLTHRU_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
5870 /* If there isn't a continue then this is a degerate case where
5871 the introduction of abnormal edges during lowering will prevent
5872 original loops from being detected. Fix that up. */
5873 loops_state_set (LOOPS_NEED_FIXUP
);
5875 if (gimple_omp_for_kind (fd
.for_stmt
) & GF_OMP_FOR_SIMD
)
5876 expand_omp_simd (region
, &fd
);
5877 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_CILKFOR
)
5878 expand_cilk_for (region
, &fd
);
5879 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_OACC_LOOP
)
5881 gcc_assert (!inner_stmt
);
5882 expand_oacc_for (region
, &fd
);
5884 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_TASKLOOP
)
5886 if (gimple_omp_for_combined_into_p (fd
.for_stmt
))
5887 expand_omp_taskloop_for_inner (region
, &fd
, inner_stmt
);
5889 expand_omp_taskloop_for_outer (region
, &fd
, inner_stmt
);
5891 else if (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
5892 && !fd
.have_ordered
)
5894 if (fd
.chunk_size
== NULL
)
5895 expand_omp_for_static_nochunk (region
, &fd
, inner_stmt
);
5897 expand_omp_for_static_chunk (region
, &fd
, inner_stmt
);
5901 int fn_index
, start_ix
, next_ix
;
5903 gcc_assert (gimple_omp_for_kind (fd
.for_stmt
)
5904 == GF_OMP_FOR_KIND_FOR
);
5905 if (fd
.chunk_size
== NULL
5906 && fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
)
5907 fd
.chunk_size
= integer_zero_node
;
5908 gcc_assert (fd
.sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
5909 switch (fd
.sched_kind
)
5911 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
5914 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
5915 case OMP_CLAUSE_SCHEDULE_GUIDED
:
5916 if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_NONMONOTONIC
)
5918 && !fd
.have_ordered
)
5920 fn_index
= 3 + fd
.sched_kind
;
5925 fn_index
= fd
.sched_kind
;
5929 fn_index
+= fd
.have_ordered
* 6;
5931 start_ix
= ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START
) + fn_index
;
5933 start_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_START
) + fn_index
;
5934 next_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
) + fn_index
;
5935 if (fd
.iter_type
== long_long_unsigned_type_node
)
5937 start_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5938 - (int)BUILT_IN_GOMP_LOOP_STATIC_START
);
5939 next_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5940 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
);
5942 expand_omp_for_generic (region
, &fd
, (enum built_in_function
) start_ix
,
5943 (enum built_in_function
) next_ix
, inner_stmt
);
5946 if (gimple_in_ssa_p (cfun
))
5947 update_ssa (TODO_update_ssa_only_virtuals
);
5950 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5952 v = GOMP_sections_start (n);
5969 v = GOMP_sections_next ();
5974 If this is a combined parallel sections, replace the call to
5975 GOMP_sections_start with call to GOMP_sections_next. */
5978 expand_omp_sections (struct omp_region
*region
)
5980 tree t
, u
, vin
= NULL
, vmain
, vnext
, l2
;
5982 basic_block entry_bb
, l0_bb
, l1_bb
, l2_bb
, default_bb
;
5983 gimple_stmt_iterator si
, switch_si
;
5984 gomp_sections
*sections_stmt
;
5986 gomp_continue
*cont
;
5989 struct omp_region
*inner
;
5991 bool exit_reachable
= region
->cont
!= NULL
;
5993 gcc_assert (region
->exit
!= NULL
);
5994 entry_bb
= region
->entry
;
5995 l0_bb
= single_succ (entry_bb
);
5996 l1_bb
= region
->cont
;
5997 l2_bb
= region
->exit
;
5998 if (single_pred_p (l2_bb
) && single_pred (l2_bb
) == l0_bb
)
5999 l2
= gimple_block_label (l2_bb
);
6002 /* This can happen if there are reductions. */
6003 len
= EDGE_COUNT (l0_bb
->succs
);
6004 gcc_assert (len
> 0);
6005 e
= EDGE_SUCC (l0_bb
, len
- 1);
6006 si
= gsi_last_bb (e
->dest
);
6009 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
6010 l2
= gimple_block_label (e
->dest
);
6012 FOR_EACH_EDGE (e
, ei
, l0_bb
->succs
)
6014 si
= gsi_last_bb (e
->dest
);
6016 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
6018 l2
= gimple_block_label (e
->dest
);
6024 default_bb
= create_empty_bb (l1_bb
->prev_bb
);
6026 default_bb
= create_empty_bb (l0_bb
);
6028 /* We will build a switch() with enough cases for all the
6029 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6030 and a default case to abort if something goes wrong. */
6031 len
= EDGE_COUNT (l0_bb
->succs
);
6033 /* Use vec::quick_push on label_vec throughout, since we know the size
6035 auto_vec
<tree
> label_vec (len
);
6037 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6038 GIMPLE_OMP_SECTIONS statement. */
6039 si
= gsi_last_bb (entry_bb
);
6040 sections_stmt
= as_a
<gomp_sections
*> (gsi_stmt (si
));
6041 gcc_assert (gimple_code (sections_stmt
) == GIMPLE_OMP_SECTIONS
);
6042 vin
= gimple_omp_sections_control (sections_stmt
);
6043 if (!is_combined_parallel (region
))
6045 /* If we are not inside a combined parallel+sections region,
6046 call GOMP_sections_start. */
6047 t
= build_int_cst (unsigned_type_node
, len
- 1);
6048 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START
);
6049 stmt
= gimple_build_call (u
, 1, t
);
6053 /* Otherwise, call GOMP_sections_next. */
6054 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
6055 stmt
= gimple_build_call (u
, 0);
6057 gimple_call_set_lhs (stmt
, vin
);
6058 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
6059 gsi_remove (&si
, true);
6061 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6063 switch_si
= gsi_last_bb (l0_bb
);
6064 gcc_assert (gimple_code (gsi_stmt (switch_si
)) == GIMPLE_OMP_SECTIONS_SWITCH
);
6067 cont
= as_a
<gomp_continue
*> (last_stmt (l1_bb
));
6068 gcc_assert (gimple_code (cont
) == GIMPLE_OMP_CONTINUE
);
6069 vmain
= gimple_omp_continue_control_use (cont
);
6070 vnext
= gimple_omp_continue_control_def (cont
);
6078 t
= build_case_label (build_int_cst (unsigned_type_node
, 0), NULL
, l2
);
6079 label_vec
.quick_push (t
);
6082 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6083 for (inner
= region
->inner
, casei
= 1;
6085 inner
= inner
->next
, i
++, casei
++)
6087 basic_block s_entry_bb
, s_exit_bb
;
6089 /* Skip optional reduction region. */
6090 if (inner
->type
== GIMPLE_OMP_ATOMIC_LOAD
)
6097 s_entry_bb
= inner
->entry
;
6098 s_exit_bb
= inner
->exit
;
6100 t
= gimple_block_label (s_entry_bb
);
6101 u
= build_int_cst (unsigned_type_node
, casei
);
6102 u
= build_case_label (u
, NULL
, t
);
6103 label_vec
.quick_push (u
);
6105 si
= gsi_last_bb (s_entry_bb
);
6106 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SECTION
);
6107 gcc_assert (i
< len
|| gimple_omp_section_last_p (gsi_stmt (si
)));
6108 gsi_remove (&si
, true);
6109 single_succ_edge (s_entry_bb
)->flags
= EDGE_FALLTHRU
;
6111 if (s_exit_bb
== NULL
)
6114 si
= gsi_last_bb (s_exit_bb
);
6115 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
6116 gsi_remove (&si
, true);
6118 single_succ_edge (s_exit_bb
)->flags
= EDGE_FALLTHRU
;
6121 /* Error handling code goes in DEFAULT_BB. */
6122 t
= gimple_block_label (default_bb
);
6123 u
= build_case_label (NULL
, NULL
, t
);
6124 make_edge (l0_bb
, default_bb
, 0);
6125 add_bb_to_loop (default_bb
, current_loops
->tree_root
);
6127 stmt
= gimple_build_switch (vmain
, u
, label_vec
);
6128 gsi_insert_after (&switch_si
, stmt
, GSI_SAME_STMT
);
6129 gsi_remove (&switch_si
, true);
6131 si
= gsi_start_bb (default_bb
);
6132 stmt
= gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP
), 0);
6133 gsi_insert_after (&si
, stmt
, GSI_CONTINUE_LINKING
);
6139 /* Code to get the next section goes in L1_BB. */
6140 si
= gsi_last_bb (l1_bb
);
6141 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CONTINUE
);
6143 bfn_decl
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
6144 stmt
= gimple_build_call (bfn_decl
, 0);
6145 gimple_call_set_lhs (stmt
, vnext
);
6146 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
6147 gsi_remove (&si
, true);
6149 single_succ_edge (l1_bb
)->flags
= EDGE_FALLTHRU
;
6152 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6153 si
= gsi_last_bb (l2_bb
);
6154 if (gimple_omp_return_nowait_p (gsi_stmt (si
)))
6155 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT
);
6156 else if (gimple_omp_return_lhs (gsi_stmt (si
)))
6157 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL
);
6159 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END
);
6160 stmt
= gimple_build_call (t
, 0);
6161 if (gimple_omp_return_lhs (gsi_stmt (si
)))
6162 gimple_call_set_lhs (stmt
, gimple_omp_return_lhs (gsi_stmt (si
)));
6163 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
6164 gsi_remove (&si
, true);
6166 set_immediate_dominator (CDI_DOMINATORS
, default_bb
, l0_bb
);
6169 /* Expand code for an OpenMP single directive. We've already expanded
6170 much of the code, here we simply place the GOMP_barrier call. */
6173 expand_omp_single (struct omp_region
*region
)
6175 basic_block entry_bb
, exit_bb
;
6176 gimple_stmt_iterator si
;
6178 entry_bb
= region
->entry
;
6179 exit_bb
= region
->exit
;
6181 si
= gsi_last_bb (entry_bb
);
6182 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
);
6183 gsi_remove (&si
, true);
6184 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
6186 si
= gsi_last_bb (exit_bb
);
6187 if (!gimple_omp_return_nowait_p (gsi_stmt (si
)))
6189 tree t
= gimple_omp_return_lhs (gsi_stmt (si
));
6190 gsi_insert_after (&si
, omp_build_barrier (t
), GSI_SAME_STMT
);
6192 gsi_remove (&si
, true);
6193 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
6196 /* Generic expansion for OpenMP synchronization directives: master,
6197 ordered and critical. All we need to do here is remove the entry
6198 and exit markers for REGION. */
6201 expand_omp_synch (struct omp_region
*region
)
6203 basic_block entry_bb
, exit_bb
;
6204 gimple_stmt_iterator si
;
6206 entry_bb
= region
->entry
;
6207 exit_bb
= region
->exit
;
6209 si
= gsi_last_bb (entry_bb
);
6210 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
6211 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_MASTER
6212 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TASKGROUP
6213 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ORDERED
6214 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CRITICAL
6215 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TEAMS
);
6216 gsi_remove (&si
, true);
6217 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
6221 si
= gsi_last_bb (exit_bb
);
6222 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
6223 gsi_remove (&si
, true);
6224 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
6228 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6229 operation as a normal volatile load. */
6232 expand_omp_atomic_load (basic_block load_bb
, tree addr
,
6233 tree loaded_val
, int index
)
6235 enum built_in_function tmpbase
;
6236 gimple_stmt_iterator gsi
;
6237 basic_block store_bb
;
6240 tree decl
, call
, type
, itype
;
6242 gsi
= gsi_last_bb (load_bb
);
6243 stmt
= gsi_stmt (gsi
);
6244 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
6245 loc
= gimple_location (stmt
);
6247 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6248 is smaller than word size, then expand_atomic_load assumes that the load
6249 is atomic. We could avoid the builtin entirely in this case. */
6251 tmpbase
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
6252 decl
= builtin_decl_explicit (tmpbase
);
6253 if (decl
== NULL_TREE
)
6256 type
= TREE_TYPE (loaded_val
);
6257 itype
= TREE_TYPE (TREE_TYPE (decl
));
6259 call
= build_call_expr_loc (loc
, decl
, 2, addr
,
6260 build_int_cst (NULL
,
6261 gimple_omp_atomic_seq_cst_p (stmt
)
6263 : MEMMODEL_RELAXED
));
6264 if (!useless_type_conversion_p (type
, itype
))
6265 call
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
6266 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
6268 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6269 gsi_remove (&gsi
, true);
6271 store_bb
= single_succ (load_bb
);
6272 gsi
= gsi_last_bb (store_bb
);
6273 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
6274 gsi_remove (&gsi
, true);
6276 if (gimple_in_ssa_p (cfun
))
6277 update_ssa (TODO_update_ssa_no_phi
);
6282 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6283 operation as a normal volatile store. */
6286 expand_omp_atomic_store (basic_block load_bb
, tree addr
,
6287 tree loaded_val
, tree stored_val
, int index
)
6289 enum built_in_function tmpbase
;
6290 gimple_stmt_iterator gsi
;
6291 basic_block store_bb
= single_succ (load_bb
);
6294 tree decl
, call
, type
, itype
;
6298 gsi
= gsi_last_bb (load_bb
);
6299 stmt
= gsi_stmt (gsi
);
6300 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
6302 /* If the load value is needed, then this isn't a store but an exchange. */
6303 exchange
= gimple_omp_atomic_need_value_p (stmt
);
6305 gsi
= gsi_last_bb (store_bb
);
6306 stmt
= gsi_stmt (gsi
);
6307 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_STORE
);
6308 loc
= gimple_location (stmt
);
6310 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6311 is smaller than word size, then expand_atomic_store assumes that the store
6312 is atomic. We could avoid the builtin entirely in this case. */
6314 tmpbase
= (exchange
? BUILT_IN_ATOMIC_EXCHANGE_N
: BUILT_IN_ATOMIC_STORE_N
);
6315 tmpbase
= (enum built_in_function
) ((int) tmpbase
+ index
+ 1);
6316 decl
= builtin_decl_explicit (tmpbase
);
6317 if (decl
== NULL_TREE
)
6320 type
= TREE_TYPE (stored_val
);
6322 /* Dig out the type of the function's second argument. */
6323 itype
= TREE_TYPE (decl
);
6324 itype
= TYPE_ARG_TYPES (itype
);
6325 itype
= TREE_CHAIN (itype
);
6326 itype
= TREE_VALUE (itype
);
6327 imode
= TYPE_MODE (itype
);
6329 if (exchange
&& !can_atomic_exchange_p (imode
, true))
6332 if (!useless_type_conversion_p (itype
, type
))
6333 stored_val
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, itype
, stored_val
);
6334 call
= build_call_expr_loc (loc
, decl
, 3, addr
, stored_val
,
6335 build_int_cst (NULL
,
6336 gimple_omp_atomic_seq_cst_p (stmt
)
6338 : MEMMODEL_RELAXED
));
6341 if (!useless_type_conversion_p (type
, itype
))
6342 call
= build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
6343 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
6346 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6347 gsi_remove (&gsi
, true);
6349 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6350 gsi
= gsi_last_bb (load_bb
);
6351 gsi_remove (&gsi
, true);
6353 if (gimple_in_ssa_p (cfun
))
6354 update_ssa (TODO_update_ssa_no_phi
);
6359 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6360 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6361 size of the data type, and thus usable to find the index of the builtin
6362 decl. Returns false if the expression is not of the proper form. */
6365 expand_omp_atomic_fetch_op (basic_block load_bb
,
6366 tree addr
, tree loaded_val
,
6367 tree stored_val
, int index
)
6369 enum built_in_function oldbase
, newbase
, tmpbase
;
6370 tree decl
, itype
, call
;
6372 basic_block store_bb
= single_succ (load_bb
);
6373 gimple_stmt_iterator gsi
;
6376 enum tree_code code
;
6377 bool need_old
, need_new
;
6381 /* We expect to find the following sequences:
6384 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6387 val = tmp OP something; (or: something OP tmp)
6388 GIMPLE_OMP_STORE (val)
6390 ???FIXME: Allow a more flexible sequence.
6391 Perhaps use data flow to pick the statements.
6395 gsi
= gsi_after_labels (store_bb
);
6396 stmt
= gsi_stmt (gsi
);
6397 loc
= gimple_location (stmt
);
6398 if (!is_gimple_assign (stmt
))
6401 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_OMP_ATOMIC_STORE
)
6403 need_new
= gimple_omp_atomic_need_value_p (gsi_stmt (gsi
));
6404 need_old
= gimple_omp_atomic_need_value_p (last_stmt (load_bb
));
6405 seq_cst
= gimple_omp_atomic_seq_cst_p (last_stmt (load_bb
));
6406 gcc_checking_assert (!need_old
|| !need_new
);
6408 if (!operand_equal_p (gimple_assign_lhs (stmt
), stored_val
, 0))
6411 /* Check for one of the supported fetch-op operations. */
6412 code
= gimple_assign_rhs_code (stmt
);
6416 case POINTER_PLUS_EXPR
:
6417 oldbase
= BUILT_IN_ATOMIC_FETCH_ADD_N
;
6418 newbase
= BUILT_IN_ATOMIC_ADD_FETCH_N
;
6421 oldbase
= BUILT_IN_ATOMIC_FETCH_SUB_N
;
6422 newbase
= BUILT_IN_ATOMIC_SUB_FETCH_N
;
6425 oldbase
= BUILT_IN_ATOMIC_FETCH_AND_N
;
6426 newbase
= BUILT_IN_ATOMIC_AND_FETCH_N
;
6429 oldbase
= BUILT_IN_ATOMIC_FETCH_OR_N
;
6430 newbase
= BUILT_IN_ATOMIC_OR_FETCH_N
;
6433 oldbase
= BUILT_IN_ATOMIC_FETCH_XOR_N
;
6434 newbase
= BUILT_IN_ATOMIC_XOR_FETCH_N
;
6440 /* Make sure the expression is of the proper form. */
6441 if (operand_equal_p (gimple_assign_rhs1 (stmt
), loaded_val
, 0))
6442 rhs
= gimple_assign_rhs2 (stmt
);
6443 else if (commutative_tree_code (gimple_assign_rhs_code (stmt
))
6444 && operand_equal_p (gimple_assign_rhs2 (stmt
), loaded_val
, 0))
6445 rhs
= gimple_assign_rhs1 (stmt
);
6449 tmpbase
= ((enum built_in_function
)
6450 ((need_new
? newbase
: oldbase
) + index
+ 1));
6451 decl
= builtin_decl_explicit (tmpbase
);
6452 if (decl
== NULL_TREE
)
6454 itype
= TREE_TYPE (TREE_TYPE (decl
));
6455 imode
= TYPE_MODE (itype
);
6457 /* We could test all of the various optabs involved, but the fact of the
6458 matter is that (with the exception of i486 vs i586 and xadd) all targets
6459 that support any atomic operaton optab also implements compare-and-swap.
6460 Let optabs.c take care of expanding any compare-and-swap loop. */
6461 if (!can_compare_and_swap_p (imode
, true) || !can_atomic_load_p (imode
))
6464 gsi
= gsi_last_bb (load_bb
);
6465 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6467 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6468 It only requires that the operation happen atomically. Thus we can
6469 use the RELAXED memory model. */
6470 call
= build_call_expr_loc (loc
, decl
, 3, addr
,
6471 fold_convert_loc (loc
, itype
, rhs
),
6472 build_int_cst (NULL
,
6473 seq_cst
? MEMMODEL_SEQ_CST
6474 : MEMMODEL_RELAXED
));
6476 if (need_old
|| need_new
)
6478 lhs
= need_old
? loaded_val
: stored_val
;
6479 call
= fold_convert_loc (loc
, TREE_TYPE (lhs
), call
);
6480 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, lhs
, call
);
6483 call
= fold_convert_loc (loc
, void_type_node
, call
);
6484 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6485 gsi_remove (&gsi
, true);
6487 gsi
= gsi_last_bb (store_bb
);
6488 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
6489 gsi_remove (&gsi
, true);
6490 gsi
= gsi_last_bb (store_bb
);
6491 stmt
= gsi_stmt (gsi
);
6492 gsi_remove (&gsi
, true);
6494 if (gimple_in_ssa_p (cfun
))
6496 release_defs (stmt
);
6497 update_ssa (TODO_update_ssa_no_phi
);
6503 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6507 newval = rhs; // with oldval replacing *addr in rhs
6508 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6509 if (oldval != newval)
6512 INDEX is log2 of the size of the data type, and thus usable to find the
6513 index of the builtin decl. */
6516 expand_omp_atomic_pipeline (basic_block load_bb
, basic_block store_bb
,
6517 tree addr
, tree loaded_val
, tree stored_val
,
6520 tree loadedi
, storedi
, initial
, new_storedi
, old_vali
;
6521 tree type
, itype
, cmpxchg
, iaddr
;
6522 gimple_stmt_iterator si
;
6523 basic_block loop_header
= single_succ (load_bb
);
6526 enum built_in_function fncode
;
6528 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6529 order to use the RELAXED memory model effectively. */
6530 fncode
= (enum built_in_function
)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6532 cmpxchg
= builtin_decl_explicit (fncode
);
6533 if (cmpxchg
== NULL_TREE
)
6535 type
= TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr
)));
6536 itype
= TREE_TYPE (TREE_TYPE (cmpxchg
));
6538 if (!can_compare_and_swap_p (TYPE_MODE (itype
), true)
6539 || !can_atomic_load_p (TYPE_MODE (itype
)))
6542 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6543 si
= gsi_last_bb (load_bb
);
6544 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6546 /* For floating-point values, we'll need to view-convert them to integers
6547 so that we can perform the atomic compare and swap. Simplify the
6548 following code by always setting up the "i"ntegral variables. */
6549 if (!INTEGRAL_TYPE_P (type
) && !POINTER_TYPE_P (type
))
6553 iaddr
= create_tmp_reg (build_pointer_type_for_mode (itype
, ptr_mode
,
6556 = force_gimple_operand_gsi (&si
,
6557 fold_convert (TREE_TYPE (iaddr
), addr
),
6558 false, NULL_TREE
, true, GSI_SAME_STMT
);
6559 stmt
= gimple_build_assign (iaddr
, iaddr_val
);
6560 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6561 loadedi
= create_tmp_var (itype
);
6562 if (gimple_in_ssa_p (cfun
))
6563 loadedi
= make_ssa_name (loadedi
);
6568 loadedi
= loaded_val
;
6571 fncode
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
6572 tree loaddecl
= builtin_decl_explicit (fncode
);
6575 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr
)),
6576 build_call_expr (loaddecl
, 2, iaddr
,
6577 build_int_cst (NULL_TREE
,
6578 MEMMODEL_RELAXED
)));
6580 initial
= build2 (MEM_REF
, TREE_TYPE (TREE_TYPE (iaddr
)), iaddr
,
6581 build_int_cst (TREE_TYPE (iaddr
), 0));
6584 = force_gimple_operand_gsi (&si
, initial
, true, NULL_TREE
, true,
6587 /* Move the value to the LOADEDI temporary. */
6588 if (gimple_in_ssa_p (cfun
))
6590 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header
)));
6591 phi
= create_phi_node (loadedi
, loop_header
);
6592 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, single_succ_edge (load_bb
)),
6596 gsi_insert_before (&si
,
6597 gimple_build_assign (loadedi
, initial
),
6599 if (loadedi
!= loaded_val
)
6601 gimple_stmt_iterator gsi2
;
6604 x
= build1 (VIEW_CONVERT_EXPR
, type
, loadedi
);
6605 gsi2
= gsi_start_bb (loop_header
);
6606 if (gimple_in_ssa_p (cfun
))
6609 x
= force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
6610 true, GSI_SAME_STMT
);
6611 stmt
= gimple_build_assign (loaded_val
, x
);
6612 gsi_insert_before (&gsi2
, stmt
, GSI_SAME_STMT
);
6616 x
= build2 (MODIFY_EXPR
, TREE_TYPE (loaded_val
), loaded_val
, x
);
6617 force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
6618 true, GSI_SAME_STMT
);
6621 gsi_remove (&si
, true);
6623 si
= gsi_last_bb (store_bb
);
6624 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
6627 storedi
= stored_val
;
6630 = force_gimple_operand_gsi (&si
,
6631 build1 (VIEW_CONVERT_EXPR
, itype
,
6632 stored_val
), true, NULL_TREE
, true,
6635 /* Build the compare&swap statement. */
6636 new_storedi
= build_call_expr (cmpxchg
, 3, iaddr
, loadedi
, storedi
);
6637 new_storedi
= force_gimple_operand_gsi (&si
,
6638 fold_convert (TREE_TYPE (loadedi
),
6641 true, GSI_SAME_STMT
);
6643 if (gimple_in_ssa_p (cfun
))
6647 old_vali
= create_tmp_var (TREE_TYPE (loadedi
));
6648 stmt
= gimple_build_assign (old_vali
, loadedi
);
6649 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6651 stmt
= gimple_build_assign (loadedi
, new_storedi
);
6652 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6655 /* Note that we always perform the comparison as an integer, even for
6656 floating point. This allows the atomic operation to properly
6657 succeed even with NaNs and -0.0. */
6658 tree ne
= build2 (NE_EXPR
, boolean_type_node
, new_storedi
, old_vali
);
6659 stmt
= gimple_build_cond_empty (ne
);
6660 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6663 e
= single_succ_edge (store_bb
);
6664 e
->flags
&= ~EDGE_FALLTHRU
;
6665 e
->flags
|= EDGE_FALSE_VALUE
;
6666 /* Expect no looping. */
6667 e
->probability
= profile_probability::guessed_always ();
6669 e
= make_edge (store_bb
, loop_header
, EDGE_TRUE_VALUE
);
6670 e
->probability
= profile_probability::guessed_never ();
6672 /* Copy the new value to loadedi (we already did that before the condition
6673 if we are not in SSA). */
6674 if (gimple_in_ssa_p (cfun
))
6676 phi
= gimple_seq_first_stmt (phi_nodes (loop_header
));
6677 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, e
), new_storedi
);
6680 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6681 gsi_remove (&si
, true);
6683 struct loop
*loop
= alloc_loop ();
6684 loop
->header
= loop_header
;
6685 loop
->latch
= store_bb
;
6686 add_loop (loop
, loop_header
->loop_father
);
6688 if (gimple_in_ssa_p (cfun
))
6689 update_ssa (TODO_update_ssa_no_phi
);
6694 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6696 GOMP_atomic_start ();
6700 The result is not globally atomic, but works so long as all parallel
6701 references are within #pragma omp atomic directives. According to
6702 responses received from omp@openmp.org, appears to be within spec.
6703 Which makes sense, since that's how several other compilers handle
6704 this situation as well.
6705 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6706 expanding. STORED_VAL is the operand of the matching
6707 GIMPLE_OMP_ATOMIC_STORE.
6710 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6714 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6719 expand_omp_atomic_mutex (basic_block load_bb
, basic_block store_bb
,
6720 tree addr
, tree loaded_val
, tree stored_val
)
6722 gimple_stmt_iterator si
;
6726 si
= gsi_last_bb (load_bb
);
6727 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6729 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START
);
6730 t
= build_call_expr (t
, 0);
6731 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6733 stmt
= gimple_build_assign (loaded_val
, build_simple_mem_ref (addr
));
6734 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6735 gsi_remove (&si
, true);
6737 si
= gsi_last_bb (store_bb
);
6738 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
6740 stmt
= gimple_build_assign (build_simple_mem_ref (unshare_expr (addr
)),
6742 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6744 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END
);
6745 t
= build_call_expr (t
, 0);
6746 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6747 gsi_remove (&si
, true);
6749 if (gimple_in_ssa_p (cfun
))
6750 update_ssa (TODO_update_ssa_no_phi
);
6754 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6755 using expand_omp_atomic_fetch_op. If it failed, we try to
6756 call expand_omp_atomic_pipeline, and if it fails too, the
6757 ultimate fallback is wrapping the operation in a mutex
6758 (expand_omp_atomic_mutex). REGION is the atomic region built
6759 by build_omp_regions_1(). */
6762 expand_omp_atomic (struct omp_region
*region
)
6764 basic_block load_bb
= region
->entry
, store_bb
= region
->exit
;
6765 gomp_atomic_load
*load
= as_a
<gomp_atomic_load
*> (last_stmt (load_bb
));
6766 gomp_atomic_store
*store
= as_a
<gomp_atomic_store
*> (last_stmt (store_bb
));
6767 tree loaded_val
= gimple_omp_atomic_load_lhs (load
);
6768 tree addr
= gimple_omp_atomic_load_rhs (load
);
6769 tree stored_val
= gimple_omp_atomic_store_val (store
);
6770 tree type
= TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr
)));
6771 HOST_WIDE_INT index
;
6773 /* Make sure the type is one of the supported sizes. */
6774 index
= tree_to_uhwi (TYPE_SIZE_UNIT (type
));
6775 index
= exact_log2 (index
);
6776 if (index
>= 0 && index
<= 4)
6778 unsigned int align
= TYPE_ALIGN_UNIT (type
);
6780 /* __sync builtins require strict data alignment. */
6781 if (exact_log2 (align
) >= index
)
6785 if (loaded_val
== stored_val
6786 && (is_int_mode (TYPE_MODE (type
), &smode
)
6787 || is_float_mode (TYPE_MODE (type
), &smode
))
6788 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
6789 && expand_omp_atomic_load (load_bb
, addr
, loaded_val
, index
))
6793 if ((is_int_mode (TYPE_MODE (type
), &smode
)
6794 || is_float_mode (TYPE_MODE (type
), &smode
))
6795 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
6796 && store_bb
== single_succ (load_bb
)
6797 && first_stmt (store_bb
) == store
6798 && expand_omp_atomic_store (load_bb
, addr
, loaded_val
,
6802 /* When possible, use specialized atomic update functions. */
6803 if ((INTEGRAL_TYPE_P (type
) || POINTER_TYPE_P (type
))
6804 && store_bb
== single_succ (load_bb
)
6805 && expand_omp_atomic_fetch_op (load_bb
, addr
,
6806 loaded_val
, stored_val
, index
))
6809 /* If we don't have specialized __sync builtins, try and implement
6810 as a compare and swap loop. */
6811 if (expand_omp_atomic_pipeline (load_bb
, store_bb
, addr
,
6812 loaded_val
, stored_val
, index
))
6817 /* The ultimate fallback is wrapping the operation in a mutex. */
6818 expand_omp_atomic_mutex (load_bb
, store_bb
, addr
, loaded_val
, stored_val
);
6821 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6825 mark_loops_in_oacc_kernels_region (basic_block region_entry
,
6826 basic_block region_exit
)
6828 struct loop
*outer
= region_entry
->loop_father
;
6829 gcc_assert (region_exit
== NULL
|| outer
== region_exit
->loop_father
);
6831 /* Don't parallelize the kernels region if it contains more than one outer
6833 unsigned int nr_outer_loops
= 0;
6834 struct loop
*single_outer
= NULL
;
6835 for (struct loop
*loop
= outer
->inner
; loop
!= NULL
; loop
= loop
->next
)
6837 gcc_assert (loop_outer (loop
) == outer
);
6839 if (!dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_entry
))
6842 if (region_exit
!= NULL
6843 && dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_exit
))
6847 single_outer
= loop
;
6849 if (nr_outer_loops
!= 1)
6852 for (struct loop
*loop
= single_outer
->inner
;
6858 /* Mark the loops in the region. */
6859 for (struct loop
*loop
= single_outer
; loop
!= NULL
; loop
= loop
->inner
)
6860 loop
->in_oacc_kernels_region
= true;
6863 /* Types used to pass grid and wortkgroup sizes to kernel invocation. */
6865 struct GTY(()) grid_launch_attributes_trees
6867 tree kernel_dim_array_type
;
6868 tree kernel_lattrs_dimnum_decl
;
6869 tree kernel_lattrs_grid_decl
;
6870 tree kernel_lattrs_group_decl
;
6871 tree kernel_launch_attributes_type
;
6874 static GTY(()) struct grid_launch_attributes_trees
*grid_attr_trees
;
6876 /* Create types used to pass kernel launch attributes to target. */
6879 grid_create_kernel_launch_attr_types (void)
6881 if (grid_attr_trees
)
6883 grid_attr_trees
= ggc_alloc
<grid_launch_attributes_trees
> ();
6885 tree dim_arr_index_type
6886 = build_index_type (build_int_cst (integer_type_node
, 2));
6887 grid_attr_trees
->kernel_dim_array_type
6888 = build_array_type (uint32_type_node
, dim_arr_index_type
);
6890 grid_attr_trees
->kernel_launch_attributes_type
= make_node (RECORD_TYPE
);
6891 grid_attr_trees
->kernel_lattrs_dimnum_decl
6892 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("ndim"),
6894 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_dimnum_decl
) = NULL_TREE
;
6896 grid_attr_trees
->kernel_lattrs_grid_decl
6897 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("grid_size"),
6898 grid_attr_trees
->kernel_dim_array_type
);
6899 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_grid_decl
)
6900 = grid_attr_trees
->kernel_lattrs_dimnum_decl
;
6901 grid_attr_trees
->kernel_lattrs_group_decl
6902 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("group_size"),
6903 grid_attr_trees
->kernel_dim_array_type
);
6904 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_group_decl
)
6905 = grid_attr_trees
->kernel_lattrs_grid_decl
;
6906 finish_builtin_struct (grid_attr_trees
->kernel_launch_attributes_type
,
6907 "__gomp_kernel_launch_attributes",
6908 grid_attr_trees
->kernel_lattrs_group_decl
, NULL_TREE
);
6911 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6912 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6913 of type uint32_type_node. */
6916 grid_insert_store_range_dim (gimple_stmt_iterator
*gsi
, tree range_var
,
6917 tree fld_decl
, int index
, tree value
)
6919 tree ref
= build4 (ARRAY_REF
, uint32_type_node
,
6920 build3 (COMPONENT_REF
,
6921 grid_attr_trees
->kernel_dim_array_type
,
6922 range_var
, fld_decl
, NULL_TREE
),
6923 build_int_cst (integer_type_node
, index
),
6924 NULL_TREE
, NULL_TREE
);
6925 gsi_insert_before (gsi
, gimple_build_assign (ref
, value
), GSI_SAME_STMT
);
6928 /* Return a tree representation of a pointer to a structure with grid and
6929 work-group size information. Statements filling that information will be
6930 inserted before GSI, TGT_STMT is the target statement which has the
6931 necessary information in it. */
6934 grid_get_kernel_launch_attributes (gimple_stmt_iterator
*gsi
,
6935 gomp_target
*tgt_stmt
)
6937 grid_create_kernel_launch_attr_types ();
6938 tree lattrs
= create_tmp_var (grid_attr_trees
->kernel_launch_attributes_type
,
6939 "__kernel_launch_attrs");
6941 unsigned max_dim
= 0;
6942 for (tree clause
= gimple_omp_target_clauses (tgt_stmt
);
6944 clause
= OMP_CLAUSE_CHAIN (clause
))
6946 if (OMP_CLAUSE_CODE (clause
) != OMP_CLAUSE__GRIDDIM_
)
6949 unsigned dim
= OMP_CLAUSE__GRIDDIM__DIMENSION (clause
);
6950 max_dim
= MAX (dim
, max_dim
);
6952 grid_insert_store_range_dim (gsi
, lattrs
,
6953 grid_attr_trees
->kernel_lattrs_grid_decl
,
6954 dim
, OMP_CLAUSE__GRIDDIM__SIZE (clause
));
6955 grid_insert_store_range_dim (gsi
, lattrs
,
6956 grid_attr_trees
->kernel_lattrs_group_decl
,
6957 dim
, OMP_CLAUSE__GRIDDIM__GROUP (clause
));
6960 tree dimref
= build3 (COMPONENT_REF
, uint32_type_node
, lattrs
,
6961 grid_attr_trees
->kernel_lattrs_dimnum_decl
, NULL_TREE
);
6962 gcc_checking_assert (max_dim
<= 2);
6963 tree dimensions
= build_int_cstu (uint32_type_node
, max_dim
+ 1);
6964 gsi_insert_before (gsi
, gimple_build_assign (dimref
, dimensions
),
6966 TREE_ADDRESSABLE (lattrs
) = 1;
6967 return build_fold_addr_expr (lattrs
);
6970 /* Build target argument identifier from the DEVICE identifier, value
6971 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6974 get_target_argument_identifier_1 (int device
, bool subseqent_param
, int id
)
6976 tree t
= build_int_cst (integer_type_node
, device
);
6977 if (subseqent_param
)
6978 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6979 build_int_cst (integer_type_node
,
6980 GOMP_TARGET_ARG_SUBSEQUENT_PARAM
));
6981 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6982 build_int_cst (integer_type_node
, id
));
6986 /* Like above but return it in type that can be directly stored as an element
6987 of the argument array. */
6990 get_target_argument_identifier (int device
, bool subseqent_param
, int id
)
6992 tree t
= get_target_argument_identifier_1 (device
, subseqent_param
, id
);
6993 return fold_convert (ptr_type_node
, t
);
6996 /* Return a target argument consisting of DEVICE identifier, value identifier
6997 ID, and the actual VALUE. */
7000 get_target_argument_value (gimple_stmt_iterator
*gsi
, int device
, int id
,
7003 tree t
= fold_build2 (LSHIFT_EXPR
, integer_type_node
,
7004 fold_convert (integer_type_node
, value
),
7005 build_int_cst (unsigned_type_node
,
7006 GOMP_TARGET_ARG_VALUE_SHIFT
));
7007 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
7008 get_target_argument_identifier_1 (device
, false, id
));
7009 t
= fold_convert (ptr_type_node
, t
);
7010 return force_gimple_operand_gsi (gsi
, t
, true, NULL
, true, GSI_SAME_STMT
);
7013 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7014 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7015 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7019 push_target_argument_according_to_value (gimple_stmt_iterator
*gsi
, int device
,
7020 int id
, tree value
, vec
<tree
> *args
)
7022 if (tree_fits_shwi_p (value
)
7023 && tree_to_shwi (value
) > -(1 << 15)
7024 && tree_to_shwi (value
) < (1 << 15))
7025 args
->quick_push (get_target_argument_value (gsi
, device
, id
, value
));
7028 args
->quick_push (get_target_argument_identifier (device
, true, id
));
7029 value
= fold_convert (ptr_type_node
, value
);
7030 value
= force_gimple_operand_gsi (gsi
, value
, true, NULL
, true,
7032 args
->quick_push (value
);
7036 /* Create an array of arguments that is then passed to GOMP_target. */
7039 get_target_arguments (gimple_stmt_iterator
*gsi
, gomp_target
*tgt_stmt
)
7041 auto_vec
<tree
, 6> args
;
7042 tree clauses
= gimple_omp_target_clauses (tgt_stmt
);
7043 tree t
, c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_TEAMS
);
7045 t
= OMP_CLAUSE_NUM_TEAMS_EXPR (c
);
7047 t
= integer_minus_one_node
;
7048 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
7049 GOMP_TARGET_ARG_NUM_TEAMS
, t
, &args
);
7051 c
= omp_find_clause (clauses
, OMP_CLAUSE_THREAD_LIMIT
);
7053 t
= OMP_CLAUSE_THREAD_LIMIT_EXPR (c
);
7055 t
= integer_minus_one_node
;
7056 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
7057 GOMP_TARGET_ARG_THREAD_LIMIT
, t
,
7060 /* Add HSA-specific grid sizes, if available. */
7061 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7062 OMP_CLAUSE__GRIDDIM_
))
7064 int id
= GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES
;
7065 t
= get_target_argument_identifier (GOMP_DEVICE_HSA
, true, id
);
7066 args
.quick_push (t
);
7067 args
.quick_push (grid_get_kernel_launch_attributes (gsi
, tgt_stmt
));
7070 /* Produce more, perhaps device specific, arguments here. */
7072 tree argarray
= create_tmp_var (build_array_type_nelts (ptr_type_node
,
7073 args
.length () + 1),
7074 ".omp_target_args");
7075 for (unsigned i
= 0; i
< args
.length (); i
++)
7077 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
7078 build_int_cst (integer_type_node
, i
),
7079 NULL_TREE
, NULL_TREE
);
7080 gsi_insert_before (gsi
, gimple_build_assign (ref
, args
[i
]),
7083 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
7084 build_int_cst (integer_type_node
, args
.length ()),
7085 NULL_TREE
, NULL_TREE
);
7086 gsi_insert_before (gsi
, gimple_build_assign (ref
, null_pointer_node
),
7088 TREE_ADDRESSABLE (argarray
) = 1;
7089 return build_fold_addr_expr (argarray
);
7092 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7095 expand_omp_target (struct omp_region
*region
)
7097 basic_block entry_bb
, exit_bb
, new_bb
;
7098 struct function
*child_cfun
;
7099 tree child_fn
, block
, t
;
7100 gimple_stmt_iterator gsi
;
7101 gomp_target
*entry_stmt
;
7104 bool offloaded
, data_region
;
7106 entry_stmt
= as_a
<gomp_target
*> (last_stmt (region
->entry
));
7107 new_bb
= region
->entry
;
7109 offloaded
= is_gimple_omp_offloaded (entry_stmt
);
7110 switch (gimple_omp_target_kind (entry_stmt
))
7112 case GF_OMP_TARGET_KIND_REGION
:
7113 case GF_OMP_TARGET_KIND_UPDATE
:
7114 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7115 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7116 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7117 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7118 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7119 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7120 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7121 data_region
= false;
7123 case GF_OMP_TARGET_KIND_DATA
:
7124 case GF_OMP_TARGET_KIND_OACC_DATA
:
7125 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7132 child_fn
= NULL_TREE
;
7136 child_fn
= gimple_omp_target_child_fn (entry_stmt
);
7137 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
7140 /* Supported by expand_omp_taskreg, but not here. */
7141 if (child_cfun
!= NULL
)
7142 gcc_checking_assert (!child_cfun
->cfg
);
7143 gcc_checking_assert (!gimple_in_ssa_p (cfun
));
7145 entry_bb
= region
->entry
;
7146 exit_bb
= region
->exit
;
7148 if (gimple_omp_target_kind (entry_stmt
) == GF_OMP_TARGET_KIND_OACC_KERNELS
)
7150 mark_loops_in_oacc_kernels_region (region
->entry
, region
->exit
);
7152 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7153 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7154 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7155 DECL_ATTRIBUTES (child_fn
)
7156 = tree_cons (get_identifier ("oacc kernels"),
7157 NULL_TREE
, DECL_ATTRIBUTES (child_fn
));
7162 unsigned srcidx
, dstidx
, num
;
7164 /* If the offloading region needs data sent from the parent
7165 function, then the very first statement (except possible
7166 tree profile counter updates) of the offloading body
7167 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7168 &.OMP_DATA_O is passed as an argument to the child function,
7169 we need to replace it with the argument as seen by the child
7172 In most cases, this will end up being the identity assignment
7173 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7174 a function call that has been inlined, the original PARM_DECL
7175 .OMP_DATA_I may have been converted into a different local
7176 variable. In which case, we need to keep the assignment. */
7177 tree data_arg
= gimple_omp_target_data_arg (entry_stmt
);
7180 basic_block entry_succ_bb
= single_succ (entry_bb
);
7181 gimple_stmt_iterator gsi
;
7183 gimple
*tgtcopy_stmt
= NULL
;
7184 tree sender
= TREE_VEC_ELT (data_arg
, 0);
7186 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
7188 gcc_assert (!gsi_end_p (gsi
));
7189 stmt
= gsi_stmt (gsi
);
7190 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7193 if (gimple_num_ops (stmt
) == 2)
7195 tree arg
= gimple_assign_rhs1 (stmt
);
7197 /* We're ignoring the subcode because we're
7198 effectively doing a STRIP_NOPS. */
7200 if (TREE_CODE (arg
) == ADDR_EXPR
7201 && TREE_OPERAND (arg
, 0) == sender
)
7203 tgtcopy_stmt
= stmt
;
7209 gcc_assert (tgtcopy_stmt
!= NULL
);
7210 arg
= DECL_ARGUMENTS (child_fn
);
7212 gcc_assert (gimple_assign_lhs (tgtcopy_stmt
) == arg
);
7213 gsi_remove (&gsi
, true);
7216 /* Declare local variables needed in CHILD_CFUN. */
7217 block
= DECL_INITIAL (child_fn
);
7218 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
7219 /* The gimplifier could record temporaries in the offloading block
7220 rather than in containing function's local_decls chain,
7221 which would mean cgraph missed finalizing them. Do it now. */
7222 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
7223 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
7224 varpool_node::finalize_decl (t
);
7225 DECL_SAVED_TREE (child_fn
) = NULL
;
7226 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7227 gimple_set_body (child_fn
, NULL
);
7228 TREE_USED (block
) = 1;
7230 /* Reset DECL_CONTEXT on function arguments. */
7231 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
7232 DECL_CONTEXT (t
) = child_fn
;
7234 /* Split ENTRY_BB at GIMPLE_*,
7235 so that it can be moved to the child function. */
7236 gsi
= gsi_last_bb (entry_bb
);
7237 stmt
= gsi_stmt (gsi
);
7239 && gimple_code (stmt
) == gimple_code (entry_stmt
));
7240 e
= split_block (entry_bb
, stmt
);
7241 gsi_remove (&gsi
, true);
7243 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
7245 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7248 gsi
= gsi_last_bb (exit_bb
);
7249 gcc_assert (!gsi_end_p (gsi
)
7250 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7251 stmt
= gimple_build_return (NULL
);
7252 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
7253 gsi_remove (&gsi
, true);
7256 /* Make sure to generate early debug for the function before
7257 outlining anything. */
7258 if (! gimple_in_ssa_p (cfun
))
7259 (*debug_hooks
->early_global_decl
) (cfun
->decl
);
7261 /* Move the offloading region into CHILD_CFUN. */
7263 block
= gimple_block (entry_stmt
);
7265 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
7267 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
7268 /* When the OMP expansion process cannot guarantee an up-to-date
7269 loop tree arrange for the child function to fixup loops. */
7270 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7271 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
7273 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7274 num
= vec_safe_length (child_cfun
->local_decls
);
7275 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
7277 t
= (*child_cfun
->local_decls
)[srcidx
];
7278 if (DECL_CONTEXT (t
) == cfun
->decl
)
7280 if (srcidx
!= dstidx
)
7281 (*child_cfun
->local_decls
)[dstidx
] = t
;
7285 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
7287 /* Inform the callgraph about the new function. */
7288 child_cfun
->curr_properties
= cfun
->curr_properties
;
7289 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
7290 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
7291 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
7292 node
->parallelized_function
= 1;
7293 cgraph_node::add_new_function (child_fn
, true);
7295 /* Add the new function to the offload table. */
7296 if (ENABLE_OFFLOADING
)
7297 vec_safe_push (offload_funcs
, child_fn
);
7299 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
7300 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
7302 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7303 fixed in a following pass. */
7304 push_cfun (child_cfun
);
7306 assign_assembler_name_if_needed (child_fn
);
7307 cgraph_edge::rebuild_edges ();
7309 /* Some EH regions might become dead, see PR34608. If
7310 pass_cleanup_cfg isn't the first pass to happen with the
7311 new child, these dead EH edges might cause problems.
7312 Clean them up now. */
7313 if (flag_exceptions
)
7316 bool changed
= false;
7318 FOR_EACH_BB_FN (bb
, cfun
)
7319 changed
|= gimple_purge_dead_eh_edges (bb
);
7321 cleanup_tree_cfg ();
7323 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7324 verify_loop_structure ();
7327 if (dump_file
&& !gimple_in_ssa_p (cfun
))
7329 omp_any_child_fn_dumped
= true;
7330 dump_function_header (dump_file
, child_fn
, dump_flags
);
7331 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
7335 /* Emit a library call to launch the offloading region, or do data
7337 tree t1
, t2
, t3
, t4
, device
, cond
, depend
, c
, clauses
;
7338 enum built_in_function start_ix
;
7339 location_t clause_loc
;
7340 unsigned int flags_i
= 0;
7342 switch (gimple_omp_target_kind (entry_stmt
))
7344 case GF_OMP_TARGET_KIND_REGION
:
7345 start_ix
= BUILT_IN_GOMP_TARGET
;
7347 case GF_OMP_TARGET_KIND_DATA
:
7348 start_ix
= BUILT_IN_GOMP_TARGET_DATA
;
7350 case GF_OMP_TARGET_KIND_UPDATE
:
7351 start_ix
= BUILT_IN_GOMP_TARGET_UPDATE
;
7353 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7354 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7356 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7357 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7358 flags_i
|= GOMP_TARGET_FLAG_EXIT_DATA
;
7360 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7361 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7362 start_ix
= BUILT_IN_GOACC_PARALLEL
;
7364 case GF_OMP_TARGET_KIND_OACC_DATA
:
7365 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7366 start_ix
= BUILT_IN_GOACC_DATA_START
;
7368 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7369 start_ix
= BUILT_IN_GOACC_UPDATE
;
7371 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7372 start_ix
= BUILT_IN_GOACC_ENTER_EXIT_DATA
;
7374 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7375 start_ix
= BUILT_IN_GOACC_DECLARE
;
7381 clauses
= gimple_omp_target_clauses (entry_stmt
);
7383 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7384 library choose) and there is no conditional. */
7386 device
= build_int_cst (integer_type_node
, GOMP_DEVICE_ICV
);
7388 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
7390 cond
= OMP_CLAUSE_IF_EXPR (c
);
7392 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEVICE
);
7395 /* Even if we pass it to all library function calls, it is currently only
7396 defined/used for the OpenMP target ones. */
7397 gcc_checking_assert (start_ix
== BUILT_IN_GOMP_TARGET
7398 || start_ix
== BUILT_IN_GOMP_TARGET_DATA
7399 || start_ix
== BUILT_IN_GOMP_TARGET_UPDATE
7400 || start_ix
== BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
);
7402 device
= OMP_CLAUSE_DEVICE_ID (c
);
7403 clause_loc
= OMP_CLAUSE_LOCATION (c
);
7406 clause_loc
= gimple_location (entry_stmt
);
7408 c
= omp_find_clause (clauses
, OMP_CLAUSE_NOWAIT
);
7410 flags_i
|= GOMP_TARGET_FLAG_NOWAIT
;
7412 /* Ensure 'device' is of the correct type. */
7413 device
= fold_convert_loc (clause_loc
, integer_type_node
, device
);
7415 /* If we found the clause 'if (cond)', build
7416 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7419 cond
= gimple_boolify (cond
);
7421 basic_block cond_bb
, then_bb
, else_bb
;
7425 tmp_var
= create_tmp_var (TREE_TYPE (device
));
7427 e
= split_block_after_labels (new_bb
);
7430 gsi
= gsi_last_bb (new_bb
);
7432 e
= split_block (new_bb
, gsi_stmt (gsi
));
7438 then_bb
= create_empty_bb (cond_bb
);
7439 else_bb
= create_empty_bb (then_bb
);
7440 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
7441 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
7443 stmt
= gimple_build_cond_empty (cond
);
7444 gsi
= gsi_last_bb (cond_bb
);
7445 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7447 gsi
= gsi_start_bb (then_bb
);
7448 stmt
= gimple_build_assign (tmp_var
, device
);
7449 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7451 gsi
= gsi_start_bb (else_bb
);
7452 stmt
= gimple_build_assign (tmp_var
,
7453 build_int_cst (integer_type_node
,
7454 GOMP_DEVICE_HOST_FALLBACK
));
7455 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7457 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
7458 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
7459 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
7460 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
7461 make_edge (then_bb
, new_bb
, EDGE_FALLTHRU
);
7462 make_edge (else_bb
, new_bb
, EDGE_FALLTHRU
);
7465 gsi
= gsi_last_bb (new_bb
);
7469 gsi
= gsi_last_bb (new_bb
);
7470 device
= force_gimple_operand_gsi (&gsi
, device
, true, NULL_TREE
,
7471 true, GSI_SAME_STMT
);
7474 t
= gimple_omp_target_data_arg (entry_stmt
);
7477 t1
= size_zero_node
;
7478 t2
= build_zero_cst (ptr_type_node
);
7484 t1
= TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t
, 1))));
7485 t1
= size_binop (PLUS_EXPR
, t1
, size_int (1));
7486 t2
= build_fold_addr_expr (TREE_VEC_ELT (t
, 0));
7487 t3
= build_fold_addr_expr (TREE_VEC_ELT (t
, 1));
7488 t4
= build_fold_addr_expr (TREE_VEC_ELT (t
, 2));
7492 bool tagging
= false;
7493 /* The maximum number used by any start_ix, without varargs. */
7494 auto_vec
<tree
, 11> args
;
7495 args
.quick_push (device
);
7497 args
.quick_push (build_fold_addr_expr (child_fn
));
7498 args
.quick_push (t1
);
7499 args
.quick_push (t2
);
7500 args
.quick_push (t3
);
7501 args
.quick_push (t4
);
7504 case BUILT_IN_GOACC_DATA_START
:
7505 case BUILT_IN_GOACC_DECLARE
:
7506 case BUILT_IN_GOMP_TARGET_DATA
:
7508 case BUILT_IN_GOMP_TARGET
:
7509 case BUILT_IN_GOMP_TARGET_UPDATE
:
7510 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
:
7511 args
.quick_push (build_int_cst (unsigned_type_node
, flags_i
));
7512 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
7514 depend
= OMP_CLAUSE_DECL (c
);
7516 depend
= build_int_cst (ptr_type_node
, 0);
7517 args
.quick_push (depend
);
7518 if (start_ix
== BUILT_IN_GOMP_TARGET
)
7519 args
.quick_push (get_target_arguments (&gsi
, entry_stmt
));
7521 case BUILT_IN_GOACC_PARALLEL
:
7522 oacc_set_fn_attrib (child_fn
, clauses
, &args
);
7525 case BUILT_IN_GOACC_ENTER_EXIT_DATA
:
7526 case BUILT_IN_GOACC_UPDATE
:
7528 tree t_async
= NULL_TREE
;
7530 /* If present, use the value specified by the respective
7531 clause, making sure that is of the correct type. */
7532 c
= omp_find_clause (clauses
, OMP_CLAUSE_ASYNC
);
7534 t_async
= fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
7536 OMP_CLAUSE_ASYNC_EXPR (c
));
7538 /* Default values for t_async. */
7539 t_async
= fold_convert_loc (gimple_location (entry_stmt
),
7541 build_int_cst (integer_type_node
,
7543 if (tagging
&& t_async
)
7545 unsigned HOST_WIDE_INT i_async
= GOMP_LAUNCH_OP_MAX
;
7547 if (TREE_CODE (t_async
) == INTEGER_CST
)
7549 /* See if we can pack the async arg in to the tag's
7551 i_async
= TREE_INT_CST_LOW (t_async
);
7552 if (i_async
< GOMP_LAUNCH_OP_MAX
)
7553 t_async
= NULL_TREE
;
7555 i_async
= GOMP_LAUNCH_OP_MAX
;
7557 args
.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC
, NULL_TREE
,
7561 args
.safe_push (t_async
);
7563 /* Save the argument index, and ... */
7564 unsigned t_wait_idx
= args
.length ();
7565 unsigned num_waits
= 0;
7566 c
= omp_find_clause (clauses
, OMP_CLAUSE_WAIT
);
7568 /* ... push a placeholder. */
7569 args
.safe_push (integer_zero_node
);
7571 for (; c
; c
= OMP_CLAUSE_CHAIN (c
))
7572 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_WAIT
)
7574 args
.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
7576 OMP_CLAUSE_WAIT_EXPR (c
)));
7580 if (!tagging
|| num_waits
)
7584 /* Now that we know the number, update the placeholder. */
7586 len
= oacc_launch_pack (GOMP_LAUNCH_WAIT
, NULL_TREE
, num_waits
);
7588 len
= build_int_cst (integer_type_node
, num_waits
);
7589 len
= fold_convert_loc (gimple_location (entry_stmt
),
7590 unsigned_type_node
, len
);
7591 args
[t_wait_idx
] = len
;
7599 /* Push terminal marker - zero. */
7600 args
.safe_push (oacc_launch_pack (0, NULL_TREE
, 0));
7602 g
= gimple_build_call_vec (builtin_decl_explicit (start_ix
), args
);
7603 gimple_set_location (g
, gimple_location (entry_stmt
));
7604 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
7608 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_TARGET
);
7609 gsi_remove (&gsi
, true);
7611 if (data_region
&& region
->exit
)
7613 gsi
= gsi_last_bb (region
->exit
);
7615 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_RETURN
);
7616 gsi_remove (&gsi
, true);
7620 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7621 iteration variable derived from the thread number. INTRA_GROUP means this
7622 is an expansion of a loop iterating over work-items within a separate
7623 iteration over groups. */
7626 grid_expand_omp_for_loop (struct omp_region
*kfor
, bool intra_group
)
7628 gimple_stmt_iterator gsi
;
7629 gomp_for
*for_stmt
= as_a
<gomp_for
*> (last_stmt (kfor
->entry
));
7630 gcc_checking_assert (gimple_omp_for_kind (for_stmt
)
7631 == GF_OMP_FOR_KIND_GRID_LOOP
);
7632 size_t collapse
= gimple_omp_for_collapse (for_stmt
);
7633 struct omp_for_data_loop
*loops
7634 = XALLOCAVEC (struct omp_for_data_loop
,
7635 gimple_omp_for_collapse (for_stmt
));
7636 struct omp_for_data fd
;
7638 remove_edge (BRANCH_EDGE (kfor
->entry
));
7639 basic_block body_bb
= FALLTHRU_EDGE (kfor
->entry
)->dest
;
7641 gcc_assert (kfor
->cont
);
7642 omp_extract_for_data (for_stmt
, &fd
, loops
);
7644 gsi
= gsi_start_bb (body_bb
);
7646 for (size_t dim
= 0; dim
< collapse
; dim
++)
7649 itype
= type
= TREE_TYPE (fd
.loops
[dim
].v
);
7650 if (POINTER_TYPE_P (type
))
7651 itype
= signed_type_for (type
);
7653 tree n1
= fd
.loops
[dim
].n1
;
7654 tree step
= fd
.loops
[dim
].step
;
7655 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
7656 true, NULL_TREE
, true, GSI_SAME_STMT
);
7657 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
7658 true, NULL_TREE
, true, GSI_SAME_STMT
);
7660 if (gimple_omp_for_grid_group_iter (for_stmt
))
7662 gcc_checking_assert (!intra_group
);
7663 threadid
= build_call_expr (builtin_decl_explicit
7664 (BUILT_IN_HSA_WORKGROUPID
), 1,
7665 build_int_cstu (unsigned_type_node
, dim
));
7667 else if (intra_group
)
7668 threadid
= build_call_expr (builtin_decl_explicit
7669 (BUILT_IN_HSA_WORKITEMID
), 1,
7670 build_int_cstu (unsigned_type_node
, dim
));
7672 threadid
= build_call_expr (builtin_decl_explicit
7673 (BUILT_IN_HSA_WORKITEMABSID
), 1,
7674 build_int_cstu (unsigned_type_node
, dim
));
7675 threadid
= fold_convert (itype
, threadid
);
7676 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
7677 true, GSI_SAME_STMT
);
7679 tree startvar
= fd
.loops
[dim
].v
;
7680 tree t
= fold_build2 (MULT_EXPR
, itype
, threadid
, step
);
7681 if (POINTER_TYPE_P (type
))
7682 t
= fold_build_pointer_plus (n1
, t
);
7684 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
7685 t
= fold_convert (type
, t
);
7686 t
= force_gimple_operand_gsi (&gsi
, t
,
7688 && TREE_ADDRESSABLE (startvar
),
7689 NULL_TREE
, true, GSI_SAME_STMT
);
7690 gassign
*assign_stmt
= gimple_build_assign (startvar
, t
);
7691 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
7693 /* Remove the omp for statement. */
7694 gsi
= gsi_last_bb (kfor
->entry
);
7695 gsi_remove (&gsi
, true);
7697 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7698 gsi
= gsi_last_bb (kfor
->cont
);
7699 gcc_assert (!gsi_end_p (gsi
)
7700 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_CONTINUE
);
7701 gsi_remove (&gsi
, true);
7703 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7704 gsi
= gsi_last_bb (kfor
->exit
);
7705 gcc_assert (!gsi_end_p (gsi
)
7706 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7708 gsi_insert_before (&gsi
, omp_build_barrier (NULL_TREE
), GSI_SAME_STMT
);
7709 gsi_remove (&gsi
, true);
7711 /* Fixup the much simpler CFG. */
7712 remove_edge (find_edge (kfor
->cont
, body_bb
));
7714 if (kfor
->cont
!= body_bb
)
7715 set_immediate_dominator (CDI_DOMINATORS
, kfor
->cont
, body_bb
);
7716 set_immediate_dominator (CDI_DOMINATORS
, kfor
->exit
, kfor
->cont
);
7719 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7722 struct grid_arg_decl_map
7728 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7729 pertaining to kernel function. */
7732 grid_remap_kernel_arg_accesses (tree
*tp
, int *walk_subtrees
, void *data
)
7734 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
7735 struct grid_arg_decl_map
*adm
= (struct grid_arg_decl_map
*) wi
->info
;
7738 if (t
== adm
->old_arg
)
7740 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
7744 /* If TARGET region contains a kernel body for loop, remove its region from the
7745 TARGET and expand it in HSA gridified kernel fashion. */
7748 grid_expand_target_grid_body (struct omp_region
*target
)
7750 if (!hsa_gen_requested_p ())
7753 gomp_target
*tgt_stmt
= as_a
<gomp_target
*> (last_stmt (target
->entry
));
7754 struct omp_region
**pp
;
7756 for (pp
= &target
->inner
; *pp
; pp
= &(*pp
)->next
)
7757 if ((*pp
)->type
== GIMPLE_OMP_GRID_BODY
)
7760 struct omp_region
*gpukernel
= *pp
;
7762 tree orig_child_fndecl
= gimple_omp_target_child_fn (tgt_stmt
);
7765 /* HSA cannot handle OACC stuff. */
7766 if (gimple_omp_target_kind (tgt_stmt
) != GF_OMP_TARGET_KIND_REGION
)
7768 gcc_checking_assert (orig_child_fndecl
);
7769 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7770 OMP_CLAUSE__GRIDDIM_
));
7771 cgraph_node
*n
= cgraph_node::get (orig_child_fndecl
);
7773 hsa_register_kernel (n
);
7777 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7778 OMP_CLAUSE__GRIDDIM_
));
7780 = gimple_block (first_stmt (single_succ (gpukernel
->entry
)));
7781 *pp
= gpukernel
->next
;
7782 for (pp
= &gpukernel
->inner
; *pp
; pp
= &(*pp
)->next
)
7783 if ((*pp
)->type
== GIMPLE_OMP_FOR
)
7786 struct omp_region
*kfor
= *pp
;
7788 gomp_for
*for_stmt
= as_a
<gomp_for
*> (last_stmt (kfor
->entry
));
7789 gcc_assert (gimple_omp_for_kind (for_stmt
) == GF_OMP_FOR_KIND_GRID_LOOP
);
7793 if (gimple_omp_for_grid_group_iter (for_stmt
))
7795 struct omp_region
**next_pp
;
7796 for (pp
= &kfor
->inner
; *pp
; pp
= next_pp
)
7798 next_pp
= &(*pp
)->next
;
7799 if ((*pp
)->type
!= GIMPLE_OMP_FOR
)
7801 gomp_for
*inner
= as_a
<gomp_for
*> (last_stmt ((*pp
)->entry
));
7802 gcc_assert (gimple_omp_for_kind (inner
)
7803 == GF_OMP_FOR_KIND_GRID_LOOP
);
7804 grid_expand_omp_for_loop (*pp
, true);
7809 expand_omp (kfor
->inner
);
7811 if (gpukernel
->inner
)
7812 expand_omp (gpukernel
->inner
);
7814 tree kern_fndecl
= copy_node (orig_child_fndecl
);
7815 DECL_NAME (kern_fndecl
) = clone_function_name (kern_fndecl
, "kernel");
7816 SET_DECL_ASSEMBLER_NAME (kern_fndecl
, DECL_NAME (kern_fndecl
));
7817 tree tgtblock
= gimple_block (tgt_stmt
);
7818 tree fniniblock
= make_node (BLOCK
);
7819 BLOCK_ABSTRACT_ORIGIN (fniniblock
) = tgtblock
;
7820 BLOCK_SOURCE_LOCATION (fniniblock
) = BLOCK_SOURCE_LOCATION (tgtblock
);
7821 BLOCK_SOURCE_END_LOCATION (fniniblock
) = BLOCK_SOURCE_END_LOCATION (tgtblock
);
7822 BLOCK_SUPERCONTEXT (fniniblock
) = kern_fndecl
;
7823 DECL_INITIAL (kern_fndecl
) = fniniblock
;
7824 push_struct_function (kern_fndecl
);
7825 cfun
->function_end_locus
= gimple_location (tgt_stmt
);
7826 init_tree_ssa (cfun
);
7829 /* Make sure to generate early debug for the function before
7830 outlining anything. */
7831 if (! gimple_in_ssa_p (cfun
))
7832 (*debug_hooks
->early_global_decl
) (cfun
->decl
);
7834 tree old_parm_decl
= DECL_ARGUMENTS (kern_fndecl
);
7835 gcc_assert (!DECL_CHAIN (old_parm_decl
));
7836 tree new_parm_decl
= copy_node (DECL_ARGUMENTS (kern_fndecl
));
7837 DECL_CONTEXT (new_parm_decl
) = kern_fndecl
;
7838 DECL_ARGUMENTS (kern_fndecl
) = new_parm_decl
;
7839 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl
))));
7840 DECL_RESULT (kern_fndecl
) = copy_node (DECL_RESULT (kern_fndecl
));
7841 DECL_CONTEXT (DECL_RESULT (kern_fndecl
)) = kern_fndecl
;
7842 struct function
*kern_cfun
= DECL_STRUCT_FUNCTION (kern_fndecl
);
7843 kern_cfun
->curr_properties
= cfun
->curr_properties
;
7845 grid_expand_omp_for_loop (kfor
, false);
7847 /* Remove the omp for statement. */
7848 gimple_stmt_iterator gsi
= gsi_last_bb (gpukernel
->entry
);
7849 gsi_remove (&gsi
, true);
7850 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7852 gsi
= gsi_last_bb (gpukernel
->exit
);
7853 gcc_assert (!gsi_end_p (gsi
)
7854 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7855 gimple
*ret_stmt
= gimple_build_return (NULL
);
7856 gsi_insert_after (&gsi
, ret_stmt
, GSI_SAME_STMT
);
7857 gsi_remove (&gsi
, true);
7859 /* Statements in the first BB in the target construct have been produced by
7860 target lowering and must be copied inside the GPUKERNEL, with the two
7861 exceptions of the first OMP statement and the OMP_DATA assignment
7863 gsi
= gsi_start_bb (single_succ (gpukernel
->entry
));
7864 tree data_arg
= gimple_omp_target_data_arg (tgt_stmt
);
7865 tree sender
= data_arg
? TREE_VEC_ELT (data_arg
, 0) : NULL
;
7866 for (gimple_stmt_iterator tsi
= gsi_start_bb (single_succ (target
->entry
));
7867 !gsi_end_p (tsi
); gsi_next (&tsi
))
7869 gimple
*stmt
= gsi_stmt (tsi
);
7870 if (is_gimple_omp (stmt
))
7873 && is_gimple_assign (stmt
)
7874 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == ADDR_EXPR
7875 && TREE_OPERAND (gimple_assign_rhs1 (stmt
), 0) == sender
)
7877 gimple
*copy
= gimple_copy (stmt
);
7878 gsi_insert_before (&gsi
, copy
, GSI_SAME_STMT
);
7879 gimple_set_block (copy
, fniniblock
);
7882 move_sese_region_to_fn (kern_cfun
, single_succ (gpukernel
->entry
),
7883 gpukernel
->exit
, inside_block
);
7885 cgraph_node
*kcn
= cgraph_node::get_create (kern_fndecl
);
7886 kcn
->mark_force_output ();
7887 cgraph_node
*orig_child
= cgraph_node::get (orig_child_fndecl
);
7889 hsa_register_kernel (kcn
, orig_child
);
7891 cgraph_node::add_new_function (kern_fndecl
, true);
7892 push_cfun (kern_cfun
);
7893 cgraph_edge::rebuild_edges ();
7895 /* Re-map any mention of the PARM_DECL of the original function to the
7896 PARM_DECL of the new one.
7898 TODO: It would be great if lowering produced references into the GPU
7899 kernel decl straight away and we did not have to do this. */
7900 struct grid_arg_decl_map adm
;
7901 adm
.old_arg
= old_parm_decl
;
7902 adm
.new_arg
= new_parm_decl
;
7904 FOR_EACH_BB_FN (bb
, kern_cfun
)
7906 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
7908 gimple
*stmt
= gsi_stmt (gsi
);
7909 struct walk_stmt_info wi
;
7910 memset (&wi
, 0, sizeof (wi
));
7912 walk_gimple_op (stmt
, grid_remap_kernel_arg_accesses
, &wi
);
7920 /* Expand the parallel region tree rooted at REGION. Expansion
7921 proceeds in depth-first order. Innermost regions are expanded
7922 first. This way, parallel regions that require a new function to
7923 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7924 internal dependencies in their body. */
7927 expand_omp (struct omp_region
*region
)
7929 omp_any_child_fn_dumped
= false;
7932 location_t saved_location
;
7933 gimple
*inner_stmt
= NULL
;
7935 /* First, determine whether this is a combined parallel+workshare
7937 if (region
->type
== GIMPLE_OMP_PARALLEL
)
7938 determine_parallel_type (region
);
7939 else if (region
->type
== GIMPLE_OMP_TARGET
)
7940 grid_expand_target_grid_body (region
);
7942 if (region
->type
== GIMPLE_OMP_FOR
7943 && gimple_omp_for_combined_p (last_stmt (region
->entry
)))
7944 inner_stmt
= last_stmt (region
->inner
->entry
);
7947 expand_omp (region
->inner
);
7949 saved_location
= input_location
;
7950 if (gimple_has_location (last_stmt (region
->entry
)))
7951 input_location
= gimple_location (last_stmt (region
->entry
));
7953 switch (region
->type
)
7955 case GIMPLE_OMP_PARALLEL
:
7956 case GIMPLE_OMP_TASK
:
7957 expand_omp_taskreg (region
);
7960 case GIMPLE_OMP_FOR
:
7961 expand_omp_for (region
, inner_stmt
);
7964 case GIMPLE_OMP_SECTIONS
:
7965 expand_omp_sections (region
);
7968 case GIMPLE_OMP_SECTION
:
7969 /* Individual omp sections are handled together with their
7970 parent GIMPLE_OMP_SECTIONS region. */
7973 case GIMPLE_OMP_SINGLE
:
7974 expand_omp_single (region
);
7977 case GIMPLE_OMP_ORDERED
:
7979 gomp_ordered
*ord_stmt
7980 = as_a
<gomp_ordered
*> (last_stmt (region
->entry
));
7981 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt
),
7984 /* We'll expand these when expanding corresponding
7985 worksharing region with ordered(n) clause. */
7986 gcc_assert (region
->outer
7987 && region
->outer
->type
== GIMPLE_OMP_FOR
);
7988 region
->ord_stmt
= ord_stmt
;
7993 case GIMPLE_OMP_MASTER
:
7994 case GIMPLE_OMP_TASKGROUP
:
7995 case GIMPLE_OMP_CRITICAL
:
7996 case GIMPLE_OMP_TEAMS
:
7997 expand_omp_synch (region
);
8000 case GIMPLE_OMP_ATOMIC_LOAD
:
8001 expand_omp_atomic (region
);
8004 case GIMPLE_OMP_TARGET
:
8005 expand_omp_target (region
);
8012 input_location
= saved_location
;
8013 region
= region
->next
;
8015 if (omp_any_child_fn_dumped
)
8018 dump_function_header (dump_file
, current_function_decl
, dump_flags
);
8019 omp_any_child_fn_dumped
= false;
8023 /* Helper for build_omp_regions. Scan the dominator tree starting at
8024 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8025 true, the function ends once a single tree is built (otherwise, whole
8026 forest of OMP constructs may be built). */
8029 build_omp_regions_1 (basic_block bb
, struct omp_region
*parent
,
8032 gimple_stmt_iterator gsi
;
8036 gsi
= gsi_last_bb (bb
);
8037 if (!gsi_end_p (gsi
) && is_gimple_omp (gsi_stmt (gsi
)))
8039 struct omp_region
*region
;
8040 enum gimple_code code
;
8042 stmt
= gsi_stmt (gsi
);
8043 code
= gimple_code (stmt
);
8044 if (code
== GIMPLE_OMP_RETURN
)
8046 /* STMT is the return point out of region PARENT. Mark it
8047 as the exit point and make PARENT the immediately
8048 enclosing region. */
8049 gcc_assert (parent
);
8052 parent
= parent
->outer
;
8054 else if (code
== GIMPLE_OMP_ATOMIC_STORE
)
8056 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8057 GIMPLE_OMP_RETURN, but matches with
8058 GIMPLE_OMP_ATOMIC_LOAD. */
8059 gcc_assert (parent
);
8060 gcc_assert (parent
->type
== GIMPLE_OMP_ATOMIC_LOAD
);
8063 parent
= parent
->outer
;
8065 else if (code
== GIMPLE_OMP_CONTINUE
)
8067 gcc_assert (parent
);
8070 else if (code
== GIMPLE_OMP_SECTIONS_SWITCH
)
8072 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8073 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8077 region
= new_omp_region (bb
, code
, parent
);
8079 if (code
== GIMPLE_OMP_TARGET
)
8081 switch (gimple_omp_target_kind (stmt
))
8083 case GF_OMP_TARGET_KIND_REGION
:
8084 case GF_OMP_TARGET_KIND_DATA
:
8085 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
8086 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
8087 case GF_OMP_TARGET_KIND_OACC_DATA
:
8088 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
8090 case GF_OMP_TARGET_KIND_UPDATE
:
8091 case GF_OMP_TARGET_KIND_ENTER_DATA
:
8092 case GF_OMP_TARGET_KIND_EXIT_DATA
:
8093 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
8094 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
8095 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
8096 /* ..., other than for those stand-alone directives... */
8103 else if (code
== GIMPLE_OMP_ORDERED
8104 && omp_find_clause (gimple_omp_ordered_clauses
8105 (as_a
<gomp_ordered
*> (stmt
)),
8107 /* #pragma omp ordered depend is also just a stand-alone
8110 /* ..., this directive becomes the parent for a new region. */
8116 if (single_tree
&& !parent
)
8119 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
8121 son
= next_dom_son (CDI_DOMINATORS
, son
))
8122 build_omp_regions_1 (son
, parent
, single_tree
);
8125 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8129 build_omp_regions_root (basic_block root
)
8131 gcc_assert (root_omp_region
== NULL
);
8132 build_omp_regions_1 (root
, NULL
, true);
8133 gcc_assert (root_omp_region
!= NULL
);
8136 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8139 omp_expand_local (basic_block head
)
8141 build_omp_regions_root (head
);
8142 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
8144 fprintf (dump_file
, "\nOMP region tree\n\n");
8145 dump_omp_region (dump_file
, root_omp_region
, 0);
8146 fprintf (dump_file
, "\n");
8149 remove_exit_barriers (root_omp_region
);
8150 expand_omp (root_omp_region
);
8152 omp_free_regions ();
8155 /* Scan the CFG and build a tree of OMP regions. Return the root of
8156 the OMP region tree. */
8159 build_omp_regions (void)
8161 gcc_assert (root_omp_region
== NULL
);
8162 calculate_dominance_info (CDI_DOMINATORS
);
8163 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, false);
8166 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8169 execute_expand_omp (void)
8171 build_omp_regions ();
8173 if (!root_omp_region
)
8178 fprintf (dump_file
, "\nOMP region tree\n\n");
8179 dump_omp_region (dump_file
, root_omp_region
, 0);
8180 fprintf (dump_file
, "\n");
8183 remove_exit_barriers (root_omp_region
);
8185 expand_omp (root_omp_region
);
8187 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
8188 verify_loop_structure ();
8189 cleanup_tree_cfg ();
8191 omp_free_regions ();
8196 /* OMP expansion -- the default pass, run before creation of SSA form. */
8200 const pass_data pass_data_expand_omp
=
8202 GIMPLE_PASS
, /* type */
8203 "ompexp", /* name */
8204 OPTGROUP_OMP
, /* optinfo_flags */
8205 TV_NONE
, /* tv_id */
8206 PROP_gimple_any
, /* properties_required */
8207 PROP_gimple_eomp
, /* properties_provided */
8208 0, /* properties_destroyed */
8209 0, /* todo_flags_start */
8210 0, /* todo_flags_finish */
8213 class pass_expand_omp
: public gimple_opt_pass
8216 pass_expand_omp (gcc::context
*ctxt
)
8217 : gimple_opt_pass (pass_data_expand_omp
, ctxt
)
8220 /* opt_pass methods: */
8221 virtual unsigned int execute (function
*)
8223 bool gate
= ((flag_cilkplus
!= 0 || flag_openacc
!= 0 || flag_openmp
!= 0
8224 || flag_openmp_simd
!= 0)
8227 /* This pass always runs, to provide PROP_gimple_eomp.
8228 But often, there is nothing to do. */
8232 return execute_expand_omp ();
8235 }; // class pass_expand_omp
8240 make_pass_expand_omp (gcc::context
*ctxt
)
8242 return new pass_expand_omp (ctxt
);
8247 const pass_data pass_data_expand_omp_ssa
=
8249 GIMPLE_PASS
, /* type */
8250 "ompexpssa", /* name */
8251 OPTGROUP_OMP
, /* optinfo_flags */
8252 TV_NONE
, /* tv_id */
8253 PROP_cfg
| PROP_ssa
, /* properties_required */
8254 PROP_gimple_eomp
, /* properties_provided */
8255 0, /* properties_destroyed */
8256 0, /* todo_flags_start */
8257 TODO_cleanup_cfg
| TODO_rebuild_alias
, /* todo_flags_finish */
8260 class pass_expand_omp_ssa
: public gimple_opt_pass
8263 pass_expand_omp_ssa (gcc::context
*ctxt
)
8264 : gimple_opt_pass (pass_data_expand_omp_ssa
, ctxt
)
8267 /* opt_pass methods: */
8268 virtual bool gate (function
*fun
)
8270 return !(fun
->curr_properties
& PROP_gimple_eomp
);
8272 virtual unsigned int execute (function
*) { return execute_expand_omp (); }
8273 opt_pass
* clone () { return new pass_expand_omp_ssa (m_ctxt
); }
8275 }; // class pass_expand_omp_ssa
8280 make_pass_expand_omp_ssa (gcc::context
*ctxt
)
8282 return new pass_expand_omp_ssa (ctxt
);
8285 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8289 omp_make_gimple_edges (basic_block bb
, struct omp_region
**region
,
8292 gimple
*last
= last_stmt (bb
);
8293 enum gimple_code code
= gimple_code (last
);
8294 struct omp_region
*cur_region
= *region
;
8295 bool fallthru
= false;
8299 case GIMPLE_OMP_PARALLEL
:
8300 case GIMPLE_OMP_TASK
:
8301 case GIMPLE_OMP_FOR
:
8302 case GIMPLE_OMP_SINGLE
:
8303 case GIMPLE_OMP_TEAMS
:
8304 case GIMPLE_OMP_MASTER
:
8305 case GIMPLE_OMP_TASKGROUP
:
8306 case GIMPLE_OMP_CRITICAL
:
8307 case GIMPLE_OMP_SECTION
:
8308 case GIMPLE_OMP_GRID_BODY
:
8309 cur_region
= new_omp_region (bb
, code
, cur_region
);
8313 case GIMPLE_OMP_ORDERED
:
8314 cur_region
= new_omp_region (bb
, code
, cur_region
);
8316 if (omp_find_clause (gimple_omp_ordered_clauses
8317 (as_a
<gomp_ordered
*> (last
)),
8319 cur_region
= cur_region
->outer
;
8322 case GIMPLE_OMP_TARGET
:
8323 cur_region
= new_omp_region (bb
, code
, cur_region
);
8325 switch (gimple_omp_target_kind (last
))
8327 case GF_OMP_TARGET_KIND_REGION
:
8328 case GF_OMP_TARGET_KIND_DATA
:
8329 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
8330 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
8331 case GF_OMP_TARGET_KIND_OACC_DATA
:
8332 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
8334 case GF_OMP_TARGET_KIND_UPDATE
:
8335 case GF_OMP_TARGET_KIND_ENTER_DATA
:
8336 case GF_OMP_TARGET_KIND_EXIT_DATA
:
8337 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
8338 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
8339 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
8340 cur_region
= cur_region
->outer
;
8347 case GIMPLE_OMP_SECTIONS
:
8348 cur_region
= new_omp_region (bb
, code
, cur_region
);
8352 case GIMPLE_OMP_SECTIONS_SWITCH
:
8356 case GIMPLE_OMP_ATOMIC_LOAD
:
8357 case GIMPLE_OMP_ATOMIC_STORE
:
8361 case GIMPLE_OMP_RETURN
:
8362 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8363 somewhere other than the next block. This will be
8365 cur_region
->exit
= bb
;
8366 if (cur_region
->type
== GIMPLE_OMP_TASK
)
8367 /* Add an edge corresponding to not scheduling the task
8369 make_edge (cur_region
->entry
, bb
, EDGE_ABNORMAL
);
8370 fallthru
= cur_region
->type
!= GIMPLE_OMP_SECTION
;
8371 cur_region
= cur_region
->outer
;
8374 case GIMPLE_OMP_CONTINUE
:
8375 cur_region
->cont
= bb
;
8376 switch (cur_region
->type
)
8378 case GIMPLE_OMP_FOR
:
8379 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8380 succs edges as abnormal to prevent splitting
8382 single_succ_edge (cur_region
->entry
)->flags
|= EDGE_ABNORMAL
;
8383 /* Make the loopback edge. */
8384 make_edge (bb
, single_succ (cur_region
->entry
),
8387 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8388 corresponds to the case that the body of the loop
8389 is not executed at all. */
8390 make_edge (cur_region
->entry
, bb
->next_bb
, EDGE_ABNORMAL
);
8391 make_edge (bb
, bb
->next_bb
, EDGE_FALLTHRU
| EDGE_ABNORMAL
);
8395 case GIMPLE_OMP_SECTIONS
:
8396 /* Wire up the edges into and out of the nested sections. */
8398 basic_block switch_bb
= single_succ (cur_region
->entry
);
8400 struct omp_region
*i
;
8401 for (i
= cur_region
->inner
; i
; i
= i
->next
)
8403 gcc_assert (i
->type
== GIMPLE_OMP_SECTION
);
8404 make_edge (switch_bb
, i
->entry
, 0);
8405 make_edge (i
->exit
, bb
, EDGE_FALLTHRU
);
8408 /* Make the loopback edge to the block with
8409 GIMPLE_OMP_SECTIONS_SWITCH. */
8410 make_edge (bb
, switch_bb
, 0);
8412 /* Make the edge from the switch to exit. */
8413 make_edge (switch_bb
, bb
->next_bb
, 0);
8418 case GIMPLE_OMP_TASK
:
8431 if (*region
!= cur_region
)
8433 *region
= cur_region
;
8435 *region_idx
= cur_region
->entry
->index
;
8443 #include "gt-omp-expand.h"