/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-into-ssa.h"
#include "splay-tree.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     construct.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;
static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */
static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  int vf = omp_max_vf ();
  if (vf == 1)
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
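
/* For illustration only: assuming omp_max_vf returns a power-of-two
   vectorization factor, the folding above rounds the chunk size up to a
   multiple of that factor, e.g. with vf == 8:

	chunk_size = 13
	chunk_size = (13 + 7) & -8	-->  16

   so each chunk handed out by the runtime covers whole SIMD vectors.  */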
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
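
/* For illustration only: given a combined construct such as

	#pragma omp parallel for schedule (dynamic, 16)
	for (i = 0; i < n; i++) ...

   the vector built above holds the extra trailing arguments of the combined
   libgomp entry point, in order: the lower bound, the upper bound, the step
   and (because a chunk size was given) the possibly SIMD-adjusted chunk
   size, all converted to long.  For parallel+sections it holds only the
   number of sections.  */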
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
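
/* For illustration only: once the region is marked combined above, a nest
   like

	#pragma omp parallel
	#pragma omp for schedule (guided)
	for (i = 0; i < n; i++) ...

   can be launched with a single GOMP_parallel_loop_guided call instead of a
   plain GOMP_parallel whose body then sets up the workshare separately;
   schedule (static) and ordered loops deliberately keep the two-step form,
   as explained in the comment above.  */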
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	     region->cont->index);

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}
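
/* For illustration only: for a body such as

	#pragma omp parallel
	  {
	    #pragma omp for
	    for (i = 0; i < n; i++) ...
	  }

   the builder above creates a GIMPLE_OMP_PARALLEL region chained into
   ROOT_OMP_REGION whose INNER field points at the GIMPLE_OMP_FOR region;
   siblings hang off NEXT and nested constructs off INNER.  */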
/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}
/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}
/* Build the function calls to GOMP_parallel_start etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */
509 expand_parallel_call (struct omp_region
*region
, basic_block bb
,
510 gomp_parallel
*entry_stmt
,
511 vec
<tree
, va_gc
> *ws_args
)
513 tree t
, t1
, t2
, val
, cond
, c
, clauses
, flags
;
514 gimple_stmt_iterator gsi
;
516 enum built_in_function start_ix
;
518 location_t clause_loc
;
519 vec
<tree
, va_gc
> *args
;
521 clauses
= gimple_omp_parallel_clauses (entry_stmt
);
523 /* Determine what flavor of GOMP_parallel we will be
525 start_ix
= BUILT_IN_GOMP_PARALLEL
;
526 if (is_combined_parallel (region
))
528 switch (region
->inner
->type
)
531 gcc_assert (region
->inner
->sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
532 switch (region
->inner
->sched_kind
)
534 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
537 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
538 case OMP_CLAUSE_SCHEDULE_GUIDED
:
539 if (region
->inner
->sched_modifiers
540 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC
)
542 start_ix2
= 3 + region
->inner
->sched_kind
;
547 start_ix2
= region
->inner
->sched_kind
;
550 start_ix2
+= (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
;
551 start_ix
= (enum built_in_function
) start_ix2
;
553 case GIMPLE_OMP_SECTIONS
:
554 start_ix
= BUILT_IN_GOMP_PARALLEL_SECTIONS
;
561 /* By default, the value of NUM_THREADS is zero (selected at run time)
562 and there is no conditional. */
564 val
= build_int_cst (unsigned_type_node
, 0);
565 flags
= build_int_cst (unsigned_type_node
, 0);
567 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
569 cond
= OMP_CLAUSE_IF_EXPR (c
);
571 c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_THREADS
);
574 val
= OMP_CLAUSE_NUM_THREADS_EXPR (c
);
575 clause_loc
= OMP_CLAUSE_LOCATION (c
);
578 clause_loc
= gimple_location (entry_stmt
);
580 c
= omp_find_clause (clauses
, OMP_CLAUSE_PROC_BIND
);
582 flags
= build_int_cst (unsigned_type_node
, OMP_CLAUSE_PROC_BIND_KIND (c
));
584 /* Ensure 'val' is of the correct type. */
585 val
= fold_convert_loc (clause_loc
, unsigned_type_node
, val
);
587 /* If we found the clause 'if (cond)', build either
588 (cond != 0) or (cond ? val : 1u). */
591 cond
= gimple_boolify (cond
);
593 if (integer_zerop (val
))
594 val
= fold_build2_loc (clause_loc
,
595 EQ_EXPR
, unsigned_type_node
, cond
,
596 build_int_cst (TREE_TYPE (cond
), 0));
599 basic_block cond_bb
, then_bb
, else_bb
;
600 edge e
, e_then
, e_else
;
601 tree tmp_then
, tmp_else
, tmp_join
, tmp_var
;
603 tmp_var
= create_tmp_var (TREE_TYPE (val
));
604 if (gimple_in_ssa_p (cfun
))
606 tmp_then
= make_ssa_name (tmp_var
);
607 tmp_else
= make_ssa_name (tmp_var
);
608 tmp_join
= make_ssa_name (tmp_var
);
617 e
= split_block_after_labels (bb
);
622 then_bb
= create_empty_bb (cond_bb
);
623 else_bb
= create_empty_bb (then_bb
);
624 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
625 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
627 stmt
= gimple_build_cond_empty (cond
);
628 gsi
= gsi_start_bb (cond_bb
);
629 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
631 gsi
= gsi_start_bb (then_bb
);
632 expand_omp_build_assign (&gsi
, tmp_then
, val
, true);
634 gsi
= gsi_start_bb (else_bb
);
635 expand_omp_build_assign (&gsi
, tmp_else
,
636 build_int_cst (unsigned_type_node
, 1),
639 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
640 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
641 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
642 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
643 e_then
= make_edge (then_bb
, bb
, EDGE_FALLTHRU
);
644 e_else
= make_edge (else_bb
, bb
, EDGE_FALLTHRU
);
646 if (gimple_in_ssa_p (cfun
))
648 gphi
*phi
= create_phi_node (tmp_join
, bb
);
649 add_phi_arg (phi
, tmp_then
, e_then
, UNKNOWN_LOCATION
);
650 add_phi_arg (phi
, tmp_else
, e_else
, UNKNOWN_LOCATION
);
656 gsi
= gsi_start_bb (bb
);
657 val
= force_gimple_operand_gsi (&gsi
, val
, true, NULL_TREE
,
658 false, GSI_CONTINUE_LINKING
);
661 gsi
= gsi_last_bb (bb
);
662 t
= gimple_omp_parallel_data_arg (entry_stmt
);
664 t1
= null_pointer_node
;
666 t1
= build_fold_addr_expr (t
);
667 tree child_fndecl
= gimple_omp_parallel_child_fn (entry_stmt
);
668 t2
= build_fold_addr_expr (child_fndecl
);
670 vec_alloc (args
, 4 + vec_safe_length (ws_args
));
671 args
->quick_push (t2
);
672 args
->quick_push (t1
);
673 args
->quick_push (val
);
675 args
->splice (*ws_args
);
676 args
->quick_push (flags
);
678 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
679 builtin_decl_explicit (start_ix
), args
);
681 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
682 false, GSI_CONTINUE_LINKING
);
684 if (hsa_gen_requested_p ()
685 && parallel_needs_hsa_kernel_p (region
))
687 cgraph_node
*child_cnode
= cgraph_node::get (child_fndecl
);
688 hsa_register_kernel (child_cnode
);
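
/* For illustration only: for a plain (non-combined) parallel region the
   call emitted above has the shape

	GOMP_parallel (foo._omp_fn.0, &.omp_data_o, num_threads, flags);

   where the thread count is 0 (runtime-selected) unless a num_threads or
   false if clause was present, and flags encodes the proc_bind clause.  */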
/* Insert a function call whose name is FUNC_NAME with the information from
   ENTRY_STMT into the basic_block BB.  */
696 expand_cilk_for_call (basic_block bb
, gomp_parallel
*entry_stmt
,
697 vec
<tree
, va_gc
> *ws_args
)
700 gimple_stmt_iterator gsi
;
701 vec
<tree
, va_gc
> *args
;
703 gcc_assert (vec_safe_length (ws_args
) == 2);
704 tree func_name
= (*ws_args
)[0];
705 tree grain
= (*ws_args
)[1];
707 tree clauses
= gimple_omp_parallel_clauses (entry_stmt
);
708 tree count
= omp_find_clause (clauses
, OMP_CLAUSE__CILK_FOR_COUNT_
);
709 gcc_assert (count
!= NULL_TREE
);
710 count
= OMP_CLAUSE_OPERAND (count
, 0);
712 gsi
= gsi_last_bb (bb
);
713 t
= gimple_omp_parallel_data_arg (entry_stmt
);
715 t1
= null_pointer_node
;
717 t1
= build_fold_addr_expr (t
);
718 t2
= build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt
));
721 args
->quick_push (t2
);
722 args
->quick_push (t1
);
723 args
->quick_push (count
);
724 args
->quick_push (grain
);
725 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
, func_name
, args
);
727 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, false,
728 GSI_CONTINUE_LINKING
);
/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */
735 expand_task_call (struct omp_region
*region
, basic_block bb
,
736 gomp_task
*entry_stmt
)
739 gimple_stmt_iterator gsi
;
740 location_t loc
= gimple_location (entry_stmt
);
742 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
744 tree ifc
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
745 tree untied
= omp_find_clause (clauses
, OMP_CLAUSE_UNTIED
);
746 tree mergeable
= omp_find_clause (clauses
, OMP_CLAUSE_MERGEABLE
);
747 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
748 tree finalc
= omp_find_clause (clauses
, OMP_CLAUSE_FINAL
);
749 tree priority
= omp_find_clause (clauses
, OMP_CLAUSE_PRIORITY
);
752 = (untied
? GOMP_TASK_FLAG_UNTIED
: 0)
753 | (mergeable
? GOMP_TASK_FLAG_MERGEABLE
: 0)
754 | (depend
? GOMP_TASK_FLAG_DEPEND
: 0);
756 bool taskloop_p
= gimple_omp_task_taskloop_p (entry_stmt
);
757 tree startvar
= NULL_TREE
, endvar
= NULL_TREE
, step
= NULL_TREE
;
758 tree num_tasks
= NULL_TREE
;
762 gimple
*g
= last_stmt (region
->outer
->entry
);
763 gcc_assert (gimple_code (g
) == GIMPLE_OMP_FOR
764 && gimple_omp_for_kind (g
) == GF_OMP_FOR_KIND_TASKLOOP
);
765 struct omp_for_data fd
;
766 omp_extract_for_data (as_a
<gomp_for
*> (g
), &fd
, NULL
);
767 startvar
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
768 endvar
= omp_find_clause (OMP_CLAUSE_CHAIN (startvar
),
769 OMP_CLAUSE__LOOPTEMP_
);
770 startvar
= OMP_CLAUSE_DECL (startvar
);
771 endvar
= OMP_CLAUSE_DECL (endvar
);
772 step
= fold_convert_loc (loc
, fd
.iter_type
, fd
.loop
.step
);
773 if (fd
.loop
.cond_code
== LT_EXPR
)
774 iflags
|= GOMP_TASK_FLAG_UP
;
775 tree tclauses
= gimple_omp_for_clauses (g
);
776 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_NUM_TASKS
);
778 num_tasks
= OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks
);
781 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_GRAINSIZE
);
784 iflags
|= GOMP_TASK_FLAG_GRAINSIZE
;
785 num_tasks
= OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks
);
788 num_tasks
= integer_zero_node
;
790 num_tasks
= fold_convert_loc (loc
, long_integer_type_node
, num_tasks
);
791 if (ifc
== NULL_TREE
)
792 iflags
|= GOMP_TASK_FLAG_IF
;
793 if (omp_find_clause (tclauses
, OMP_CLAUSE_NOGROUP
))
794 iflags
|= GOMP_TASK_FLAG_NOGROUP
;
795 ull
= fd
.iter_type
== long_long_unsigned_type_node
;
798 iflags
|= GOMP_TASK_FLAG_PRIORITY
;
800 tree flags
= build_int_cst (unsigned_type_node
, iflags
);
802 tree cond
= boolean_true_node
;
807 tree t
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
808 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
809 build_int_cst (unsigned_type_node
,
811 build_int_cst (unsigned_type_node
, 0));
812 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
,
816 cond
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
821 tree t
= gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc
));
822 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
823 build_int_cst (unsigned_type_node
,
824 GOMP_TASK_FLAG_FINAL
),
825 build_int_cst (unsigned_type_node
, 0));
826 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
, flags
, t
);
829 depend
= OMP_CLAUSE_DECL (depend
);
831 depend
= build_int_cst (ptr_type_node
, 0);
833 priority
= fold_convert (integer_type_node
,
834 OMP_CLAUSE_PRIORITY_EXPR (priority
));
836 priority
= integer_zero_node
;
838 gsi
= gsi_last_bb (bb
);
839 tree t
= gimple_omp_task_data_arg (entry_stmt
);
841 t2
= null_pointer_node
;
843 t2
= build_fold_addr_expr_loc (loc
, t
);
844 t1
= build_fold_addr_expr_loc (loc
, gimple_omp_task_child_fn (entry_stmt
));
845 t
= gimple_omp_task_copy_fn (entry_stmt
);
847 t3
= null_pointer_node
;
849 t3
= build_fold_addr_expr_loc (loc
, t
);
852 t
= build_call_expr (ull
853 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL
)
854 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP
),
856 gimple_omp_task_arg_size (entry_stmt
),
857 gimple_omp_task_arg_align (entry_stmt
), flags
,
858 num_tasks
, priority
, startvar
, endvar
, step
);
860 t
= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK
),
862 gimple_omp_task_arg_size (entry_stmt
),
863 gimple_omp_task_arg_align (entry_stmt
), cond
, flags
,
866 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
867 false, GSI_CONTINUE_LINKING
);
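
/* For illustration only: for a directive such as

	#pragma omp task untied mergeable final (expr)

   the flags built above start from GOMP_TASK_FLAG_UNTIED
   | GOMP_TASK_FLAG_MERGEABLE, the final (and, for taskloop, if) clauses are
   folded in as conditional additions, and the resulting call is GOMP_task,
   or GOMP_taskloop{,_ull} when a taskloop is being expanded.  */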
/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}
/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */
894 remove_exit_barrier (struct omp_region
*region
)
896 gimple_stmt_iterator gsi
;
901 int any_addressable_vars
= -1;
903 exit_bb
= region
->exit
;
  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
915 gsi
= gsi_last_bb (exit_bb
);
916 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
918 if (!gsi_end_p (gsi
) && gimple_code (gsi_stmt (gsi
)) != GIMPLE_LABEL
)
921 FOR_EACH_EDGE (e
, ei
, exit_bb
->preds
)
923 gsi
= gsi_last_bb (e
->src
);
926 stmt
= gsi_stmt (gsi
);
927 if (gimple_code (stmt
) == GIMPLE_OMP_RETURN
928 && !gimple_omp_return_nowait_p (stmt
))
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
938 if (any_addressable_vars
< 0)
940 gomp_parallel
*parallel_stmt
941 = as_a
<gomp_parallel
*> (last_stmt (region
->entry
));
942 tree child_fun
= gimple_omp_parallel_child_fn (parallel_stmt
);
943 tree local_decls
, block
, decl
;
946 any_addressable_vars
= 0;
947 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun
), ix
, decl
)
948 if (TREE_ADDRESSABLE (decl
))
950 any_addressable_vars
= 1;
953 for (block
= gimple_block (stmt
);
954 !any_addressable_vars
956 && TREE_CODE (block
) == BLOCK
;
957 block
= BLOCK_SUPERCONTEXT (block
))
959 for (local_decls
= BLOCK_VARS (block
);
961 local_decls
= DECL_CHAIN (local_decls
))
962 if (TREE_ADDRESSABLE (local_decls
))
964 any_addressable_vars
= 1;
967 if (block
== gimple_block (parallel_stmt
))
971 if (!any_addressable_vars
)
972 gimple_omp_return_set_nowait (stmt
);
978 remove_exit_barriers (struct omp_region
*region
)
980 if (region
->type
== GIMPLE_OMP_PARALLEL
)
981 remove_exit_barrier (region
);
985 region
= region
->inner
;
986 remove_exit_barriers (region
);
989 region
= region
->next
;
990 remove_exit_barriers (region
);
/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */
1004 optimize_omp_library_calls (gimple
*entry_stmt
)
1007 gimple_stmt_iterator gsi
;
1008 tree thr_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1009 tree thr_num_id
= DECL_ASSEMBLER_NAME (thr_num_tree
);
1010 tree num_thr_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1011 tree num_thr_id
= DECL_ASSEMBLER_NAME (num_thr_tree
);
1012 bool untied_task
= (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1013 && omp_find_clause (gimple_omp_task_clauses (entry_stmt
),
1014 OMP_CLAUSE_UNTIED
) != NULL
);
1016 FOR_EACH_BB_FN (bb
, cfun
)
1017 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1019 gimple
*call
= gsi_stmt (gsi
);
1022 if (is_gimple_call (call
)
1023 && (decl
= gimple_call_fndecl (call
))
1024 && DECL_EXTERNAL (decl
)
1025 && TREE_PUBLIC (decl
)
1026 && DECL_INITIAL (decl
) == NULL
)
1030 if (DECL_NAME (decl
) == thr_num_id
)
1032 /* In #pragma omp task untied omp_get_thread_num () can change
1033 during the execution of the task region. */
1036 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1038 else if (DECL_NAME (decl
) == num_thr_id
)
1039 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1043 if (DECL_ASSEMBLER_NAME (decl
) != DECL_ASSEMBLER_NAME (built_in
)
1044 || gimple_call_num_args (call
) != 0)
1047 if (flag_exceptions
&& !TREE_NOTHROW (decl
))
1050 if (TREE_CODE (TREE_TYPE (decl
)) != FUNCTION_TYPE
1051 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl
)),
1052 TREE_TYPE (TREE_TYPE (built_in
))))
1055 gimple_call_set_fndecl (call
, built_in
);
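
/* For illustration only: inside an outlined parallel body the replacement
   above turns

	D.1234 = omp_get_thread_num ();

   into a call to __builtin_omp_get_thread_num (), which is declared const,
   so later passes can CSE repeated queries of the thread number within the
   body.  */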
/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */
1064 expand_omp_regimplify_p (tree
*tp
, int *walk_subtrees
, void *)
1068 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1069 if (VAR_P (t
) && DECL_HAS_VALUE_EXPR_P (t
))
1072 if (TREE_CODE (t
) == ADDR_EXPR
)
1073 recompute_tree_invariant_for_addr_expr (t
);
1075 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
1079 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1082 expand_omp_build_assign (gimple_stmt_iterator
*gsi_p
, tree to
, tree from
,
1085 bool simple_p
= DECL_P (to
) && TREE_ADDRESSABLE (to
);
1086 from
= force_gimple_operand_gsi (gsi_p
, from
, simple_p
, NULL_TREE
,
1087 !after
, after
? GSI_CONTINUE_LINKING
1089 gimple
*stmt
= gimple_build_assign (to
, from
);
1091 gsi_insert_after (gsi_p
, stmt
, GSI_CONTINUE_LINKING
);
1093 gsi_insert_before (gsi_p
, stmt
, GSI_SAME_STMT
);
1094 if (walk_tree (&from
, expand_omp_regimplify_p
, NULL
, NULL
)
1095 || walk_tree (&to
, expand_omp_regimplify_p
, NULL
, NULL
))
1097 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1098 gimple_regimplify_operands (stmt
, &gsi
);
1102 /* Expand the OpenMP parallel or task directive starting at REGION. */
1105 expand_omp_taskreg (struct omp_region
*region
)
1107 basic_block entry_bb
, exit_bb
, new_bb
;
1108 struct function
*child_cfun
;
1109 tree child_fn
, block
, t
;
1110 gimple_stmt_iterator gsi
;
1111 gimple
*entry_stmt
, *stmt
;
1113 vec
<tree
, va_gc
> *ws_args
;
1115 entry_stmt
= last_stmt (region
->entry
);
1116 child_fn
= gimple_omp_taskreg_child_fn (entry_stmt
);
1117 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
1119 entry_bb
= region
->entry
;
1120 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
)
1121 exit_bb
= region
->cont
;
1123 exit_bb
= region
->exit
;
1127 && gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
1128 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt
),
1129 OMP_CLAUSE__CILK_FOR_COUNT_
) != NULL_TREE
);
  /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
     and the inner statement contains the name of the built-in function
     and grain.  */
1135 ws_args
= region
->inner
->ws_args
;
1136 else if (is_combined_parallel (region
))
1137 ws_args
= region
->ws_args
;
1141 if (child_cfun
->cfg
)
1143 /* Due to inlining, it may happen that we have already outlined
1144 the region, in which case all we need to do is make the
1145 sub-graph unreachable and emit the parallel call. */
1146 edge entry_succ_e
, exit_succ_e
;
1148 entry_succ_e
= single_succ_edge (entry_bb
);
1150 gsi
= gsi_last_bb (entry_bb
);
1151 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_PARALLEL
1152 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
);
1153 gsi_remove (&gsi
, true);
1158 exit_succ_e
= single_succ_edge (exit_bb
);
1159 make_edge (new_bb
, exit_succ_e
->dest
, EDGE_FALLTHRU
);
1161 remove_edge_and_dominated_blocks (entry_succ_e
);
1165 unsigned srcidx
, dstidx
, num
;
      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
1180 if (gimple_omp_taskreg_data_arg (entry_stmt
))
1182 basic_block entry_succ_bb
1183 = single_succ_p (entry_bb
) ? single_succ (entry_bb
)
1184 : FALLTHRU_EDGE (entry_bb
)->dest
;
1186 gimple
*parcopy_stmt
= NULL
;
1188 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
1192 gcc_assert (!gsi_end_p (gsi
));
1193 stmt
= gsi_stmt (gsi
);
1194 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1197 if (gimple_num_ops (stmt
) == 2)
1199 tree arg
= gimple_assign_rhs1 (stmt
);
	      /* We're ignoring the subcode because we're
		 effectively doing a STRIP_NOPS.  */
1204 if (TREE_CODE (arg
) == ADDR_EXPR
1205 && TREE_OPERAND (arg
, 0)
1206 == gimple_omp_taskreg_data_arg (entry_stmt
))
1208 parcopy_stmt
= stmt
;
1214 gcc_assert (parcopy_stmt
!= NULL
);
1215 arg
= DECL_ARGUMENTS (child_fn
);
1217 if (!gimple_in_ssa_p (cfun
))
1219 if (gimple_assign_lhs (parcopy_stmt
) == arg
)
1220 gsi_remove (&gsi
, true);
1223 /* ?? Is setting the subcode really necessary ?? */
1224 gimple_omp_set_subcode (parcopy_stmt
, TREE_CODE (arg
));
1225 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1230 tree lhs
= gimple_assign_lhs (parcopy_stmt
);
1231 gcc_assert (SSA_NAME_VAR (lhs
) == arg
);
	  /* We'd like to set the rhs to the default def in the child_fn,
	     but it's too early to create ssa names in the child_fn.
	     Instead, we set the rhs to the parm.  In
	     move_sese_region_to_fn, we introduce a default def for the
	     parm, map the parm to its default def, and once we encounter
	     this stmt, replace the parm with the default def.  */
1238 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1239 update_stmt (parcopy_stmt
);
1243 /* Declare local variables needed in CHILD_CFUN. */
1244 block
= DECL_INITIAL (child_fn
);
1245 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
1246 /* The gimplifier could record temporaries in parallel/task block
1247 rather than in containing function's local_decls chain,
1248 which would mean cgraph missed finalizing them. Do it now. */
1249 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
1250 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
1251 varpool_node::finalize_decl (t
);
1252 DECL_SAVED_TREE (child_fn
) = NULL
;
1253 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1254 gimple_set_body (child_fn
, NULL
);
1255 TREE_USED (block
) = 1;
1257 /* Reset DECL_CONTEXT on function arguments. */
1258 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
1259 DECL_CONTEXT (t
) = child_fn
;
1261 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1262 so that it can be moved to the child function. */
1263 gsi
= gsi_last_bb (entry_bb
);
1264 stmt
= gsi_stmt (gsi
);
1265 gcc_assert (stmt
&& (gimple_code (stmt
) == GIMPLE_OMP_PARALLEL
1266 || gimple_code (stmt
) == GIMPLE_OMP_TASK
));
1267 e
= split_block (entry_bb
, stmt
);
1268 gsi_remove (&gsi
, true);
1271 if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1272 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
1275 e2
= make_edge (e
->src
, BRANCH_EDGE (entry_bb
)->dest
, EDGE_ABNORMAL
);
1276 gcc_assert (e2
->dest
== region
->exit
);
1277 remove_edge (BRANCH_EDGE (entry_bb
));
1278 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e
->src
);
1279 gsi
= gsi_last_bb (region
->exit
);
1280 gcc_assert (!gsi_end_p (gsi
)
1281 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1282 gsi_remove (&gsi
, true);
1285 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1288 gsi
= gsi_last_bb (exit_bb
);
1289 gcc_assert (!gsi_end_p (gsi
)
1290 && (gimple_code (gsi_stmt (gsi
))
1291 == (e2
? GIMPLE_OMP_CONTINUE
: GIMPLE_OMP_RETURN
)));
1292 stmt
= gimple_build_return (NULL
);
1293 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1294 gsi_remove (&gsi
, true);
1297 /* Move the parallel region into CHILD_CFUN. */
1299 if (gimple_in_ssa_p (cfun
))
1301 init_tree_ssa (child_cfun
);
1302 init_ssa_operands (child_cfun
);
1303 child_cfun
->gimple_df
->in_ssa_p
= true;
1307 block
= gimple_block (entry_stmt
);
1309 /* Make sure to generate early debug for the function before
1310 outlining anything. */
1311 if (! gimple_in_ssa_p (cfun
))
1312 (*debug_hooks
->early_global_decl
) (cfun
->decl
);
1314 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
1316 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
1319 basic_block dest_bb
= e2
->dest
;
1321 make_edge (new_bb
, dest_bb
, EDGE_FALLTHRU
);
1323 set_immediate_dominator (CDI_DOMINATORS
, dest_bb
, new_bb
);
1325 /* When the OMP expansion process cannot guarantee an up-to-date
1326 loop tree arrange for the child function to fixup loops. */
1327 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1328 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
1330 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1331 num
= vec_safe_length (child_cfun
->local_decls
);
1332 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
1334 t
= (*child_cfun
->local_decls
)[srcidx
];
1335 if (DECL_CONTEXT (t
) == cfun
->decl
)
1337 if (srcidx
!= dstidx
)
1338 (*child_cfun
->local_decls
)[dstidx
] = t
;
1342 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
1344 /* Inform the callgraph about the new function. */
1345 child_cfun
->curr_properties
= cfun
->curr_properties
;
1346 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
1347 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
1348 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
1349 node
->parallelized_function
= 1;
1350 cgraph_node::add_new_function (child_fn
, true);
1352 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
1353 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
1355 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1356 fixed in a following pass. */
1357 push_cfun (child_cfun
);
1359 assign_assembler_name_if_needed (child_fn
);
1362 optimize_omp_library_calls (entry_stmt
);
1363 cgraph_edge::rebuild_edges ();
1365 /* Some EH regions might become dead, see PR34608. If
1366 pass_cleanup_cfg isn't the first pass to happen with the
1367 new child, these dead EH edges might cause problems.
1368 Clean them up now. */
1369 if (flag_exceptions
)
1372 bool changed
= false;
1374 FOR_EACH_BB_FN (bb
, cfun
)
1375 changed
|= gimple_purge_dead_eh_edges (bb
);
1377 cleanup_tree_cfg ();
1379 if (gimple_in_ssa_p (cfun
))
1380 update_ssa (TODO_update_ssa
);
1381 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1382 verify_loop_structure ();
1385 if (dump_file
&& !gimple_in_ssa_p (cfun
))
1387 omp_any_child_fn_dumped
= true;
1388 dump_function_header (dump_file
, child_fn
, dump_flags
);
1389 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
1393 /* Emit a library call to launch the children threads. */
1395 expand_cilk_for_call (new_bb
,
1396 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1397 else if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1398 expand_parallel_call (region
, new_bb
,
1399 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1401 expand_task_call (region
, new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1402 if (gimple_in_ssa_p (cfun
))
1403 update_ssa (TODO_update_ssa_only_virtuals
);
/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var. */
};
1417 /* Helper for expand_oacc_for. Determine collapsed loop information.
1418 Fill in COUNTS array. Emit any initialization code before GSI.
1419 Return the calculated outer loop bound of BOUND_TYPE. */
1422 expand_oacc_collapse_init (const struct omp_for_data
*fd
,
1423 gimple_stmt_iterator
*gsi
,
1424 oacc_collapse
*counts
, tree bound_type
,
1427 tree tiling
= fd
->tiling
;
1428 tree total
= build_int_cst (bound_type
, 1);
1431 gcc_assert (integer_onep (fd
->loop
.step
));
1432 gcc_assert (integer_zerop (fd
->loop
.n1
));
1434 /* When tiling, the first operand of the tile clause applies to the
1435 innermost loop, and we work outwards from there. Seems
1436 backwards, but whatever. */
1437 for (ix
= fd
->collapse
; ix
--;)
1439 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1441 tree iter_type
= TREE_TYPE (loop
->v
);
1442 tree diff_type
= iter_type
;
1443 tree plus_type
= iter_type
;
1445 gcc_assert (loop
->cond_code
== fd
->loop
.cond_code
);
1447 if (POINTER_TYPE_P (iter_type
))
1448 plus_type
= sizetype
;
1449 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
1450 diff_type
= signed_type_for (diff_type
);
1454 tree num
= build_int_cst (integer_type_node
, fd
->collapse
);
1455 tree loop_no
= build_int_cst (integer_type_node
, ix
);
1456 tree tile
= TREE_VALUE (tiling
);
1458 = gimple_build_call_internal (IFN_GOACC_TILE
, 5, num
, loop_no
, tile
,
1459 /* gwv-outer=*/integer_zero_node
,
1460 /* gwv-inner=*/integer_zero_node
);
1462 counts
[ix
].outer
= create_tmp_var (iter_type
, ".outer");
1463 counts
[ix
].tile
= create_tmp_var (diff_type
, ".tile");
1464 gimple_call_set_lhs (call
, counts
[ix
].tile
);
1465 gimple_set_location (call
, loc
);
1466 gsi_insert_before (gsi
, call
, GSI_SAME_STMT
);
1468 tiling
= TREE_CHAIN (tiling
);
1472 counts
[ix
].tile
= NULL
;
1473 counts
[ix
].outer
= loop
->v
;
1478 tree s
= loop
->step
;
1479 bool up
= loop
->cond_code
== LT_EXPR
;
1480 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
1484 b
= force_gimple_operand_gsi (gsi
, b
, true, NULL_TREE
,
1485 true, GSI_SAME_STMT
);
1486 e
= force_gimple_operand_gsi (gsi
, e
, true, NULL_TREE
,
1487 true, GSI_SAME_STMT
);
1489 /* Convert the step, avoiding possible unsigned->signed overflow. */
1490 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
1492 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
1493 s
= fold_convert (diff_type
, s
);
1495 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
1496 s
= force_gimple_operand_gsi (gsi
, s
, true, NULL_TREE
,
1497 true, GSI_SAME_STMT
);
1499 /* Determine the range, avoiding possible unsigned->signed overflow. */
1500 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
1501 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
1502 fold_convert (plus_type
, negating
? b
: e
),
1503 fold_convert (plus_type
, negating
? e
: b
));
1504 expr
= fold_convert (diff_type
, expr
);
1506 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
1507 tree range
= force_gimple_operand_gsi
1508 (gsi
, expr
, true, NULL_TREE
, true, GSI_SAME_STMT
);
1510 /* Determine number of iterations. */
1511 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
1512 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
1513 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
1515 tree iters
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
,
1516 true, GSI_SAME_STMT
);
1518 counts
[ix
].base
= b
;
1519 counts
[ix
].iters
= iters
;
1520 counts
[ix
].step
= s
;
1522 total
= fold_build2 (MULT_EXPR
, bound_type
, total
,
1523 fold_convert (bound_type
, iters
));
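
/* For illustration only: for one member of the collapsed nest above, with
   base B, end E, step S and direction DIR (+1 or -1), the iteration count
   is essentially

	range = E - B;
	iters = (range - DIR + S) / S;

   and TOTAL accumulates the product of the per-loop counts, giving the
   bound of the single collapsed iteration space.  */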
/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
1536 expand_oacc_collapse_vars (const struct omp_for_data
*fd
, bool inner
,
1537 gimple_stmt_iterator
*gsi
,
1538 const oacc_collapse
*counts
, tree ivar
)
1540 tree ivar_type
= TREE_TYPE (ivar
);
  /* The most rapidly changing iteration variable is the innermost
     one.  */
1544 for (int ix
= fd
->collapse
; ix
--;)
1546 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1547 const oacc_collapse
*collapse
= &counts
[ix
];
1548 tree v
= inner
? loop
->v
: collapse
->outer
;
1549 tree iter_type
= TREE_TYPE (v
);
1550 tree diff_type
= TREE_TYPE (collapse
->step
);
1551 tree plus_type
= iter_type
;
1552 enum tree_code plus_code
= PLUS_EXPR
;
1555 if (POINTER_TYPE_P (iter_type
))
1557 plus_code
= POINTER_PLUS_EXPR
;
1558 plus_type
= sizetype
;
1564 tree mod
= fold_convert (ivar_type
, collapse
->iters
);
1565 ivar
= fold_build2 (TRUNC_DIV_EXPR
, ivar_type
, expr
, mod
);
1566 expr
= fold_build2 (TRUNC_MOD_EXPR
, ivar_type
, expr
, mod
);
1567 ivar
= force_gimple_operand_gsi (gsi
, ivar
, true, NULL_TREE
,
1568 true, GSI_SAME_STMT
);
1571 expr
= fold_build2 (MULT_EXPR
, diff_type
, fold_convert (diff_type
, expr
),
1573 expr
= fold_build2 (plus_code
, iter_type
,
1574 inner
? collapse
->outer
: collapse
->base
,
1575 fold_convert (plus_type
, expr
));
1576 expr
= force_gimple_operand_gsi (gsi
, expr
, false, NULL_TREE
,
1577 true, GSI_SAME_STMT
);
1578 gassign
*ass
= gimple_build_assign (v
, expr
);
1579 gsi_insert_before (gsi
, ass
, GSI_SAME_STMT
);
/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

     bool zero3, zero2, zero1, zero;

     zero3 = N32 c3 N31;
     count3 = (N32 - N31) /[cl] STEP3;
     zero2 = N22 c2 N21;
     count2 = (N22 - N21) /[cl] STEP2;
     zero1 = N12 c1 N11;
     count1 = (N12 - N11) /[cl] STEP1;
     zero = zero3 || zero2 || zero1;
     count = count1 * count2 * count3;
     if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */
1637 expand_omp_for_init_counts (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1638 basic_block
&entry_bb
, tree
*counts
,
1639 basic_block
&zero_iter1_bb
, int &first_zero_iter1
,
1640 basic_block
&zero_iter2_bb
, int &first_zero_iter2
,
1641 basic_block
&l2_dom_bb
)
1643 tree t
, type
= TREE_TYPE (fd
->loop
.v
);
1647 /* Collapsed loops need work for expansion into SSA form. */
1648 gcc_assert (!gimple_in_ssa_p (cfun
));
1650 if (gimple_omp_for_combined_into_p (fd
->for_stmt
)
1651 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
1653 gcc_assert (fd
->ordered
== 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
1657 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
1658 OMP_CLAUSE__LOOPTEMP_
);
1659 gcc_assert (innerc
);
1660 for (i
= 0; i
< fd
->collapse
; i
++)
1662 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1663 OMP_CLAUSE__LOOPTEMP_
);
1664 gcc_assert (innerc
);
1666 counts
[i
] = OMP_CLAUSE_DECL (innerc
);
1668 counts
[0] = NULL_TREE
;
1673 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1675 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1676 counts
[i
] = NULL_TREE
;
1677 t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1678 fold_convert (itype
, fd
->loops
[i
].n1
),
1679 fold_convert (itype
, fd
->loops
[i
].n2
));
1680 if (t
&& integer_zerop (t
))
1682 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1683 counts
[i
] = build_int_cst (type
, 0);
1687 for (i
= 0; i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
1689 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1691 if (i
>= fd
->collapse
&& counts
[i
])
1693 if ((SSA_VAR_P (fd
->loop
.n2
) || i
>= fd
->collapse
)
1694 && ((t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1695 fold_convert (itype
, fd
->loops
[i
].n1
),
1696 fold_convert (itype
, fd
->loops
[i
].n2
)))
1697 == NULL_TREE
|| !integer_onep (t
)))
1701 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
1702 n1
= force_gimple_operand_gsi (gsi
, n1
, true, NULL_TREE
,
1703 true, GSI_SAME_STMT
);
1704 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
1705 n2
= force_gimple_operand_gsi (gsi
, n2
, true, NULL_TREE
,
1706 true, GSI_SAME_STMT
);
1707 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
1708 NULL_TREE
, NULL_TREE
);
1709 gsi_insert_before (gsi
, cond_stmt
, GSI_SAME_STMT
);
1710 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
1711 expand_omp_regimplify_p
, NULL
, NULL
)
1712 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
1713 expand_omp_regimplify_p
, NULL
, NULL
))
1715 *gsi
= gsi_for_stmt (cond_stmt
);
1716 gimple_regimplify_operands (cond_stmt
, gsi
);
1718 e
= split_block (entry_bb
, cond_stmt
);
1719 basic_block
&zero_iter_bb
1720 = i
< fd
->collapse
? zero_iter1_bb
: zero_iter2_bb
;
1721 int &first_zero_iter
1722 = i
< fd
->collapse
? first_zero_iter1
: first_zero_iter2
;
1723 if (zero_iter_bb
== NULL
)
1725 gassign
*assign_stmt
;
1726 first_zero_iter
= i
;
1727 zero_iter_bb
= create_empty_bb (entry_bb
);
1728 add_bb_to_loop (zero_iter_bb
, entry_bb
->loop_father
);
1729 *gsi
= gsi_after_labels (zero_iter_bb
);
1730 if (i
< fd
->collapse
)
1731 assign_stmt
= gimple_build_assign (fd
->loop
.n2
,
1732 build_zero_cst (type
));
1735 counts
[i
] = create_tmp_reg (type
, ".count");
1737 = gimple_build_assign (counts
[i
], build_zero_cst (type
));
1739 gsi_insert_before (gsi
, assign_stmt
, GSI_SAME_STMT
);
1740 set_immediate_dominator (CDI_DOMINATORS
, zero_iter_bb
,
1743 ne
= make_edge (entry_bb
, zero_iter_bb
, EDGE_FALSE_VALUE
);
1744 ne
->probability
= profile_probability::very_unlikely ();
1745 e
->flags
= EDGE_TRUE_VALUE
;
1746 e
->probability
= ne
->probability
.invert ();
1747 if (l2_dom_bb
== NULL
)
1748 l2_dom_bb
= entry_bb
;
1750 *gsi
= gsi_last_bb (entry_bb
);
1753 if (POINTER_TYPE_P (itype
))
1754 itype
= signed_type_for (itype
);
1755 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
1757 t
= fold_build2 (PLUS_EXPR
, itype
,
1758 fold_convert (itype
, fd
->loops
[i
].step
), t
);
1759 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
1760 fold_convert (itype
, fd
->loops
[i
].n2
));
1761 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
1762 fold_convert (itype
, fd
->loops
[i
].n1
));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT_EXPR.  */
1768 if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
1769 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1770 fold_build1 (NEGATE_EXPR
, itype
, t
),
1771 fold_build1 (NEGATE_EXPR
, itype
,
1772 fold_convert (itype
,
1773 fd
->loops
[i
].step
)));
1775 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
1776 fold_convert (itype
, fd
->loops
[i
].step
));
1777 t
= fold_convert (type
, t
);
1778 if (TREE_CODE (t
) == INTEGER_CST
)
1782 if (i
< fd
->collapse
|| i
!= first_zero_iter2
)
1783 counts
[i
] = create_tmp_reg (type
, ".count");
1784 expand_omp_build_assign (gsi
, counts
[i
], t
);
1786 if (SSA_VAR_P (fd
->loop
.n2
) && i
< fd
->collapse
)
1791 t
= fold_build2 (MULT_EXPR
, type
, fd
->loop
.n2
, counts
[i
]);
1792 expand_omp_build_assign (gsi
, fd
->loop
.n2
, t
);
/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.  */
1810 expand_omp_for_init_vars (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1811 tree
*counts
, gimple
*inner_stmt
, tree startvar
)
1814 if (gimple_omp_for_combined_p (fd
->for_stmt
))
1816 /* If fd->loop.n2 is constant, then no propagation of the counts
1817 is needed, they are constant. */
1818 if (TREE_CODE (fd
->loop
.n2
) == INTEGER_CST
)
1821 tree clauses
= gimple_code (inner_stmt
) != GIMPLE_OMP_FOR
1822 ? gimple_omp_taskreg_clauses (inner_stmt
)
1823 : gimple_omp_for_clauses (inner_stmt
);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
1827 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
1828 gcc_assert (innerc
);
1829 for (i
= 0; i
< fd
->collapse
; i
++)
1831 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1832 OMP_CLAUSE__LOOPTEMP_
);
1833 gcc_assert (innerc
);
1836 tree tem
= OMP_CLAUSE_DECL (innerc
);
1837 tree t
= fold_convert (TREE_TYPE (tem
), counts
[i
]);
1838 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1839 false, GSI_CONTINUE_LINKING
);
1840 gassign
*stmt
= gimple_build_assign (tem
, t
);
1841 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1847 tree type
= TREE_TYPE (fd
->loop
.v
);
1848 tree tem
= create_tmp_reg (type
, ".tem");
1849 gassign
*stmt
= gimple_build_assign (tem
, startvar
);
1850 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1852 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1854 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
), itype
, t
;
1856 if (POINTER_TYPE_P (vtype
))
1857 itype
= signed_type_for (vtype
);
1859 t
= fold_build2 (TRUNC_MOD_EXPR
, type
, tem
, counts
[i
]);
1862 t
= fold_convert (itype
, t
);
1863 t
= fold_build2 (MULT_EXPR
, itype
, t
,
1864 fold_convert (itype
, fd
->loops
[i
].step
));
1865 if (POINTER_TYPE_P (vtype
))
1866 t
= fold_build_pointer_plus (fd
->loops
[i
].n1
, t
);
1868 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
].n1
, t
);
1869 t
= force_gimple_operand_gsi (gsi
, t
,
1870 DECL_P (fd
->loops
[i
].v
)
1871 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1873 GSI_CONTINUE_LINKING
);
1874 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1875 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1878 t
= fold_build2 (TRUNC_DIV_EXPR
, type
, tem
, counts
[i
]);
1879 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1880 false, GSI_CONTINUE_LINKING
);
1881 stmt
= gimple_build_assign (tem
, t
);
1882 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
/* Helper function for expand_omp_for_*.  Generate code like:
    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;  */
1901 extract_omp_for_update_vars (struct omp_for_data
*fd
, basic_block cont_bb
,
1902 basic_block body_bb
)
1904 basic_block last_bb
, bb
, collapse_bb
= NULL
;
1906 gimple_stmt_iterator gsi
;
1912 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1914 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
);
1916 bb
= create_empty_bb (last_bb
);
1917 add_bb_to_loop (bb
, last_bb
->loop_father
);
1918 gsi
= gsi_start_bb (bb
);
1920 if (i
< fd
->collapse
- 1)
1922 e
= make_edge (last_bb
, bb
, EDGE_FALSE_VALUE
);
1923 e
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
1925 t
= fd
->loops
[i
+ 1].n1
;
1926 t
= force_gimple_operand_gsi (&gsi
, t
,
1927 DECL_P (fd
->loops
[i
+ 1].v
)
1928 && TREE_ADDRESSABLE (fd
->loops
[i
1931 GSI_CONTINUE_LINKING
);
1932 stmt
= gimple_build_assign (fd
->loops
[i
+ 1].v
, t
);
1933 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1938 set_immediate_dominator (CDI_DOMINATORS
, bb
, last_bb
);
1940 if (POINTER_TYPE_P (vtype
))
1941 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, fd
->loops
[i
].step
);
1943 t
= fold_build2 (PLUS_EXPR
, vtype
, fd
->loops
[i
].v
, fd
->loops
[i
].step
);
1944 t
= force_gimple_operand_gsi (&gsi
, t
,
1945 DECL_P (fd
->loops
[i
].v
)
1946 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1947 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
1948 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1949 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1953 t
= fd
->loops
[i
].n2
;
1954 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
1955 false, GSI_CONTINUE_LINKING
);
1956 tree v
= fd
->loops
[i
].v
;
1957 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
1958 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
1959 false, GSI_CONTINUE_LINKING
);
1960 t
= fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
1961 stmt
= gimple_build_cond_empty (t
);
1962 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
1963 e
= make_edge (bb
, body_bb
, EDGE_TRUE_VALUE
);
1964 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
1967 make_edge (bb
, body_bb
, EDGE_FALLTHRU
);
/* Expand #pragma omp ordered depend(source).  */

static void
expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			   tree *counts, location_t loc)
{
  enum built_in_function source_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
  gimple *g
    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
			 build_fold_addr_expr (counts[fd->ordered]));
  gimple_set_location (g, loc);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
}
/* Expand a single depend from #pragma omp ordered depend(sink:...).  */

static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc)
{
  auto_vec<tree, 10> args;
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  bool warned_step = false;

  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable,
		 we don't know at compile even the direction, so can't
		 warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
			"lexically later iteration");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      tree t1, t2;
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
			  "in the iteration space");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else if (counts[i])
	t = counts[i];
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
				   GSI_CONTINUE_LINKING);
  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
  e1->probability = e3->probability.invert ();
  e1->flags = EDGE_TRUE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

  *gsi = gsi_after_labels (e2->dest);
}
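
/* For reference, a hedged user-level example (not taken from this file) of
   the kind of doacross loop whose depend(source)/depend(sink:...) clauses
   the two functions above lower into GOMP_doacross_post/GOMP_doacross_wait
   calls.  The array name and bounds are invented for illustration; compile
   with -fopenmp.

     void
     wavefront (int n, int m, double a[n][m])
     {
       int i, j;
     #pragma omp parallel for ordered(2)
       for (i = 1; i < n; i++)
	 for (j = 1; j < m; j++)
	   {
     #pragma omp ordered depend (sink: i - 1, j) depend (sink: i, j - 1)
	     a[i][j] += a[i - 1][j] + a[i][j - 1];
     #pragma omp ordered depend (source)
	   }
     }
*/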
/* Expand all #pragma omp ordered depend(source) and
   #pragma omp ordered depend(sink:...) constructs in the current
   #pragma omp for ordered(n) region.  */

static void
expand_omp_ordered_source_sink (struct omp_region *region,
				struct omp_for_data *fd, tree *counts,
				basic_block cont_bb)
{
  struct omp_region *inner;
  int i;
  for (i = fd->collapse - 1; i < fd->ordered; i++)
    if (i == fd->collapse - 1 && fd->collapse > 1)
      counts[i] = NULL_TREE;
    else if (i >= fd->collapse && !cont_bb)
      counts[i] = build_zero_cst (fd->iter_type);
    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
	     && integer_onep (fd->loops[i].step))
      counts[i] = NULL_TREE;
    else
      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
  tree atype
    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;

  for (inner = region->inner; inner; inner = inner->next)
    if (inner->type == GIMPLE_OMP_ORDERED)
      {
	gomp_ordered *ord_stmt = inner->ord_stmt;
	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
	location_t loc = gimple_location (ord_stmt);
	tree c;
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
	    break;
	if (c)
	  expand_omp_ordered_source (&gsi, fd, counts, loc);
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
	gsi_remove (&gsi, true);
      }
}
/* Wrap the body into fd->ordered - fd->collapse loops that aren't
   collapsed.  */

static basic_block
expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
			      basic_block cont_bb, basic_block body_bb,
			      bool ordered_lastprivate)
{
  if (fd->ordered == fd->collapse)
    return cont_bb;

  if (!cont_bb)
    {
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      for (int i = fd->collapse; i < fd->ordered; i++)
	{
	  tree type = TREE_TYPE (fd->loops[i].v);
	  tree n1 = fold_convert (type, fd->loops[i].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_int (i - fd->collapse + 1),
			      NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
	}
      return NULL;
    }

  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
    {
      tree t, type = TREE_TYPE (fd->loops[i].v);
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      expand_omp_build_assign (&gsi, fd->loops[i].v,
			       fold_convert (type, fd->loops[i].n1));
      if (counts[i])
	expand_omp_build_assign (&gsi, counts[i],
				 build_zero_cst (fd->iter_type));
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_int (i - fd->collapse + 1),
			  NULL_TREE, NULL_TREE);
      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
      else
	gsi = gsi_last_bb (body_bb);
      edge e1 = split_block (body_bb, gsi_stmt (gsi));
      basic_block new_body = e1->dest;
      if (body_bb == cont_bb)
	cont_bb = new_body;
      edge e2 = NULL;
      basic_block new_header;
      if (EDGE_COUNT (cont_bb->preds) > 0)
	{
	  gsi = gsi_last_bb (cont_bb);
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loops[i].v,
					 fold_convert (sizetype,
						       fd->loops[i].step));
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
			     fold_convert (type, fd->loops[i].step));
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  if (counts[i])
	    {
	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[i], t);
	      t = counts[i];
	    }
	  else
	    {
	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	      t = fold_convert (fd->iter_type, t);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			 size_int (i - fd->collapse + 1),
			 NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, t);
	  gsi_prev (&gsi);
	  e2 = split_block (cont_bb, gsi_stmt (gsi));
	  new_header = e2->dest;
	}
      else
	new_header = cont_bb;
      gsi = gsi_after_labels (new_header);
      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
					 true, GSI_SAME_STMT);
      tree n2
	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
      edge e3 = split_block (new_header, gsi_stmt (gsi));
      cont_bb = e3->dest;
      remove_edge (e1);
      make_edge (body_bb, new_header, EDGE_FALLTHRU);
      e3->flags = EDGE_FALSE_VALUE;
      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
      e1->probability = e3->probability.invert ();

      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);

      if (e2)
	{
	  struct loop *loop = alloc_loop ();
	  loop->header = new_header;
	  loop->latch = e2->src;
	  add_loop (loop, body_bb->loop_father);
	}
    }

  /* If there are any lastprivate clauses and it is possible some loops
     might have zero iterations, ensure all the decls are initialized,
     otherwise we could crash evaluating C++ class iterators with lastprivate
     clauses.  */
  bool need_inits = false;
  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
    if (need_inits)
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
	expand_omp_build_assign (&gsi, fd->loops[i].v,
				 fold_convert (type, fd->loops[i].n1));
      }
    else
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	tree this_cond = fold_build2 (fd->loops[i].cond_code,
				      boolean_type_node,
				      fold_convert (type, fd->loops[i].n1),
				      fold_convert (type, fd->loops[i].n2));
	if (!integer_onep (this_cond))
	  need_inits = true;
      }

  return cont_bb;
}
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with any schedule.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	iend = iend0;
    L1:
	BODY;
	V += STEP;
	if (V cond iend) goto L1; else goto L2;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    If this is a combined omp parallel loop, instead of the call to
    GOMP_loop_foo_start, we call GOMP_loop_foo_next.

    If this is gimple_omp_for_combined_p loop, then instead of assigning
    V and iend in L0 we assign the first two _looptemp_ clause decls of the
    inner GIMPLE_OMP_FOR and V += STEP; and
    if (V cond iend) goto L1; else goto L2; are removed.

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

    we generate pseudocode

	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	goto Z1;
    Z0:
	count = 0;
    Z1:
	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
	iend = iend0;
    L1:
	BODY;
	V += 1;
	if (V < iend) goto L10; else goto L2;
    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto L1; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto L1; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto L1;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

      */
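
/* A minimal user-level sketch of the control flow described by the
   pseudocode above, written against the public libgomp entry points for the
   runtime-schedule case (GOMP_loop_runtime_start takes no CHUNK argument).
   This is illustrative only: the body callback, the bounds and the
   assumption of a "<" loop with positive step are placeholders, not
   something this pass emits.

     #include <stdbool.h>

     extern bool GOMP_loop_runtime_start (long, long, long, long *, long *);
     extern bool GOMP_loop_runtime_next (long *, long *);
     extern void GOMP_loop_end (void);

     static void
     run_loop (long n1, long n2, long step, void (*body) (long))
     {
       long istart, iend;
       if (GOMP_loop_runtime_start (n1, n2, step, &istart, &iend))
	 do
	   {
	     for (long v = istart; v < iend; v += step)
	       body (v);
	   }
	 while (GOMP_loop_runtime_next (&istart, &iend));
       GOMP_loop_end ();
     }
*/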
static void
expand_omp_for_generic (struct omp_region *region,
			struct omp_for_data *fd,
			enum built_in_function start_fn,
			enum built_in_function next_fn,
			gimple *inner_stmt)
{
  tree type, istart0, iend0, iend;
  tree t, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
  basic_block l2_bb = NULL, l3_bb = NULL;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  bool in_combined_parallel = is_combined_parallel (region);
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  bool ordered_lastprivate = false;

  gcc_assert (!broken_loop || !in_combined_parallel);
  gcc_assert (fd->iter_type == long_integer_type_node
	      || !in_combined_parallel);
  entry_bb = region->entry;
  cont_bb = region->cont;
  collapse_bb = NULL;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  l1_bb = single_succ (l0_bb);
  if (!broken_loop)
    {
      l2_bb = create_empty_bb (cont_bb);
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
		      == l1_bb));
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }

  l3_bb = BRANCH_EDGE (entry_bb)->dest;
  exit_bb = region->exit;

  gsi = gsi_last_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  if (fd->ordered
      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
			  OMP_CLAUSE_LASTPRIVATE))
    ordered_lastprivate = false;
  if (fd->collapse > 1 || fd->ordered)
    {
      int first_zero_iter1 = -1, first_zero_iter2 = -1;
      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter1_bb, first_zero_iter1,
				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);

      if (zero_iter1_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter1;
	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter1_bb));
	}
      if (zero_iter2_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter2; i < fd->ordered; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  if (zero_iter1_bb)
	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
	  else
	    {
	      gsi_prev (&gsi);
	      e = split_block (entry_bb, gsi_stmt (gsi));
	      entry_bb = e->dest;
	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
	      gsi = gsi_last_bb (entry_bb);
	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				       get_immediate_dominator
					 (CDI_DOMINATORS, zero_iter2_bb));
	    }
	}
      if (fd->collapse == 1)
	{
	  counts[0] = fd->loop.n2;
	  fd->loop = fd->loops[0];
	}
    }

  type = TREE_TYPE (fd->loop.v);
  istart0 = create_tmp_var (fd->iter_type, ".istart0");
  iend0 = create_tmp_var (fd->iter_type, ".iend0");
  TREE_ADDRESSABLE (istart0) = 1;
  TREE_ADDRESSABLE (iend0) = 1;

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type)
      && fd->ordered == 0)
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  gimple_stmt_iterator gsif = gsi;
  tree arr = NULL_TREE;
  if (in_combined_parallel)
    {
      gcc_assert (fd->ordered == 0);
      /* In a combined parallel loop, emit a call to
	 GOMP_loop_foo_next.  */
      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
    }
  else
    {
      tree t0, t1, t2, t3, t4;
      /* If this is not a combined parallel loop, emit a call to
	 GOMP_loop_foo_start in ENTRY_BB.  */
      t4 = build_fold_addr_expr (iend0);
      t3 = build_fold_addr_expr (istart0);
      if (fd->ordered)
	{
	  t0 = build_int_cst (unsigned_type_node,
			      fd->ordered - fd->collapse + 1);
	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
							fd->ordered
							- fd->collapse + 1),
				".omp_counts");
	  DECL_NAMELESS (arr) = 1;
	  TREE_ADDRESSABLE (arr) = 1;
	  TREE_STATIC (arr) = 1;
	  vec<constructor_elt, va_gc> *v;
	  vec_alloc (v, fd->ordered - fd->collapse + 1);
	  int idx;

	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
	    {
	      tree c;
	      if (idx == 0 && fd->collapse > 1)
		c = fd->loop.n2;
	      else
		c = counts[idx + fd->collapse - 1];
	      tree purpose = size_int (idx);
	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
	      if (TREE_CODE (c) != INTEGER_CST)
		TREE_STATIC (arr) = 0;
	    }

	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
	  if (!TREE_STATIC (arr))
	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
						    void_type_node, arr),
				      true, NULL_TREE, true, GSI_SAME_STMT);
	  t1 = build_fold_addr_expr (arr);
	  t2 = NULL_TREE;
	}
      else
	{
	  t2 = fold_convert (fd->iter_type, fd->loop.step);
	  t1 = fd->loop.n2;
	  t0 = fd->loop.n1;
	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
	    {
	      tree innerc
		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				   OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      t0 = OMP_CLAUSE_DECL (innerc);
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      t1 = OMP_CLAUSE_DECL (innerc);
	    }
	  if (POINTER_TYPE_P (TREE_TYPE (t0))
	      && TYPE_PRECISION (TREE_TYPE (t0))
		 != TYPE_PRECISION (fd->iter_type))
	    {
	      /* Avoid casting pointers to integer of a different size.  */
	      tree itype = signed_type_for (type);
	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
	    }
	  else
	    {
	      t1 = fold_convert (fd->iter_type, t1);
	      t0 = fold_convert (fd->iter_type, t0);
	    }
	  if (bias)
	    {
	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
	    }
	}
      if (fd->iter_type == long_integer_type_node || fd->ordered)
	{
	  if (fd->chunk_size)
	    {
	      t = fold_convert (fd->iter_type, fd->chunk_size);
	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
	      if (fd->ordered)
		t = build_call_expr (builtin_decl_explicit (start_fn),
				     5, t0, t1, t, t3, t4);
	      else
		t = build_call_expr (builtin_decl_explicit (start_fn),
				     6, t0, t1, t2, t, t3, t4);
	    }
	  else if (fd->ordered)
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 4, t0, t1, t3, t4);
	  else
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 5, t0, t1, t2, t3, t4);
	}
      else
	{
	  tree t5;
	  tree c_bool_type;
	  tree bfn_decl;

	  /* The GOMP_loop_ull_*start functions have additional boolean
	     argument, true for < loops and false for > loops.
	     In Fortran, the C bool type can be different from
	     boolean_type_node.  */
	  bfn_decl = builtin_decl_explicit (start_fn);
	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
	  t5 = build_int_cst (c_bool_type,
			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
	  if (fd->chunk_size)
	    {
	      tree bfn_decl = builtin_decl_explicit (start_fn);
	      t = fold_convert (fd->iter_type, fd->chunk_size);
	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
	    }
	  else
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 6, t5, t0, t1, t2, t3, t4);
	}
    }
  if (TREE_TYPE (t) != boolean_type_node)
    t = fold_build2 (NE_EXPR, boolean_type_node,
		     t, build_int_cst (TREE_TYPE (t), 0));
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
  if (arr && !TREE_STATIC (arr))
    {
      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
			 GSI_SAME_STMT);
    }
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (gsi_end_p (gsif))
    gsif = gsi_after_labels (gsi_bb (gsif));
  gsi_next (&gsif);

  /* Iteration setup for sequential loop goes in L0_BB.  */
  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (inner_stmt)
		     == GF_OMP_FOR_KIND_SIMD);
      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }

  gsi = gsi_start_bb (l0_bb);
  t = istart0;
  if (fd->ordered && fd->collapse == 1)
    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
		     fold_convert (fd->iter_type, fd->loop.step));
  else if (bias)
    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
  if (fd->ordered && fd->collapse == 1)
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
			 fd->loop.n1, fold_convert (sizetype, t));
      else
	{
	  t = fold_convert (TREE_TYPE (startvar), t);
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
			   fd->loop.n1, t);
	}
    }
  else
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
      t = fold_convert (TREE_TYPE (startvar), t);
    }
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = iend0;
  if (fd->ordered && fd->collapse == 1)
    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
		     fold_convert (fd->iter_type, fd->loop.step));
  else if (bias)
    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
  if (fd->ordered && fd->collapse == 1)
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
			 fd->loop.n1, fold_convert (sizetype, t));
      else
	{
	  t = fold_convert (TREE_TYPE (startvar), t);
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
			   fd->loop.n1, t);
	}
    }
  else
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
      t = fold_convert (TREE_TYPE (startvar), t);
    }
  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, iend);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
	assign_stmt = gimple_build_assign (fd->loop.v, iend);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      itercnt = startvar;
	      tree n1 = fd->loop.n1;
	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
		{
		  itercnt
		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
				    itercnt);
		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
		}
	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
				     itercnt, n1);
	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
				     itercnt, fd->loop.step);
	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						  NULL_TREE, false,
						  GSI_CONTINUE_LINKING);
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
  if (fd->ordered)
    {
      /* Until now, counts array contained number of iterations or
	 variable containing it for ith loop.  From now on, we need
	 those counts only for collapsed loops, and only for the 2nd
	 till the last collapsed one.  Move those one element earlier,
	 we'll use counts[fd->collapse - 1] for the first source/sink
	 iteration counter and so on and counts[fd->ordered]
	 as the array holding the current counter values for
	 depending loops.  */
      if (fd->collapse > 1)
	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
      if (broken_loop)
	{
	  int i;
	  for (i = fd->collapse; i < fd->ordered; i++)
	    {
	      tree type = TREE_TYPE (fd->loops[i].v);
	      tree this_cond
		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
			       fold_convert (type, fd->loops[i].n1),
			       fold_convert (type, fd->loops[i].n2));
	      if (!integer_onep (this_cond))
		break;
	    }
	  if (i < fd->ordered)
	    {
	      cont_bb
		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
	      make_edge (cont_bb, l1_bb, 0);
	      l2_bb = create_empty_bb (cont_bb);
	      broken_loop = false;
	    }
	}
      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
					      ordered_lastprivate);
      if (counts[fd->collapse - 1])
	{
	  gcc_assert (fd->collapse == 1);
	  gsi = gsi_last_bb (l0_bb);
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
				   istart0, true);
	  gsi = gsi_last_bb (cont_bb);
	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
			   build_int_cst (fd->iter_type, 1));
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_zero_node, NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
	  t = counts[fd->collapse - 1];
	}
      else if (fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
			   fd->loops[0].v, fd->loops[0].n1);
	  t = fold_convert (fd->iter_type, t);
	}
      gsi = gsi_last_bb (l0_bb);
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_zero_node, NULL_TREE, NULL_TREE);
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      expand_omp_build_assign (&gsi, aref, t, true);
    }
  if (!broken_loop)
    {
      /* Code to control the increment and predicate for the sequential
	 loop goes in the CONT_BB.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, fd->loop.step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
	    {
	      if (fd->collapse > 1)
		t = fd->loop.v;
	      else
		{
		  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
				   fd->loops[0].v, fd->loops[0].n1);
		  t = fold_convert (fd->iter_type, t);
		}
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi, aref, t);
	    }

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
		      iend);
	  gcond *cond_stmt = gimple_build_cond_empty (t);
	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
      /* Emit code to get the next parallel iteration in L2_BB.  */
      gsi = gsi_start_bb (l2_bb);

      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      if (TREE_TYPE (t) != boolean_type_node)
	t = fold_build2 (NE_EXPR, boolean_type_node,
			 t, build_int_cst (TREE_TYPE (t), 0));
      gcond *cond_stmt = gimple_build_cond_empty (t);
      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
    }
  /* Add the loop cleanup function.  */
  gsi = gsi_last_bb (exit_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
  gcall *call_stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
  if (fd->ordered)
    {
      tree arr = counts[fd->ordered];
      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
			GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);
  /* Connect the new blocks.  */
  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      gimple_seq phis;

      e = find_edge (cont_bb, l3_bb);
      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);

      phis = phi_nodes (l3_bb);
      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
		   PHI_ARG_DEF_FROM_EDGE (phi, e));
	}
      remove_edge (e);

      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
      e = find_edge (cont_bb, l1_bb);
      if (e == NULL)
	{
	  e = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (e->dest) == l1_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (e);
	  e = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (e);
	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	e->flags = EDGE_TRUE_VALUE;
      if (e)
	{
	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
	}
      else
	{
	  e = find_edge (cont_bb, l2_bb);
	  e->flags = EDGE_FALLTHRU;
	}
      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
      if (gimple_in_ssa_p (cfun))
	{
	  /* Add phis to the outer loop that connect to the phis in the inner,
	     original loop, and move the loop entry value of the inner phi to
	     the loop entry value of the outer phi.  */
	  gphi_iterator psi;
	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
	    {
	      source_location locus;
	      gphi *nphi;
	      gphi *exit_phi = psi.phi ();

	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);

	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
	      edge latch_to_l1 = find_edge (latch, l1_bb);
	      gphi *inner_phi
		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);

	      tree t = gimple_phi_result (exit_phi);
	      tree new_res = copy_ssa_name (t, NULL);
	      nphi = create_phi_node (new_res, l0_bb);

	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
	      add_phi_arg (nphi, t, entry_to_l0, locus);

	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);

	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
	    }
	}

      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
			       recompute_dominator (CDI_DOMINATORS, l2_bb));
      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
			       recompute_dominator (CDI_DOMINATORS, l3_bb));
      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
			       recompute_dominator (CDI_DOMINATORS, l0_bb));
      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
			       recompute_dominator (CDI_DOMINATORS, l1_bb));
      /* We enter expand_omp_for_generic with a loop.  This original loop may
	 have its own loop struct, or it may be part of an outer loop struct
	 (which may be the fake loop).  */
      struct loop *outer_loop = entry_bb->loop_father;
      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;

      add_bb_to_loop (l2_bb, outer_loop);

      /* We've added a new loop around the original loop.  Allocate the
	 corresponding loop struct.  */
      struct loop *new_loop = alloc_loop ();
      new_loop->header = l0_bb;
      new_loop->latch = l2_bb;
      add_loop (new_loop, outer_loop);

      /* Allocate a loop structure for the original loop unless we already
	 had one.  */
      if (!orig_loop_has_loop_struct
	  && !gimple_omp_for_combined_p (fd->for_stmt))
	{
	  struct loop *orig_loop = alloc_loop ();
	  orig_loop->header = l1_bb;
	  /* The loop may have multiple latches.  */
	  add_loop (orig_loop, new_loop);
	}
    }
}
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/
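
/* Worked example (illustrative only, not emitted GIMPLE) of the static,
   no-chunk partitioning arithmetic in the pseudocode above: n iterations
   are split over nthreads so the first tt threads get q + 1 iterations and
   the remaining ones get q.  The helper name and the use of long are
   assumptions made for this sketch.

     static void
     static_nochunk_range (long n, long nthreads, long threadid,
			   long *s0, long *e0)
     {
       long q = n / nthreads;
       long tt = n % nthreads;
       if (threadid < tt)
	 {
	   tt = 0;
	   q++;
	 }
       *s0 = q * threadid + tt;
       *e0 = *s0 + q;
     }
*/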
static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);
  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }
  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));

  struct loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
*/
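
/* Illustrative sketch (again not the emitted GIMPLE) of the chunked static
   schedule above: on trip number TRIP this thread works on chunk
   trip * nthreads + threadid, clamped to the total iteration count n.  The
   helper name and the use of long are assumptions made for this sketch.

     static void
     static_chunk_range (long n, long chunk, long nthreads, long threadid,
			 long trip, long *s0, long *e0)
     {
       *s0 = (trip * nthreads + threadid) * chunk;
       *e0 = *s0 + chunk < n ? *s0 + chunk : n;
     }
*/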
static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  se = split_block (entry_bb, last_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3797 if (fd
->collapse
> 1)
3799 int first_zero_iter
= -1, dummy
= -1;
3800 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
3802 counts
= XALLOCAVEC (tree
, fd
->collapse
);
3803 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3804 fin_bb
, first_zero_iter
,
3805 dummy_bb
, dummy
, l2_dom_bb
);
3808 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3809 t
= integer_one_node
;
3811 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
3812 fold_convert (type
, fd
->loop
.n1
),
3813 fold_convert (type
, fd
->loop
.n2
));
3814 if (fd
->collapse
== 1
3815 && TYPE_UNSIGNED (type
)
3816 && (t
== NULL_TREE
|| !integer_onep (t
)))
3818 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
3819 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
3820 true, GSI_SAME_STMT
);
3821 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
3822 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
3823 true, GSI_SAME_STMT
);
3824 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
3825 NULL_TREE
, NULL_TREE
);
3826 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
3828 expand_omp_regimplify_p
, NULL
, NULL
)
3829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
3830 expand_omp_regimplify_p
, NULL
, NULL
))
3832 gsi
= gsi_for_stmt (cond_stmt
);
3833 gimple_regimplify_operands (cond_stmt
, &gsi
);
3835 se
= split_block (entry_bb
, cond_stmt
);
3836 se
->flags
= EDGE_TRUE_VALUE
;
3837 entry_bb
= se
->dest
;
3838 se
->probability
= profile_probability::very_likely ();
3839 se
= make_edge (se
->src
, fin_bb
, EDGE_FALSE_VALUE
);
3840 se
->probability
= profile_probability::very_unlikely ();
3841 if (gimple_in_ssa_p (cfun
))
3843 int dest_idx
= find_edge (iter_part_bb
, fin_bb
)->dest_idx
;
3844 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
3845 !gsi_end_p (gpi
); gsi_next (&gpi
))
3847 gphi
*phi
= gpi
.phi ();
3848 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
3849 se
, UNKNOWN_LOCATION
);
3852 gsi
= gsi_last_bb (entry_bb
);
3855 switch (gimple_omp_for_kind (fd
->for_stmt
))
3857 case GF_OMP_FOR_KIND_FOR
:
3858 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
3859 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
3861 case GF_OMP_FOR_KIND_DISTRIBUTE
:
3862 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
3863 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
3868 nthreads
= build_call_expr (nthreads
, 0);
3869 nthreads
= fold_convert (itype
, nthreads
);
3870 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
3871 true, GSI_SAME_STMT
);
3872 threadid
= build_call_expr (threadid
, 0);
3873 threadid
= fold_convert (itype
, threadid
);
3874 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
3875 true, GSI_SAME_STMT
);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);
  tree chunk_size = fold_convert (itype, fd->chunk_size);
  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
  chunk_size
    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
				GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);

  trip_var = create_tmp_reg (itype, ".trip");
  if (gimple_in_ssa_p (cfun))
    {
      trip_init = make_ssa_name (trip_var);
      trip_main = make_ssa_name (trip_var);
      trip_back = make_ssa_name (trip_var);
    }
  else
    {
      trip_init = trip_var;
      trip_main = trip_var;
      trip_back = trip_var;
    }

  gassign *assign_stmt
    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  gimple_stmt_iterator gsif = gsi;

  /* Iteration space partitioning goes in ITER_PART_BB.  */
  gsi = gsi_last_bb (iter_part_bb);

  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
  t = fold_build2 (MIN_EXPR, itype, t, n);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = build2 (LT_EXPR, boolean_type_node, s0, n);
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }

  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercntbias
		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
				   fold_convert (itype, fd->loop.n1));
		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
					     itercntbias, step);
		  itercntbias
		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop goes in CONT_BB,
	 replacing the GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
	    t = build2 (EQ_EXPR, boolean_type_node,
			build_int_cst (itype, 0),
			build_int_cst (itype, 1));
	  else
	    t = build2 (fd->loop.cond_code, boolean_type_node,
			DECL_P (vback) && TREE_ADDRESSABLE (vback)
			? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);

      /* Trip update code goes into TRIP_UPDATE_BB.  */
      gsi = gsi_start_bb (trip_update_bb);

      t = build_int_cst (itype, 1);
      t = build2 (PLUS_EXPR, itype, trip_main, t);
      assign_stmt = gimple_build_assign (trip_back, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      se = find_edge (cont_bb, body_bb);
      if (se == NULL)
	{
	  se = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (se->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (se);
	  se = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (se);
	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	se->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, trip_update_bb)->flags
	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;

      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
				iter_part_bb);
    }

  if (gimple_in_ssa_p (cfun))
    {
      gphi_iterator psi;
      gphi *phi;
      edge re, ene;
      edge_var_map *vm;
      size_t i;

      gcc_assert (fd->collapse == 1 && !broken_loop);

      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
	 remove arguments of the phi nodes in fin_bb.  We need to create
	 appropriate phi nodes in iter_part_bb instead.  */
      se = find_edge (iter_part_bb, fin_bb);
      re = single_succ_edge (trip_update_bb);
      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
      ene = single_succ_edge (entry_bb);

      psi = gsi_start_phis (fin_bb);
      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
	   gsi_next (&psi), ++i)
	{
	  gphi *nphi;
	  source_location locus;

	  phi = psi.phi ();
	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
			       redirect_edge_var_map_def (vm), 0))
	    continue;

	  t = gimple_phi_result (phi);
	  gcc_assert (t == redirect_edge_var_map_result (vm));

	  if (!single_pred_p (fin_bb))
	    t = copy_ssa_name (t, phi);

	  nphi = create_phi_node (t, iter_part_bb);

	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
	  locus = gimple_phi_arg_location_from_edge (phi, se);

	  /* A special case -- fd->loop.v is not yet computed in
	     iter_part_bb, we need to use vextra instead.  */
	  if (t == fd->loop.v)
	    t = vextra;
	  add_phi_arg (nphi, t, ene, locus);
	  locus = redirect_edge_var_map_location (vm);
	  tree back_arg = redirect_edge_var_map_def (vm);
	  add_phi_arg (nphi, back_arg, re, locus);
	  edge ce = find_edge (cont_bb, body_bb);
	  if (ce == NULL)
	    {
	      ce = BRANCH_EDGE (cont_bb);
	      gcc_assert (single_succ (ce->dest) == body_bb);
	      ce = single_succ_edge (ce->dest);
	    }
	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
	  gcc_assert (inner_loop_phi != NULL);
	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
		       find_edge (seq_start_bb, body_bb), locus);

	  if (!single_pred_p (fin_bb))
	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
	}
      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
      redirect_edge_var_map_clear (re);
      if (single_pred_p (fin_bb))
	while (1)
	  {
	    psi = gsi_start_phis (fin_bb);
	    if (gsi_end_p (psi))
	      break;
	    remove_phi_node (&psi, false);
	  }

      /* Make phi node for trip.  */
      phi = create_phi_node (trip_main, iter_part_bb);
      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
		   UNKNOWN_LOCATION);
      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
		   UNKNOWN_LOCATION);
    }

  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));

  if (!broken_loop)
    {
      struct loop *loop = body_bb->loop_father;
      struct loop *trip_loop = alloc_loop ();
      trip_loop->header = iter_part_bb;
      trip_loop->latch = trip_update_bb;
      add_loop (trip_loop, iter_part_bb->loop_father);

      if (loop != entry_bb->loop_father)
	{
	  gcc_assert (loop->header == body_bb);
	  gcc_assert (loop->latch == region->cont
		      || single_pred (loop->latch) == region->cont);
	  trip_loop->inner = loop;
	  return;
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  loop = alloc_loop ();
	  loop->header = body_bb;
	  if (collapse_bb == NULL)
	    loop->latch = cont_bb;
	  add_loop (loop, trip_loop);
	}
    }
}
/* A subroutine of expand_omp_for.  Generate code for _Cilk_for loop.
   Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">" or "!=", we generate pseudocode

	for (ind_var = low; ind_var < high; ind_var++)
	  {
	    V = n1 + (ind_var * STEP)

	    <BODY>
	  }

   In the above pseudocode, low and high are function parameters of the
   child function.  In the function below, we are inserting a temp.
   variable that will be making a call to two OMP functions that will not be
   found in the body of _Cilk_for (since OMP_FOR cannot be mixed
   with _Cilk_for).  These functions are replaced with low and high
   by the function that handles taskreg.  */
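
/* Added note (commentary, not from the original sources): once the region
   is outlined, ind_var runs from the child function's __low parameter up
   to (but not including) __high, always with a '<' test, and the user's
   iteration variable V is recomputed as N1 + ind_var * STEP at the top of
   every iteration; the Cilk runtime chooses the [__low, __high) sub-range
   handed to each worker.  */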
static void
expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
{
  bool broken_loop = region->cont == NULL;
  basic_block entry_bb = region->entry;
  basic_block cont_bb = region->cont;

  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  basic_block l1_bb, l2_bb;

  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  basic_block exit_bb = region->exit;
  basic_block l2_dom_bb = NULL;

  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);

  /* Below statements until the "tree high_val = ..." are pseudo statements
     used to pass information to be used by expand_omp_taskreg.
     low_val and high_val will be replaced by the __low and __high
     parameter from the child function.

     The call_exprs part is a place-holder, it is mainly used
     to distinctly identify to the top-level part that this is
     where we should put low and high (reasoning given in header
     comment).  */

  gomp_parallel *par_stmt
    = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
  tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
  tree t, low_val = NULL_TREE, high_val = NULL_TREE;
  for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
    {
      if (id_equal (DECL_NAME (t), "__high"))
	high_val = t;
      else if (id_equal (DECL_NAME (t), "__low"))
	low_val = t;
    }
  gcc_assert (low_val && high_val);

  tree type = TREE_TYPE (low_val);
  tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  tree n1 = low_val;
  tree n2 = high_val;

  gimple *stmt = gimple_build_assign (ind_var, n1);

  /* Replace the GIMPLE_OMP_FOR statement.  */
  gsi_replace (&gsi, stmt, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
      stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
				  build_one_cst (type));

      /* Replace GIMPLE_OMP_CONTINUE.  */
      gsi_replace (&gsi, stmt, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_after_labels (l1_bb);
  t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
		   fold_convert (TREE_TYPE (fd->loop.step), ind_var),
		   fd->loop.step);
  if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
    t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
		     fd->loop.n1, fold_convert (sizetype, t));
  else
    t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
		     fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
  t = fold_convert (TREE_TYPE (fd->loop.v), t);
  expand_omp_build_assign (&gsi, fd->loop.v, t);

  /* The condition is always '<' since the runtime will fill in the low
     and high values.  */
  stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  edge e, ne;
  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (!broken_loop)
    {
      struct loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = INT_MAX;
    }

  /* Pick the correct library function based on the precision of the
     induction variable type.  */
  tree lib_fun = NULL_TREE;
  if (TYPE_PRECISION (type) == 32)
    lib_fun = cilk_for_32_fndecl;
  else if (TYPE_PRECISION (type) == 64)
    lib_fun = cilk_for_64_fndecl;
  else
    gcc_unreachable ();

  gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);

  /* WS_ARGS contains the library function flavor to call:
     __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
     user-defined grain value.  If the user does not define one, then zero
     is passed in by the parser.  */
  vec_alloc (region->ws_args, 2);
  region->ws_args->quick_push (lib_fun);
  region->ws_args->quick_push (fd->chunk_size);
}
/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
   loop.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	V = N1;
	goto L1;
    L0:
	BODY;
	V += STEP;
    L1:
	if (V cond N2) goto L0; else goto L2;
    L2:

   For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

   we generate pseudocode

	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	V = 0;
	V1 = N11;
	V2 = N21;
	V3 = N31;
	goto L1;
    L0:
	BODY;
	V += 1;
	V3 += STEP3;
	V2 += (V3 cond3 N32) ? 0 : STEP2;
	V3 = (V3 cond3 N32) ? V3 : N31;
	V1 += (V2 cond2 N22) ? 0 : STEP1;
	V2 = (V2 cond2 N22) ? V2 : N21;
    L1:
	if (V < count) goto L0; else goto L2;
    L2:
   */
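
/* Illustrative example (added commentary, not from the original sources):
   for collapse(2) with an inner loop of 4 iterations, count = count1 * 4
   and the single counter V simply runs 0 .. count-1; whenever the inner
   variable steps past its bound the generated conditional expressions
   reset it to its start value and advance the outer variable by its step,
   so the original indices are recovered without any division.  */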
static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  int safelen_int = INT_MAX;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMDUID_);
  tree n1, n2;

  if (safelen)
    {
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (TREE_CODE (safelen) != INTEGER_CST)
	safelen_int = 0;
      else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
	safelen_int = tree_to_uhwi (safelen);
      if (safelen_int == 1)
	safelen_int = 0;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;

  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMT_);
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      is_simt = safelen_int > 1;
    }
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
				 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
	n1 = fold_build_pointer_plus (n1, offset);
      else
	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
	simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
					unsigned_type_node, 0);
      if (simt_maxlane)
	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }

  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  if (fd->collapse > 1)
    {
      if (gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  gsi_prev (&gsi);
	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
	  gsi_next (&gsi);
	}
      else
	for (i = 0; i < fd->collapse; i++)
	  {
	    tree itype = TREE_TYPE (fd->loops[i].v);
	    if (POINTER_TYPE_P (itype))
	      itype = signed_type_for (itype);
	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  }
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);

      if (fd->collapse > 1)
	{
	  i = fd->collapse - 1;
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      t = fold_convert (sizetype, fd->loops[i].step);
	      t = fold_build_pointer_plus (fd->loops[i].v, t);
	    }
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);

	  for (i = fd->collapse - 1; i > 0; i--)
	    {
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
	      if (POINTER_TYPE_P (itype2))
		itype2 = signed_type_for (itype2);
	      t = build3 (COND_EXPR, itype2,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  build_int_cst (itype2, 0),
			  fold_convert (itype2, fd->loops[i - 1].step));
	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
	      else
		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);

	      t = build3 (COND_EXPR, itype,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  fd->loops[i].v,
			  fold_convert (itype, fd->loops[i].n1));
	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	    }
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  t = fold_convert (type, n2);
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  tree v = fd->loop.v;
  if (DECL_P (v) && TREE_ADDRESSABLE (v))
    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
				     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability
	= profile_probability::guessed_always ().apply_scale (7, 8);
      BRANCH_EDGE (entry_bb)->probability
	= FALLTHRU_EDGE (entry_bb)->probability.invert ();
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop)
    {
      struct loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
	{
	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
	  cfun->has_simduid_loops = true;
	}
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	 the loop.  */
      if ((flag_tree_loop_vectorize
	   || !global_options_set.x_flag_tree_loop_vectorize)
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  cfun->has_force_vectorize_loops = true;
	}
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}
/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */
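
/* Added note (commentary, not from the original sources): the values
   computed here reach the inner GIMPLE_OMP_FOR solely through the
   artificial _looptemp_ clauses attached to the GIMPLE_OMP_TASK; the
   GOMP_taskloop* runtime entry point later carves the computed
   [start, end) interval into per-task sub-ranges.  */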
static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }
= region
->entry
;
4914 cont_bb
= region
->cont
;
4915 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4916 gcc_assert (BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4917 exit_bb
= region
->exit
;
4919 gsi
= gsi_last_bb (entry_bb
);
4920 gimple
*for_stmt
= gsi_stmt (gsi
);
4921 gcc_assert (gimple_code (for_stmt
) == GIMPLE_OMP_FOR
);
4922 if (fd
->collapse
> 1)
4924 int first_zero_iter
= -1, dummy
= -1;
4925 basic_block zero_iter_bb
= NULL
, dummy_bb
= NULL
, l2_dom_bb
= NULL
;
4927 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4928 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4929 zero_iter_bb
, first_zero_iter
,
4930 dummy_bb
, dummy
, l2_dom_bb
);
4934 /* Some counts[i] vars might be uninitialized if
4935 some loop has zero iterations. But the body shouldn't
4936 be executed in that case, so just avoid uninit warnings. */
4937 for (i
= first_zero_iter
; i
< fd
->collapse
; i
++)
4938 if (SSA_VAR_P (counts
[i
]))
4939 TREE_NO_WARNING (counts
[i
]) = 1;
4941 edge e
= split_block (entry_bb
, gsi_stmt (gsi
));
4943 make_edge (zero_iter_bb
, entry_bb
, EDGE_FALLTHRU
);
4944 gsi
= gsi_last_bb (entry_bb
);
4945 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
4946 get_immediate_dominator (CDI_DOMINATORS
,
4954 if (POINTER_TYPE_P (TREE_TYPE (t0
))
4955 && TYPE_PRECISION (TREE_TYPE (t0
))
4956 != TYPE_PRECISION (fd
->iter_type
))
4958 /* Avoid casting pointers to integer of a different size. */
4959 tree itype
= signed_type_for (type
);
4960 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
4961 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
4965 t1
= fold_convert (fd
->iter_type
, t1
);
4966 t0
= fold_convert (fd
->iter_type
, t0
);
4970 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
4971 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
4974 tree innerc
= omp_find_clause (gimple_omp_task_clauses (inner_stmt
),
4975 OMP_CLAUSE__LOOPTEMP_
);
4976 gcc_assert (innerc
);
4977 tree startvar
= OMP_CLAUSE_DECL (innerc
);
4978 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
4979 gcc_assert (innerc
);
4980 tree endvar
= OMP_CLAUSE_DECL (innerc
);
4981 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
4983 gcc_assert (innerc
);
4984 for (i
= 1; i
< fd
->collapse
; i
++)
4986 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4987 OMP_CLAUSE__LOOPTEMP_
);
4988 gcc_assert (innerc
);
4990 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4991 OMP_CLAUSE__LOOPTEMP_
);
4994 /* If needed (inner taskloop has lastprivate clause), propagate
4995 down the total number of iterations. */
4996 tree t
= force_gimple_operand_gsi (&gsi
, fd
->loop
.n2
, false,
4998 GSI_CONTINUE_LINKING
);
4999 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
5000 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5004 t0
= force_gimple_operand_gsi (&gsi
, t0
, false, NULL_TREE
, false,
5005 GSI_CONTINUE_LINKING
);
5006 assign_stmt
= gimple_build_assign (startvar
, t0
);
5007 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5009 t1
= force_gimple_operand_gsi (&gsi
, t1
, false, NULL_TREE
, false,
5010 GSI_CONTINUE_LINKING
);
5011 assign_stmt
= gimple_build_assign (endvar
, t1
);
5012 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5013 if (fd
->collapse
> 1)
5014 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
5016 /* Remove the GIMPLE_OMP_FOR statement. */
5017 gsi
= gsi_for_stmt (for_stmt
);
5018 gsi_remove (&gsi
, true);
5020 gsi
= gsi_last_bb (cont_bb
);
5021 gsi_remove (&gsi
, true);
5023 gsi
= gsi_last_bb (exit_bb
);
5024 gsi_remove (&gsi
, true);
5026 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
5027 remove_edge (BRANCH_EDGE (entry_bb
));
5028 FALLTHRU_EDGE (cont_bb
)->probability
= profile_probability::always ();
5029 remove_edge (BRANCH_EDGE (cont_bb
));
5030 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, cont_bb
);
5031 set_immediate_dominator (CDI_DOMINATORS
, region
->entry
,
5032 recompute_dominator (CDI_DOMINATORS
, region
->entry
));
/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.  */
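
/* Added note (commentary, not from the original sources): here N1 and N2
   are taken from the _looptemp_ clauses that GOMP_taskloop filled with
   this task's sub-range; when the LLONG_MIN bias was applied in the outer
   expansion, adding the same bias again below cancels it modulo 2**64
   before the values are converted back to the iterator type.  */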
static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      struct loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop.  */
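
/* Added note (commentary, not from the original sources): the
   IFN_GOACC_LOOP internal calls emitted below (with the _CHUNKS, _STEP,
   _OFFSET and _BOUND sub-codes) are placeholders; oacc_device_lower
   replaces them with target-specific arithmetic once the gang/worker/
   vector partitioning encoded in the GWV mask is known.  */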
static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
  tree v = fd->loop.v;
  enum tree_code cond_code = fd->loop.cond_code;
  enum tree_code plus_code = PLUS_EXPR;

  tree chunk_size = integer_minus_one_node;
  tree gwv = integer_zero_node;
  tree iter_type = TREE_TYPE (v);
  tree diff_type = iter_type;
  tree plus_type = iter_type;
  struct oacc_collapse *counts = NULL;

  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
		       == GF_OMP_FOR_KIND_OACC_LOOP);
  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);

  if (POINTER_TYPE_P (iter_type))
    {
      plus_code = POINTER_PLUS_EXPR;
      plus_type = sizetype;
    }
  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
    diff_type = signed_type_for (diff_type);

  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
  basic_block bottom_bb = NULL;

  /* entry_bb has two successors; the branch edge is to the exit
     block, fallthrough edge to body.  */
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);

  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
     body_bb, or to a block whose only successor is the body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of the entry_bb).  */
  if (cont_bb)
    {
      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
      basic_block bed = BRANCH_EDGE (cont_bb)->dest;

      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
    }
  else
    gcc_assert (!gimple_in_ssa_p (cfun));

  /* The exit block only has entry_bb and cont_bb as predecessors.  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));

  tree chunk_no;
  tree chunk_max = NULL_TREE;
  tree bound, offset;
  tree step = create_tmp_var (diff_type, ".step");
  bool up = cond_code == LT_EXPR;
  tree dir = build_int_cst (diff_type, up ? +1 : -1);
  bool chunking = !gimple_in_ssa_p (cfun);
  bool negating;

  /* Tiling vars.  */
  tree tile_size = NULL_TREE;
  tree element_s = NULL_TREE;
  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
  basic_block elem_body_bb = NULL;
  basic_block elem_cont_bb = NULL;

  /* SSA instances.  */
  tree offset_incr = NULL_TREE;
  tree offset_init = NULL_TREE;

  gimple_stmt_iterator gsi;
  gassign *ass;
  gcall *call;
  gimple *stmt;
  tree expr;
  location_t loc;
  edge split, be, fte;

  /* Split the end of entry_bb to create head_bb.  */
  split = split_block (entry_bb, last_stmt (entry_bb));
  basic_block head_bb = split->dest;
  entry_bb = split->src;

  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
  gsi = gsi_last_bb (entry_bb);
  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
  loc = gimple_location (for_stmt);

  if (gimple_in_ssa_p (cfun))
    {
      offset_init = gimple_omp_for_index (for_stmt, 0);
      gcc_assert (integer_zerop (fd->loop.n1));
      /* The SSA parallelizer does gang parallelism.  */
      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
    }

  if (fd->collapse > 1 || fd->tiling)
    {
      gcc_assert (!gimple_in_ssa_p (cfun) && up);
      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
					      TREE_TYPE (fd->loop.n2), loc);

      if (SSA_VAR_P (fd->loop.n2))
	{
	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
					    true, GSI_SAME_STMT);
	  ass = gimple_build_assign (fd->loop.n2, total);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	}
    }

  tree b = fd->loop.n1;
  tree e = fd->loop.n2;
  tree s = fd->loop.step;

  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Convert the step, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
  if (negating)
    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
  s = fold_convert (diff_type, s);
  if (negating)
    s = fold_build1 (NEGATE_EXPR, diff_type, s);
  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);

  if (!chunking)
    chunk_size = integer_zero_node;
  expr = fold_convert (diff_type, chunk_size);
  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  if (fd->tiling)
    {
      /* Determine the tile size and element step,
	 modify the outer loop step size.  */
      tile_size = create_tmp_var (diff_type, ".tile_size");
      expr = build_int_cst (diff_type, 1);
      for (int ix = 0; ix < fd->collapse; ix++)
	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
      expr = force_gimple_operand_gsi (&gsi, expr, true,
				       NULL_TREE, true, GSI_SAME_STMT);
      ass = gimple_build_assign (tile_size, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      element_s = create_tmp_var (diff_type, ".element_s");
      ass = gimple_build_assign (element_s, s);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
      s = force_gimple_operand_gsi (&gsi, expr, true,
				    NULL_TREE, true, GSI_SAME_STMT);
    }

  /* Determine the range, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (iter_type);
  expr = fold_build2 (MINUS_EXPR, plus_type,
		      fold_convert (plus_type, negating ? b : e),
		      fold_convert (plus_type, negating ? e : b));
  expr = fold_convert (diff_type, expr);
  if (negating)
    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
  tree range = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  chunk_no = build_int_cst (diff_type, 0);
  if (chunking)
    {
      gcc_assert (!gimple_in_ssa_p (cfun));

      expr = chunk_no;
      chunk_max = create_tmp_var (diff_type, ".chunk_max");
      chunk_no = create_tmp_var (diff_type, ".chunk_no");

      ass = gimple_build_assign (chunk_no, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
					 build_int_cst (integer_type_node,
							IFN_GOACC_LOOP_CHUNKS),
					 dir, range, s, chunk_size, gwv);
      gimple_call_set_lhs (call, chunk_max);
      gimple_set_location (call, loc);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
    }
  else
    chunk_size = chunk_no;

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_STEP),
				     dir, range, s, chunk_size, gwv);
  gimple_call_set_lhs (call, step);
  gimple_set_location (call, loc);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);
5511 /* Fixup edges from head_bb. */
5512 be
= BRANCH_EDGE (head_bb
);
5513 fte
= FALLTHRU_EDGE (head_bb
);
5514 be
->flags
|= EDGE_FALSE_VALUE
;
5515 fte
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5517 basic_block body_bb
= fte
->dest
;
5519 if (gimple_in_ssa_p (cfun
))
5521 gsi
= gsi_last_bb (cont_bb
);
5522 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5524 offset
= gimple_omp_continue_control_use (cont_stmt
);
5525 offset_incr
= gimple_omp_continue_control_def (cont_stmt
);
5529 offset
= create_tmp_var (diff_type
, ".offset");
5530 offset_init
= offset_incr
= offset
;
5532 bound
= create_tmp_var (TREE_TYPE (offset
), ".bound");
5534 /* Loop offset & bound go into head_bb. */
5535 gsi
= gsi_start_bb (head_bb
);
5537 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5538 build_int_cst (integer_type_node
,
5539 IFN_GOACC_LOOP_OFFSET
),
5541 chunk_size
, gwv
, chunk_no
);
5542 gimple_call_set_lhs (call
, offset_init
);
5543 gimple_set_location (call
, loc
);
5544 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5546 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5547 build_int_cst (integer_type_node
,
5548 IFN_GOACC_LOOP_BOUND
),
5550 chunk_size
, gwv
, offset_init
);
5551 gimple_call_set_lhs (call
, bound
);
5552 gimple_set_location (call
, loc
);
5553 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5555 expr
= build2 (cond_code
, boolean_type_node
, offset_init
, bound
);
5556 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5557 GSI_CONTINUE_LINKING
);
  /* V assignment goes into body_bb.  */
  if (!gimple_in_ssa_p (cfun))
    {
      gsi = gsi_start_bb (body_bb);

      expr = build2 (plus_code, iter_type, b,
		     fold_convert (plus_type, offset));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      if (fd->collapse > 1 || fd->tiling)
	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);

      if (fd->tiling)
	{
	  /* Determine the range of the element loop -- usually simply
	     the tile_size, but could be smaller if the final
	     iteration of the outer loop is a partial tile.  */
	  tree e_range = create_tmp_var (diff_type, ".e_range");

	  expr = build2 (MIN_EXPR, diff_type,
			 build2 (MINUS_EXPR, diff_type, bound, offset),
			 build2 (MULT_EXPR, diff_type, tile_size,
				 element_s));
	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
					   true, GSI_SAME_STMT);
	  ass = gimple_build_assign (e_range, expr);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	  /* Determine bound, offset & step of inner loop.  */
	  e_bound = create_tmp_var (diff_type, ".e_bound");
	  e_offset = create_tmp_var (diff_type, ".e_offset");
	  e_step = create_tmp_var (diff_type, ".e_step");

	  /* Mark these as element loops.  */
	  tree t, e_gwv = integer_minus_one_node;
	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
					     element_s, chunk, e_gwv, chunk);
	  gimple_call_set_lhs (call, e_offset);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
					     element_s, chunk, e_gwv, e_offset);
	  gimple_call_set_lhs (call, e_bound);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
					     element_s, chunk, e_gwv);
	  gimple_call_set_lhs (call, e_step);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  /* Add test and split block.  */
	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
	  stmt = gimple_build_cond_empty (expr);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (body_bb, stmt);
	  elem_body_bb = split->dest;
	  if (cont_bb == body_bb)
	    cont_bb = elem_body_bb;
	  body_bb = split->src;

	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
	  /* Initialize the user's loop vars.  */
	  gsi = gsi_start_bb (elem_body_bb);
	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
	}
    }

  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it was, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
  if (cont_bb)
    {
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      loc = gimple_location (cont_stmt);

      if (fd->tiling)
	{
	  /* Insert element loop increment and test.  */
	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
					   true, GSI_SAME_STMT);
	  ass = gimple_build_assign (e_offset, expr);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);

	  stmt = gimple_build_cond_empty (expr);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (cont_bb, stmt);
	  elem_cont_bb = split->src;
	  cont_bb = split->dest;

	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);

	  make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);

	  gsi = gsi_for_stmt (cont_stmt);
	}

      /* Increment offset.  */
      if (gimple_in_ssa_p (cfun))
	expr = build2 (plus_code, iter_type, offset,
		       fold_convert (plus_type, step));
      else
	expr = build2 (PLUS_EXPR, diff_type, offset, step);
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (offset_incr, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);

      /* Remove the GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
      /* Fixup edges from cont_bb.  */
      be = BRANCH_EDGE (cont_bb);
      fte = FALLTHRU_EDGE (cont_bb);
      be->flags |= EDGE_TRUE_VALUE;
      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;

      if (chunking)
	{
	  /* Split the beginning of exit_bb to make bottom_bb.  We
	     need to insert a nop at the start, because splitting is
	     after a stmt, not before.  */
	  gsi = gsi_start_bb (exit_bb);
	  stmt = gimple_build_nop ();
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (exit_bb, stmt);
	  bottom_bb = split->src;
	  exit_bb = split->dest;
	  gsi = gsi_last_bb (bottom_bb);

	  /* Chunk increment and test goes into bottom_bb.  */
	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
			 build_int_cst (diff_type, 1));
	  ass = gimple_build_assign (chunk_no, expr);
	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);

	  /* Chunk test at end of bottom_bb.  */
	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
			    GSI_CONTINUE_LINKING);

	  /* Fixup edges from bottom_bb.  */
	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
	}
    }
  gsi = gsi_last_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  loc = gimple_location (gsi_stmt (gsi));

  if (!gimple_in_ssa_p (cfun))
    {
      /* Insert the final value of V, in case it is live.  This is the
	 value for the only thread that survives past the join.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
    }

  /* Remove the OMP_RETURN.  */
  gsi_remove (&gsi, true);
  /* We now have one, two or three nested loops.  Update the loop
     structures.  */
  struct loop *parent = entry_bb->loop_father;
  struct loop *body = body_bb->loop_father;

  if (chunking)
    {
      struct loop *chunk_loop = alloc_loop ();
      chunk_loop->header = head_bb;
      chunk_loop->latch = bottom_bb;
      add_loop (chunk_loop, parent);
      parent = chunk_loop;
    }
  else if (parent != body)
    {
      gcc_assert (body->header == body_bb);
      gcc_assert (body->latch == cont_bb
		  || single_pred (body->latch) == cont_bb);
      parent = NULL;
    }

  if (parent)
    {
      struct loop *body_loop = alloc_loop ();
      body_loop->header = body_bb;
      body_loop->latch = cont_bb;
      add_loop (body_loop, parent);

      if (fd->tiling)
	{
	  /* Insert tiling's element loop.  */
	  struct loop *inner_loop = alloc_loop ();
	  inner_loop->header = elem_body_bb;
	  inner_loop->latch = elem_cont_bb;
	  add_loop (inner_loop, body_loop);
	}
    }
}
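/* Reading aid, not part of the pass: with chunking and tiling both enabled,
   the expansion above produces a loop nest whose shape is roughly the
   following (names mirror the temporaries created above; the IFN_GOACC_LOOP
   calls are resolved later by the oacc_device_lower machinery):

     chunk_max = GOACC_LOOP (CHUNKS, dir, range, step, chunk_size, gwv);
     for (chunk_no = 0; chunk_no < chunk_max; chunk_no++)       // bottom_bb
       {
	 offset = GOACC_LOOP (OFFSET, ...);                     // head_bb
	 bound  = GOACC_LOOP (BOUND, ...);
	 for (; offset < bound; offset += step)                 // body/cont_bb
	   for (e_offset = GOACC_LOOP (OFFSET, element args);   // element loop
		e_offset < e_bound; e_offset += e_step)
	     ... user body, loop vars rebuilt from e_offset ...;
       }  */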
/* Expand the OMP loop defined by REGION.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    expand_cilk_for (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  fn_index = 3;
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
	      && !fd.ordered
	      && !fd.have_ordered)
	    {
	      fn_index = 3 + fd.sched_kind;
	      break;
	    }
	  /* FALLTHRU */
	default:
	  fn_index = fd.sched_kind;
	  break;
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 6;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
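/* Reading aid, not part of the pass: for a plain worksharing loop such as
   "#pragma omp for schedule(dynamic, 4)", the index arithmetic above selects
   BUILT_IN_GOMP_LOOP_DYNAMIC_START/NEXT, so the generated dispatch loop is
   roughly equivalent to

     long istart, iend;
     if (GOMP_loop_dynamic_start (lb, ub, incr, 4, &istart, &iend))
       do
	 for (long i = istart; i < iend; i += incr)
	   ... body ...;
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   with the _ull_ builtins chosen instead when the iteration type is
   unsigned long long.  */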
/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
	...
	v = GOMP_sections_next ();
	...

   If this is a combined parallel sections, replace the call to
   GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}
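/* Reading aid, not part of the pass: for a two-section construct

     #pragma omp sections
     {
       #pragma omp section
       work0 ();
       #pragma omp section
       work1 ();
     }

   the expansion above leaves each thread executing roughly

     for (v = GOMP_sections_start (2); v; v = GOMP_sections_next ())
       switch (v)
	 {
	 case 1: work0 (); break;
	 case 2: work1 (); break;
	 default: __builtin_trap ();
	 }
     GOMP_sections_end ();

   where the value 0 (no more work) targets the exit label L2 built above.  */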
/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.  */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  si = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}
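/* Reading aid, not part of the pass: lowering has already guarded the body
   of "#pragma omp single" with GOMP_single_start, so the net effect after
   the expansion above is

     if (GOMP_single_start ())
       ... single body ...;
     GOMP_barrier ();

   with the trailing barrier omitted (only the markers removed) for the
   nowait form.  */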
/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  call = build_call_expr_loc (loc, decl, 2, addr,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  store_bb = single_succ (load_bb);
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL,
					     seq_cst ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}
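/* Reading aid, not part of the pass: the recognizer above turns

     #pragma omp atomic
     counter += n;

   into a single builtin call, roughly

     __atomic_fetch_add_4 (&counter, n, MEMMODEL_RELAXED);

   and the capture form "v = (counter += n);" into the ADD_FETCH variant
   whose result is stored into v, as long as the target has compare-and-swap
   for that mode.  */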
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
		      build_int_cst (TREE_TYPE (iaddr), 0));

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  struct loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
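/* Reading aid, not part of the pass: when no fetch-op builtin matches, for
   instance "#pragma omp atomic" applied to "x *= 3.0", the pipeline above
   emits the classic compare-and-swap retry loop, roughly

     oldval = *addr;                          // load_bb
     do
       {                                      // loop_header
	 newval = oldval * 3.0;
	 prev = __sync_val_compare_and_swap (addr, oldval, newval);
	 success = (prev == oldval);           // store_bb test
	 oldval = prev;
       }
     while (!success);

   with floating-point values view-converted to same-sized integers so that
   the comparison also behaves sanely for NaNs and -0.0.  */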
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

		      GOMP_atomic_start ();
		      *addr = rhs;
		      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
   *addr = stored_val;  */

static void
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
			      stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
}
/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  if (loaded_val == stored_val
	      && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
		  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
	       || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
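/* Reading aid, not part of the pass: the dispatcher above means that
   "#pragma omp atomic" on "flags |= 4u" (4-byte int) is handled by the
   fetch-op path, "x *= y" on a double falls through to the compare-and-swap
   pipeline, and a type for which the target lacks a suitable
   compare-and-swap ends up bracketed by GOMP_atomic_start () and
   GOMP_atomic_end ().  */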
/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  struct loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  struct loop *single_outer = NULL;
  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (struct loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}

/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.  */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
			     tree fld_decl, int index, tree value)
{
  tree ref = build4 (ARRAY_REF, uint32_type_node,
		     build3 (COMPONENT_REF,
			     grid_attr_trees->kernel_dim_array_type,
			     range_var, fld_decl, NULL_TREE),
		     build_int_cst (integer_type_node, index),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}

/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
6916 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6919 get_target_argument_identifier_1 (int device
, bool subseqent_param
, int id
)
6921 tree t
= build_int_cst (integer_type_node
, device
);
6922 if (subseqent_param
)
6923 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6924 build_int_cst (integer_type_node
,
6925 GOMP_TARGET_ARG_SUBSEQUENT_PARAM
));
6926 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6927 build_int_cst (integer_type_node
, id
));
6931 /* Like above but return it in type that can be directly stored as an element
6932 of the argument array. */
6935 get_target_argument_identifier (int device
, bool subseqent_param
, int id
)
6937 tree t
= get_target_argument_identifier_1 (device
, subseqent_param
, id
);
6938 return fold_convert (ptr_type_node
, t
);
6941 /* Return a target argument consisting of DEVICE identifier, value identifier
6942 ID, and the actual VALUE. */
6945 get_target_argument_value (gimple_stmt_iterator
*gsi
, int device
, int id
,
6948 tree t
= fold_build2 (LSHIFT_EXPR
, integer_type_node
,
6949 fold_convert (integer_type_node
, value
),
6950 build_int_cst (unsigned_type_node
,
6951 GOMP_TARGET_ARG_VALUE_SHIFT
));
6952 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
6953 get_target_argument_identifier_1 (device
, false, id
));
6954 t
= fold_convert (ptr_type_node
, t
);
6955 return force_gimple_operand_gsi (gsi
, t
, true, NULL
, true, GSI_SAME_STMT
);
6958 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6959 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
6960 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6964 push_target_argument_according_to_value (gimple_stmt_iterator
*gsi
, int device
,
6965 int id
, tree value
, vec
<tree
> *args
)
6967 if (tree_fits_shwi_p (value
)
6968 && tree_to_shwi (value
) > -(1 << 15)
6969 && tree_to_shwi (value
) < (1 << 15))
6970 args
->quick_push (get_target_argument_value (gsi
, device
, id
, value
));
6973 args
->quick_push (get_target_argument_identifier (device
, true, id
));
6974 value
= fold_convert (ptr_type_node
, value
);
6975 value
= force_gimple_operand_gsi (gsi
, value
, true, NULL
, true,
6977 args
->quick_push (value
);
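/* Reading aid, not part of the pass: with the GOMP_TARGET_ARG_* encoding
   used above, a num_teams(4) clause valid for all devices becomes a single
   pointer-sized element, conceptually

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
	       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS)

   whereas a value that does not fit in 16 bits is pushed as two elements:
   the identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the
   value itself converted to a pointer.  */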
/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}
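/* Reading aid, not part of the pass: for a target region with num_teams(8)
   and thread_limit(64) and no _griddim_ clauses, the temporary array built
   above is effectively

     void *.omp_target_args[3] = { <DEVICE_ALL|NUM_TEAMS|8>,
				   <DEVICE_ALL|THREAD_LIMIT|64>,
				   NULL };

   i.e. a NULL-terminated vector of self-describing, pointer-sized entries
   that the runtime can scan without being told its length.  */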
/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded, data_region;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      data_region = false;
      break;
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      data_region = true;
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
    {
      mark_loops_in_oacc_kernels_region (region->entry, region->exit);

      /* Further down, both OpenACC kernels and OpenACC parallel constructs
	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Make sure to generate early debug for the function before
	 outlining anything.  */
      if (! gimple_in_ssa_p (cfun))
	(*debug_hooks->early_global_decl) (cfun->decl);

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	vec_safe_push (offload_funcs, child_fn);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }
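  /* Reading aid, not part of the pass: at this point a construct such as

       #pragma omp target map(tofrom: a[0:n])
       { ... body ... }

     has had its body outlined into a child function and registered in
     offload_funcs; what remains in the parent function is the data-mapping
     setup plus the launch call emitted below, conceptually a call along the
     lines of

       GOMP_target_ext (device, child_fn, map_count, hostaddrs, sizes, kinds,
			flags, depend, target_args);

     (argument names here are purely illustrative), with the host-fallback
     path simply invoking child_fn directly.  */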
7280 /* Emit a library call to launch the offloading region, or do data
7282 tree t1
, t2
, t3
, t4
, device
, cond
, depend
, c
, clauses
;
7283 enum built_in_function start_ix
;
7284 location_t clause_loc
;
7285 unsigned int flags_i
= 0;
7287 switch (gimple_omp_target_kind (entry_stmt
))
7289 case GF_OMP_TARGET_KIND_REGION
:
7290 start_ix
= BUILT_IN_GOMP_TARGET
;
7292 case GF_OMP_TARGET_KIND_DATA
:
7293 start_ix
= BUILT_IN_GOMP_TARGET_DATA
;
7295 case GF_OMP_TARGET_KIND_UPDATE
:
7296 start_ix
= BUILT_IN_GOMP_TARGET_UPDATE
;
7298 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7299 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7301 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7302 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7303 flags_i
|= GOMP_TARGET_FLAG_EXIT_DATA
;
7305 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7306 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7307 start_ix
= BUILT_IN_GOACC_PARALLEL
;
7309 case GF_OMP_TARGET_KIND_OACC_DATA
:
7310 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7311 start_ix
= BUILT_IN_GOACC_DATA_START
;
7313 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7314 start_ix
= BUILT_IN_GOACC_UPDATE
;
7316 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7317 start_ix
= BUILT_IN_GOACC_ENTER_EXIT_DATA
;
7319 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7320 start_ix
= BUILT_IN_GOACC_DECLARE
;
  clauses = gimple_omp_target_clauses (entry_stmt);

  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
     library choose) and there is no conditional.  */
  cond = NULL_TREE;
  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
  if (c)
    {
      /* Even if we pass it to all library function calls, it is currently only
	 defined/used for the OpenMP target ones.  */
      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);

      device = OMP_CLAUSE_DEVICE_ID (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
  if (c)
    flags_i |= GOMP_TARGET_FLAG_NOWAIT;
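  /* For illustration: given

	 #pragma omp target device(2) if(n > 32) nowait

     the code above leaves DEVICE as the expression 2, COND as n > 32 and
     sets GOMP_TARGET_FLAG_NOWAIT in FLAGS_I; with no such clauses DEVICE
     stays GOMP_DEVICE_ICV and COND stays NULL_TREE.  */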
  /* Ensure 'device' is of the correct type.  */
  device = fold_convert_loc (clause_loc, integer_type_node, device);

  /* If we found the clause 'if (cond)', build
     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      tmp_var = create_tmp_var (TREE_TYPE (device));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, device);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var,
				  build_int_cst (integer_type_node,
						 GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      device = tmp_var;
      gsi = gsi_last_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_bb (new_bb);
      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					 true, GSI_SAME_STMT);
    }
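  /* The blocks created above for the IF clause form a small diamond,
     roughly:

	 cond_bb:  if (cond) goto then_bb; else goto else_bb;
	 then_bb:  tmp_var = device;
	 else_bb:  tmp_var = GOMP_DEVICE_HOST_FALLBACK;
	 new_bb:   <launch call built below, using tmp_var as the device>  */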
  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }
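  /* At this point T1 is the number of mapped values and T2, T3 and T4 are
     the addresses of the data, size and kind arrays produced by target
     lowering (or trivial values when there is nothing to map).  */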
  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
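    /* The OpenACC cases below use a tagged varargs convention: optional
       values are preceded by a GOMP_LAUNCH_* key built with oacc_launch_pack
       (GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT, ...), and once TAGGING is set the
       argument list is closed by a zero terminal marker pushed after this
       switch.  */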
    case BUILT_IN_GOACC_PARALLEL:
      oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (t_async);

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
						integer_type_node,
						OMP_CLAUSE_WAIT_EXPR (c)));
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}
/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */
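/* As a rough sketch, for each collapsed dimension DIM the expansion below
   replaces the original loop control with a single assignment

	V = N1 + <thread id in dimension DIM> * STEP

   where the id comes from one of the HSA work-group/work-item builtins,
   selected according to INTRA_GROUP and the group-iteration flag.  */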
static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Remove the omp for statement.  */
  gsi = gsi_last_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}
/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument_decls.  */

struct grid_arg_decl_map
{
  tree old_arg;
  tree new_arg;
};

/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
   pertaining to kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  if (t == adm->old_arg)
    *tp = adm->new_arg;
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}
/* If TARGET region contains a kernel body for loop, remove its region from the
   TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* Make sure to generate early debug for the function before
     outlining anything.  */
  if (! gimple_in_ssa_p (cfun))
    (*debug_hooks->early_global_decl) (cfun->decl);

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();
}
/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */
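/* For example, with

	#pragma omp parallel
	#pragma omp for
	for (...) ...

   the nested GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL region is outlined into its child function.  */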
static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).  */
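/* For instance, two sibling "#pragma omp parallel" constructs at the top
   level of a function become two separate root regions (a forest), while
   constructs nested inside one of them are recorded as children of that
   region via PARENT.  */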
static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions.  Store the root of
   the OMP region tree in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}
/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}
/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}
namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */
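/* For illustration: for a GIMPLE_OMP_FOR region, the GIMPLE_OMP_CONTINUE
   case below marks the entry's successor edge as abnormal, adds a loopback
   edge from the continue block to the loop body, and adds edges that skip
   the body entirely for the zero-iterations case.  */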
bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"