/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "debug.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */
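
/* For example, with a vectorization factor of 8 a chunk size of 10
   becomes (10 + 7) & -8 == 16; chunk sizes are rounded up to a
   multiple of the vectorization factor.  */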

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */
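
/* For a combined parallel loop such as
     #pragma omp parallel for schedule (dynamic, 16)
   the returned vector holds the loop bounds, step and (adjusted)
   chunk size, i.e. { n1, n2, step, 16 }, all converted to long.  For
   a sections region it holds a single element, the section count.  */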

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (tree entry_block, tree child_fndecl)
{
  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);

      if (TREE_CODE (b) == BLOCK)
	{
	  tree parent_fndecl;

	  /* Follow supercontext chain until the parent fndecl
	     is found.  */
	  for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
	       TREE_CODE (parent_fndecl) == BLOCK;
	       parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
	    ;

	  gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);

	  DECL_CONTEXT (child_fndecl) = parent_fndecl;

	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel_start etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */
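
/* The emitted call has the shape
     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags)
   with the WS_ARGS vector, if any, spliced in between NUM_THREADS and
   FLAGS for the combined GOMP_parallel_loop_<sched> and
   GOMP_parallel_sections entry points.  */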

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if (region->inner->sched_modifiers
		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */
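
/* For instance, a plain
     #pragma omp task untied
   body ends up as a call
     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
		cond, GOMP_TASK_FLAG_UNTIED, depend, priority)
   where COND is true unless an if clause said otherwise.  */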

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */
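
/* I.e. a call such as
     i = omp_get_thread_num ();
   inside the outlined body is rewritten to use the const builtin
     i = __builtin_omp_get_thread_num ();
   which later passes are then free to CSE.  */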

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to it's default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      /* Make sure to generate early debug for the function before
	 outlining anything.  */
      if (! gimple_in_ssa_p (cfun))
	(*debug_hooks->early_global_decl) (cfun->decl);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */
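
/* E.g. for a non-tiled collapse(2) nest whose loops run for 4 and 5
   iterations respectively, the returned bound is 4 * 5 == 20 in
   BOUND_TYPE, and COUNTS records base, iteration count and step for
   each member loop.  */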

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
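      /* iters = (range - dir + step) / step; e.g. for n1 = 0, n2 = 10,
	 step = 3 and an upward LT_EXPR loop this computes
	 (10 - 1 + 3) / 3 == 4 iterations.  */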
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
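
/* E.g. for two collapsed loops with 4 and 5 iterations, an IVAR of 13
   decomposes into 13 % 5 == 3 for the inner loop and 13 / 5 == 2 for
   the outer one; each index is then scaled by the loop's step and
   added to its base.  */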

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = ne->probability.invert ();
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_nondebug_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.  */
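
/* E.g. for collapse(2) with counts = { 4, 5 } and T = 13 this assigns
   V2 = N21 + (13 % 5) * STEP2 and V1 = N11 + (13 / 5) * STEP1.  */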

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ? gimple_omp_taskreg_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0)
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      t = fold_convert (itype, t);
      t = fold_build2 (MULT_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].step));
      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].n1, t);
      else
	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
      t = force_gimple_operand_gsi (gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
      if (i != 0)
	{
	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (tem, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
}
/* Helper function for expand_omp_for_*.  Generate code like:

    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;  */

static basic_block
extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
			     basic_block body_bb)
{
  basic_block last_bb, bb, collapse_bb = NULL;
  int i;
  gimple_stmt_iterator gsi;
  edge e;
  tree t;
  gimple *stmt;

  last_bb = cont_bb;
  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v);

      bb = create_empty_bb (last_bb);
      add_bb_to_loop (bb, last_bb->loop_father);
      gsi = gsi_start_bb (bb);

      if (i < fd->collapse - 1)
	{
	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (1, 8);

	  t = fd->loops[i + 1].n1;
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (fd->loops[i + 1].v)
					&& TREE_ADDRESSABLE (fd->loops[i
								       + 1].v),
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
      else
	collapse_bb = bb;

      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);

      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
      else
	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      if (i > 0)
	{
	  t = fd->loops[i].n2;
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree v = fd->loops[i].v;
	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
					  false, GSI_CONTINUE_LINKING);
	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
	  stmt = gimple_build_cond_empty (t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (7, 8);
	}
      else
	make_edge (bb, body_bb, EDGE_FALLTHRU);
      last_bb = bb;
    }

  return collapse_bb;
}
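
/* Illustrative example (not part of the original sources): for a loop
   nest such as

	#pragma omp for collapse(2)
	for (i = 0; i < n; i++)
	  for (j = 0; j < m; j++)
	    body (i, j);

   the blocks built above step the innermost variable first and only
   reset it to its lower bound and step the outer variable once the
   inner condition fails, exactly as in the L10/L11/L12 pseudocode in
   the comment before extract_omp_for_update_vars.  */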
/* Expand #pragma omp ordered depend(source).  */

static void
expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			   tree *counts, location_t loc)
{
  enum built_in_function source_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
  gimple *g
    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
			 build_fold_addr_expr (counts[fd->ordered]));
  gimple_set_location (g, loc);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
}
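
/* Illustrative note (not from the original sources): for long iterators
   the call built above amounts to

	GOMP_doacross_post (&.orditera[0]);

   i.e. it hands the address of the counts[fd->ordered] array (the
   ".orditera" variable created in expand_omp_ordered_source_sink below,
   holding the current iteration counters) to the libgomp runtime.  */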
/* Expand a single depend from #pragma omp ordered depend(sink:...).  */

static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc)
{
  auto_vec<tree, 10> args;
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  bool warned_step = false;

  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable,
		 we don't know at compile even the direction, so can't
		 warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
			"lexically later iteration");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      tree t1, t2;
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
			  "in the iteration space");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else if (counts[i])
	t = counts[i];
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
				   GSI_CONTINUE_LINKING);
  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
  e1->probability = e3->probability.invert ();
  e1->flags = EDGE_TRUE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

  *gsi = gsi_after_labels (e2->dest);
}
/* Expand all #pragma omp ordered depend(source) and
   #pragma omp ordered depend(sink:...) constructs in the current
   #pragma omp for ordered(n) region.  */

static void
expand_omp_ordered_source_sink (struct omp_region *region,
				struct omp_for_data *fd, tree *counts,
				basic_block cont_bb)
{
  struct omp_region *inner;
  int i;
  for (i = fd->collapse - 1; i < fd->ordered; i++)
    if (i == fd->collapse - 1 && fd->collapse > 1)
      counts[i] = NULL_TREE;
    else if (i >= fd->collapse && !cont_bb)
      counts[i] = build_zero_cst (fd->iter_type);
    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
	     && integer_onep (fd->loops[i].step))
      counts[i] = NULL_TREE;
    else
      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
  tree atype
    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;

  for (inner = region->inner; inner; inner = inner->next)
    if (inner->type == GIMPLE_OMP_ORDERED)
      {
	gomp_ordered *ord_stmt = inner->ord_stmt;
	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
	location_t loc = gimple_location (ord_stmt);
	tree c;
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
	    break;
	if (c)
	  expand_omp_ordered_source (&gsi, fd, counts, loc);
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
	gsi_remove (&gsi, true);
      }
}
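
/* For illustration (example not taken from the original sources), the
   helpers above implement OpenMP doacross loops such as

	#pragma omp for ordered(2)
	for (i = 1; i < n; i++)
	  for (j = 1; j < m; j++)
	    {
	#pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
	      a[i][j] = f (a[i-1][j], a[i][j-1]);
	#pragma omp ordered depend(source)
	    }

   Each depend(sink:...) becomes a (possibly guarded) GOMP_doacross_wait
   call built in expand_omp_ordered_sink, and depend(source) becomes a
   GOMP_doacross_post of the current iteration counters.  */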
/* Wrap the body into fd->ordered - fd->collapse loops that aren't
   collapsed.  */

static basic_block
expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
			      basic_block cont_bb, basic_block body_bb,
			      bool ordered_lastprivate)
{
  if (fd->ordered == fd->collapse)
    return cont_bb;

  if (!cont_bb)
    {
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      for (int i = fd->collapse; i < fd->ordered; i++)
	{
	  tree type = TREE_TYPE (fd->loops[i].v);
	  tree n1 = fold_convert (type, fd->loops[i].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_int (i - fd->collapse + 1),
			      NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
	}
      return NULL;
    }

  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
    {
      tree t, type = TREE_TYPE (fd->loops[i].v);
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      expand_omp_build_assign (&gsi, fd->loops[i].v,
			       fold_convert (type, fd->loops[i].n1));
      if (counts[i])
	expand_omp_build_assign (&gsi, counts[i],
				 build_zero_cst (fd->iter_type));
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_int (i - fd->collapse + 1),
			  NULL_TREE, NULL_TREE);
      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
      else
	gsi = gsi_last_bb (body_bb);
      edge e1 = split_block (body_bb, gsi_stmt (gsi));
      basic_block new_body = e1->dest;
      if (body_bb == cont_bb)
	cont_bb = new_body;
      edge e2 = NULL;
      basic_block new_header;
      if (EDGE_COUNT (cont_bb->preds) > 0)
	{
	  gsi = gsi_last_bb (cont_bb);
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loops[i].v,
					 fold_convert (sizetype,
						       fd->loops[i].step));
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
			     fold_convert (type, fd->loops[i].step));
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  if (counts[i])
	    {
	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[i], t);
	      t = counts[i];
	    }
	  else
	    {
	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	      t = fold_convert (fd->iter_type, t);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			 size_int (i - fd->collapse + 1),
			 NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, t);
	  gsi_prev (&gsi);
	  e2 = split_block (cont_bb, gsi_stmt (gsi));
	  new_header = e2->dest;
	}
      else
	new_header = cont_bb;
      gsi = gsi_after_labels (new_header);
      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
					 true, GSI_SAME_STMT);
      tree n2
	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
      edge e3 = split_block (new_header, gsi_stmt (gsi));
      cont_bb = e3->dest;
      remove_edge (e1);
      make_edge (body_bb, new_header, EDGE_FALLTHRU);
      e3->flags = EDGE_FALSE_VALUE;
      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
      e1->probability = e3->probability.invert ();

      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);

      if (e2)
	{
	  struct loop *loop = alloc_loop ();
	  loop->header = new_header;
	  loop->latch = e2->src;
	  add_loop (loop, body_bb->loop_father);
	}
    }

  /* If there are any lastprivate clauses and it is possible some loops
     might have zero iterations, ensure all the decls are initialized,
     otherwise we could crash evaluating C++ class iterators with lastprivate
     clauses.  */
  bool need_inits = false;
  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
    if (need_inits)
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
	expand_omp_build_assign (&gsi, fd->loops[i].v,
				 fold_convert (type, fd->loops[i].n1));
      }
    else
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	tree this_cond = fold_build2 (fd->loops[i].cond_code,
				      boolean_type_node,
				      fold_convert (type, fd->loops[i].n1),
				      fold_convert (type, fd->loops[i].n2));
	if (!integer_onep (this_cond))
	  need_inits = true;
      }

  return cont_bb;
}
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with any schedule.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	iend = iend0;
    L1:
	BODY;
	V += STEP;
	if (V cond iend) goto L1; else goto L2;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    If this is a combined omp parallel loop, instead of the call to
    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
    If this is gimple_omp_for_combined_p loop, then instead of assigning
    V and iend in L0 we assign the first two _looptemp_ clause decls of the
    inner GIMPLE_OMP_FOR and V += STEP; and
    if (V cond iend) goto L1; else goto L2; are removed.

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

    we generate pseudocode

	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	goto Z1;
    Z0:
	count = 0;
    Z1:
	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
	iend = iend0;
    L1:
	BODY;
	V += 1;
	if (V < iend) goto L10; else goto L2;
    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto L1; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto L1; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto L1;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    */
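
/* As a concrete pairing (illustrative, not from the original sources):
   for schedule(dynamic, CHUNK) with long iterators, START_FN is
   BUILT_IN_GOMP_LOOP_DYNAMIC_START and NEXT_FN is
   BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, so the pseudocode above expands
   into calls to the libgomp entry points

	bool GOMP_loop_dynamic_start (long start, long end, long incr,
				      long chunk_size, long *istart,
				      long *iend);
	bool GOMP_loop_dynamic_next (long *istart, long *iend);

   each returning true while another chunk [*istart, *iend) remains.  */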
static void
expand_omp_for_generic (struct omp_region *region,
			struct omp_for_data *fd,
			enum built_in_function start_fn,
			enum built_in_function next_fn,
			gimple *inner_stmt)
{
  tree type, istart0, iend0, iend;
  tree t, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
  basic_block l2_bb = NULL, l3_bb = NULL;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  bool in_combined_parallel = is_combined_parallel (region);
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  bool ordered_lastprivate = false;

  gcc_assert (!broken_loop || !in_combined_parallel);
  gcc_assert (fd->iter_type == long_integer_type_node
	      || !in_combined_parallel);

  entry_bb = region->entry;
  cont_bb = region->cont;
  collapse_bb = NULL;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  l1_bb = single_succ (l0_bb);
  if (!broken_loop)
    {
      l2_bb = create_empty_bb (cont_bb);
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
		      == l1_bb));
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  else
    l2_bb = NULL;
  l3_bb = BRANCH_EDGE (entry_bb)->dest;
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  if (fd->ordered
      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
			  OMP_CLAUSE_LASTPRIVATE))
    ordered_lastprivate = false;
  if (fd->collapse > 1 || fd->ordered)
    {
      int first_zero_iter1 = -1, first_zero_iter2 = -1;
      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter1_bb, first_zero_iter1,
				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);

      if (zero_iter1_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter1;
	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_nondebug_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter1_bb));
	}
      if (zero_iter2_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter2; i < fd->ordered; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  if (zero_iter1_bb)
	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
	  else
	    {
	      gsi_prev (&gsi);
	      e = split_block (entry_bb, gsi_stmt (gsi));
	      entry_bb = e->dest;
	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
	      gsi = gsi_last_nondebug_bb (entry_bb);
	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				       get_immediate_dominator
					 (CDI_DOMINATORS, zero_iter2_bb));
	    }
	}
      if (fd->collapse == 1)
	{
	  counts[0] = fd->loop.n2;
	  fd->loop = fd->loops[0];
	}
    }

  type = TREE_TYPE (fd->loop.v);
  istart0 = create_tmp_var (fd->iter_type, ".istart0");
  iend0 = create_tmp_var (fd->iter_type, ".iend0");
  TREE_ADDRESSABLE (istart0) = 1;
  TREE_ADDRESSABLE (iend0) = 1;

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type)
      && fd->ordered == 0)
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  gimple_stmt_iterator gsif = gsi;
  gsi_prev (&gsif);

  tree arr = NULL_TREE;
  if (in_combined_parallel)
    {
      gcc_assert (fd->ordered == 0);
      /* In a combined parallel loop, emit a call to
	 GOMP_loop_foo_next.  */
      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
    }
  else
    {
      tree t0, t1, t2, t3, t4;
      /* If this is not a combined parallel loop, emit a call to
	 GOMP_loop_foo_start in ENTRY_BB.  */
      t4 = build_fold_addr_expr (iend0);
      t3 = build_fold_addr_expr (istart0);
      if (fd->ordered)
	{
	  t0 = build_int_cst (unsigned_type_node,
			      fd->ordered - fd->collapse + 1);
	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
							fd->ordered
							- fd->collapse + 1),
				".omp_counts");
	  DECL_NAMELESS (arr) = 1;
	  TREE_ADDRESSABLE (arr) = 1;
	  TREE_STATIC (arr) = 1;
	  vec<constructor_elt, va_gc> *v;
	  vec_alloc (v, fd->ordered - fd->collapse + 1);
	  int idx;

	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
	    {
	      tree c;
	      if (idx == 0 && fd->collapse > 1)
		c = fd->loop.n2;
	      else
		c = counts[idx + fd->collapse - 1];
	      tree purpose = size_int (idx);
	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
	      if (TREE_CODE (c) != INTEGER_CST)
		TREE_STATIC (arr) = 0;
	    }

	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
	  if (!TREE_STATIC (arr))
	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
						    void_type_node, arr),
				      true, NULL_TREE, true, GSI_SAME_STMT);
	  t1 = build_fold_addr_expr (arr);
	  t2 = NULL_TREE;
	}
      else
	{
	  t2 = fold_convert (fd->iter_type, fd->loop.step);
	  t1 = fd->loop.n2;
	  t0 = fd->loop.n1;
	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
	    {
	      tree innerc
		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				   OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      t0 = OMP_CLAUSE_DECL (innerc);
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      t1 = OMP_CLAUSE_DECL (innerc);
	    }
	  if (POINTER_TYPE_P (TREE_TYPE (t0))
	      && TYPE_PRECISION (TREE_TYPE (t0))
		 != TYPE_PRECISION (fd->iter_type))
	    {
	      /* Avoid casting pointers to integer of a different size.  */
	      tree itype = signed_type_for (type);
	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
	    }
	  else
	    {
	      t1 = fold_convert (fd->iter_type, t1);
	      t0 = fold_convert (fd->iter_type, t0);
	    }
	  if (bias)
	    {
	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
	    }
	}
      if (fd->iter_type == long_integer_type_node || fd->ordered)
	{
	  if (fd->chunk_size)
	    {
	      t = fold_convert (fd->iter_type, fd->chunk_size);
	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
	      if (fd->ordered)
		t = build_call_expr (builtin_decl_explicit (start_fn),
				     5, t0, t1, t, t3, t4);
	      else
		t = build_call_expr (builtin_decl_explicit (start_fn),
				     6, t0, t1, t2, t, t3, t4);
	    }
	  else if (fd->ordered)
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 4, t0, t1, t3, t4);
	  else
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 5, t0, t1, t2, t3, t4);
	}
      else
	{
	  tree t5;
	  tree c_bool_type;
	  tree bfn_decl;

	  /* The GOMP_loop_ull_*start functions have additional boolean
	     argument, true for < loops and false for > loops.
	     In Fortran, the C bool type can be different from
	     boolean_type_node.  */
	  bfn_decl = builtin_decl_explicit (start_fn);
	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
	  t5 = build_int_cst (c_bool_type,
			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
	  if (fd->chunk_size)
	    {
	      tree bfn_decl = builtin_decl_explicit (start_fn);
	      t = fold_convert (fd->iter_type, fd->chunk_size);
	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
	    }
	  else
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 6, t5, t0, t1, t2, t3, t4);
	}
    }
  if (TREE_TYPE (t) != boolean_type_node)
    t = fold_build2 (NE_EXPR, boolean_type_node,
		     t, build_int_cst (TREE_TYPE (t), 0));
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
  if (arr && !TREE_STATIC (arr))
    {
      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
			 GSI_SAME_STMT);
    }
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (gsi_end_p (gsif))
    gsif = gsi_after_labels (gsi_bb (gsif));
  else
    gsi_next (&gsif);
  /* Iteration setup for sequential loop goes in L0_BB.  */
  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (inner_stmt)
		     == GF_OMP_FOR_KIND_SIMD);
      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }

  gsi = gsi_start_bb (l0_bb);
  t = istart0;
  if (fd->ordered && fd->collapse == 1)
    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
		     fold_convert (fd->iter_type, fd->loop.step));
  else if (bias)
    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
  if (fd->ordered && fd->collapse == 1)
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
			 fd->loop.n1, fold_convert (sizetype, t));
      else
	{
	  t = fold_convert (TREE_TYPE (startvar), t);
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
			   fd->loop.n1, t);
	}
    }
  else
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
      t = fold_convert (TREE_TYPE (startvar), t);
    }
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = iend0;
  if (fd->ordered && fd->collapse == 1)
    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
		     fold_convert (fd->iter_type, fd->loop.step));
  else if (bias)
    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
  if (fd->ordered && fd->collapse == 1)
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
			 fd->loop.n1, fold_convert (sizetype, t));
      else
	{
	  t = fold_convert (TREE_TYPE (startvar), t);
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
			   fd->loop.n1, t);
	}
    }
  else
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
      t = fold_convert (TREE_TYPE (startvar), t);
    }
  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, iend);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
	assign_stmt = gimple_build_assign (fd->loop.v, iend);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      itercnt = startvar;
	      tree n1 = fd->loop.n1;
	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
		{
		  itercnt
		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
				    itercnt);
		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
		}
	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
				     itercnt, n1);
	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
				     itercnt, fd->loop.step);
	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						  NULL_TREE, false,
						  GSI_CONTINUE_LINKING);
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (fd->ordered)
    {
      /* Until now, counts array contained number of iterations or
	 variable containing it for ith loop.  From now on, we need
	 those counts only for collapsed loops, and only for the 2nd
	 till the last collapsed one.  Move those one element earlier,
	 we'll use counts[fd->collapse - 1] for the first source/sink
	 iteration counter and so on and counts[fd->ordered]
	 as the array holding the current counter values for
	 depend(source).  */
      if (fd->collapse > 1)
	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
      if (broken_loop)
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    {
	      tree type = TREE_TYPE (fd->loops[i].v);
	      tree this_cond
		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
			       fold_convert (type, fd->loops[i].n1),
			       fold_convert (type, fd->loops[i].n2));
	      if (!integer_onep (this_cond))
		break;
	    }
	  if (i < fd->ordered)
	    {
	      cont_bb
		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
	      make_edge (cont_bb, l1_bb, 0);
	      l2_bb = create_empty_bb (cont_bb);
	      broken_loop = false;
	    }
	}
      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
					      ordered_lastprivate);
      if (counts[fd->collapse - 1])
	{
	  gcc_assert (fd->collapse == 1);
	  gsi = gsi_last_bb (l0_bb);
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
				   istart0, true);
	  gsi = gsi_last_bb (cont_bb);
	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
			   build_int_cst (fd->iter_type, 1));
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_zero_node, NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
	  t = counts[fd->collapse - 1];
	}
      else if (fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
			   fd->loops[0].v, fd->loops[0].n1);
	  t = fold_convert (fd->iter_type, t);
	}
      gsi = gsi_last_bb (l0_bb);
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_zero_node, NULL_TREE, NULL_TREE);
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      expand_omp_build_assign (&gsi, aref, t, true);
    }

  if (!broken_loop)
    {
      /* Code to control the increment and predicate for the sequential
	 loop goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, fd->loop.step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
	    {
	      if (fd->collapse > 1)
		t = fd->loop.v;
	      else
		{
		  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
				   fd->loops[0].v, fd->loops[0].n1);
		  t = fold_convert (fd->iter_type, t);
		}
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi, aref, t);
	    }

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
		      iend);
	  gcond *cond_stmt = gimple_build_cond_empty (t);
	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);

      /* Emit code to get the next parallel iteration in L2_BB.  */
      gsi = gsi_start_bb (l2_bb);

      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      if (TREE_TYPE (t) != boolean_type_node)
	t = fold_build2 (NE_EXPR, boolean_type_node,
			 t, build_int_cst (TREE_TYPE (t), 0));
      gcond *cond_stmt = gimple_build_cond_empty (t);
      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
    }

  /* Add the loop cleanup function.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
  gcall *call_stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
  if (fd->ordered)
    {
      tree arr = counts[fd->ordered];
      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
			GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      gimple_seq phis;

      e = find_edge (cont_bb, l3_bb);
      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);

      phis = phi_nodes (l3_bb);
      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
		   PHI_ARG_DEF_FROM_EDGE (phi, e));
	}
      remove_edge (e);

      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
      e = find_edge (cont_bb, l1_bb);
      if (e == NULL)
	{
	  e = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (e->dest) == l1_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (e);
	  e = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (e);
	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	e->flags = EDGE_TRUE_VALUE;
      if (e)
	{
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (7, 8);
	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
	}
      else
	{
	  e = find_edge (cont_bb, l2_bb);
	  e->flags = EDGE_FALLTHRU;
	}
      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);

      if (gimple_in_ssa_p (cfun))
	{
	  /* Add phis to the outer loop that connect to the phis in the inner,
	     original loop, and move the loop entry value of the inner phi to
	     the loop entry value of the outer phi.  */
	  gphi_iterator psi;
	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
	    {
	      source_location locus;
	      gphi *nphi;
	      gphi *exit_phi = psi.phi ();

	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);

	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
	      edge latch_to_l1 = find_edge (latch, l1_bb);
	      gphi *inner_phi
		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);

	      tree t = gimple_phi_result (exit_phi);
	      tree new_res = copy_ssa_name (t, NULL);
	      nphi = create_phi_node (new_res, l0_bb);

	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
	      add_phi_arg (nphi, t, entry_to_l0, locus);

	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);

	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
	    }
	}

      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
			       recompute_dominator (CDI_DOMINATORS, l2_bb));
      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
			       recompute_dominator (CDI_DOMINATORS, l3_bb));
      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
			       recompute_dominator (CDI_DOMINATORS, l0_bb));
      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
			       recompute_dominator (CDI_DOMINATORS, l1_bb));

      /* We enter expand_omp_for_generic with a loop.  This original loop may
	 have its own loop struct, or it may be part of an outer loop struct
	 (which may be the fake loop).  */
      struct loop *outer_loop = entry_bb->loop_father;
      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;

      add_bb_to_loop (l2_bb, outer_loop);

      /* We've added a new loop around the original loop.  Allocate the
	 corresponding loop struct.  */
      struct loop *new_loop = alloc_loop ();
      new_loop->header = l0_bb;
      new_loop->latch = l2_bb;
      add_loop (new_loop, outer_loop);

      /* Allocate a loop structure for the original loop unless we already
	 had one.  */
      if (!orig_loop_has_loop_struct
	  && !gimple_omp_for_combined_p (fd->for_stmt))
	{
	  struct loop *orig_loop = alloc_loop ();
	  orig_loop->header = l1_bb;
	  /* The loop may have multiple latches.  */
	  add_loop (orig_loop, new_loop);
	}
    }
}
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/
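
/* Worked example (illustrative, not from the original sources): for
   n = 103 iterations on nthreads = 4, q = 103 / 4 = 25 and
   tt = 103 % 4 = 3, so threads 0-2 take q + 1 = 26 iterations each and
   thread 3 takes 25; the resulting [s0, e0) ranges are [0, 26),
   [26, 52), [52, 78) and [78, 103).  */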
static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);
  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));

  struct loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
*/
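
/* Worked example (illustrative, not from the original sources): with
   schedule(static, 4), n = 20 and nthreads = 2, thread 0 executes
   chunks [0, 4), [8, 12) and [16, 20) while thread 1 executes [4, 8)
   and [12, 16); on each trip s0 = (trip * 2 + threadid) * 4 and
   e0 = min (s0 + 4, 20), and the loop exits once s0 >= 20.  */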
static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  se = split_block (entry_bb, last_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      se = split_block (entry_bb, cond_stmt);
      se->flags = EDGE_TRUE_VALUE;
      entry_bb = se->dest;
      se->probability = profile_probability::very_likely ();
      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
      se->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   se, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);
  tree chunk_size = fold_convert (itype, fd->chunk_size);
  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
  chunk_size
    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
				GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);

  trip_var = create_tmp_reg (itype, ".trip");
  if (gimple_in_ssa_p (cfun))
    {
      trip_init = make_ssa_name (trip_var);
      trip_main = make_ssa_name (trip_var);
      trip_back = make_ssa_name (trip_var);
    }
  else
    {
      trip_init = trip_var;
      trip_main = trip_var;
      trip_back = trip_var;
    }

  gassign *assign_stmt
    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  gimple_stmt_iterator gsif = gsi;

  /* Iteration space partitioning goes in ITER_PART_BB.  */
  gsi = gsi_last_bb (iter_part_bb);

  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
  t = fold_build2 (MIN_EXPR, itype, t, n);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = build2 (LT_EXPR, boolean_type_node, s0, n);
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }

  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercntbias
		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
				   fold_convert (itype, fd->loop.n1));
		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
					     itercntbias, step);
		  itercntbias
		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop goes in CONT_BB,
	 replacing the GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
	    t = build2 (EQ_EXPR, boolean_type_node,
			build_int_cst (itype, 0),
			build_int_cst (itype, 1));
	  else
	    t = build2 (fd->loop.cond_code, boolean_type_node,
			DECL_P (vback) && TREE_ADDRESSABLE (vback)
			? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);

      /* Trip update code goes into TRIP_UPDATE_BB.  */
      gsi = gsi_start_bb (trip_update_bb);

      t = build_int_cst (itype, 1);
      t = build2 (PLUS_EXPR, itype, trip_main, t);
      assign_stmt = gimple_build_assign (trip_back, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      se = find_edge (cont_bb, body_bb);
      if (se == NULL)
	{
	  se = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (se->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (se);
	  se = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (se);
	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	se->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, trip_update_bb)->flags
	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;

      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
				iter_part_bb);
    }

  if (gimple_in_ssa_p (cfun))
    {
      gphi_iterator psi;
      gphi *phi;
      edge re, ene;
      edge_var_map *vm;
      size_t i;

      gcc_assert (fd->collapse == 1 && !broken_loop);

      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
	 remove arguments of the phi nodes in fin_bb.  We need to create
	 appropriate phi nodes in iter_part_bb instead.  */
      se = find_edge (iter_part_bb, fin_bb);
      re = single_succ_edge (trip_update_bb);
      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
      ene = single_succ_edge (entry_bb);

      psi = gsi_start_phis (fin_bb);
      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
	   gsi_next (&psi), ++i)
	{
	  gphi *nphi;
	  source_location locus;

	  phi = psi.phi ();
	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
			       redirect_edge_var_map_def (vm), 0))
	    continue;

	  t = gimple_phi_result (phi);
	  gcc_assert (t == redirect_edge_var_map_result (vm));

	  if (!single_pred_p (fin_bb))
	    t = copy_ssa_name (t, phi);

	  nphi = create_phi_node (t, iter_part_bb);

	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
	  locus = gimple_phi_arg_location_from_edge (phi, se);

	  /* A special case -- fd->loop.v is not yet computed in
	     iter_part_bb, we need to use vextra instead.  */
	  if (t == fd->loop.v)
	    t = vextra;
	  add_phi_arg (nphi, t, ene, locus);
	  locus = redirect_edge_var_map_location (vm);
	  tree back_arg = redirect_edge_var_map_def (vm);
	  add_phi_arg (nphi, back_arg, re, locus);
	  edge ce = find_edge (cont_bb, body_bb);
	  if (ce == NULL)
	    {
	      ce = BRANCH_EDGE (cont_bb);
	      gcc_assert (single_succ (ce->dest) == body_bb);
	      ce = single_succ_edge (ce->dest);
	    }
	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
	  gcc_assert (inner_loop_phi != NULL);
	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
		       find_edge (seq_start_bb, body_bb), locus);

	  if (!single_pred_p (fin_bb))
	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
	}
      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
      redirect_edge_var_map_clear (re);
      if (single_pred_p (fin_bb))
	while (1)
	  {
	    psi = gsi_start_phis (fin_bb);
	    if (gsi_end_p (psi))
	      break;
	    remove_phi_node (&psi, false);
	  }

      /* Make phi node for trip.  */
      phi = create_phi_node (trip_main, iter_part_bb);
      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
		   UNKNOWN_LOCATION);
      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
		   UNKNOWN_LOCATION);
    }

  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));

  if (!broken_loop)
    {
      struct loop *loop = body_bb->loop_father;
      struct loop *trip_loop = alloc_loop ();
      trip_loop->header = iter_part_bb;
      trip_loop->latch = trip_update_bb;
      add_loop (trip_loop, iter_part_bb->loop_father);

      if (loop != entry_bb->loop_father)
	{
	  gcc_assert (loop->header == body_bb);
	  gcc_assert (loop->latch == region->cont
		      || single_pred (loop->latch) == region->cont);
	  trip_loop->inner = loop;
	  return;
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  loop = alloc_loop ();
	  loop->header = body_bb;
	  if (collapse_bb == NULL)
	    loop->latch = cont_bb;
	  add_loop (loop, trip_loop);
	}
    }
}
/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
   loop.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	V = N1;
	goto L1;
    L0:
	BODY;
	V += STEP;
    L1:
	if (V cond N2) goto L0; else goto L2;
    L2:

   For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

   we generate pseudocode

	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	V = 0;
	V1 = N11;
	V2 = N21;
	V3 = N31;
	goto L1;
    L0:
	BODY;
	V += 1;
	V3 += STEP3;
	V2 += (V3 cond3 N32) ? 0 : STEP2;
	V3 = (V3 cond3 N32) ? V3 : N31;
	V1 += (V2 cond2 N22) ? 0 : STEP1;
	V2 = (V2 cond2 N22) ? V2 : N21;
    L1:
	if (V < count) goto L0; else goto L2;
    L2:

*/
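/* As an illustration only (hypothetical user code, not from this file):
   the routine below handles simd loops such as

     #pragma omp simd safelen(8)
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   where A, B, C and N are made-up names.  No runtime calls are emitted;
   the loop is rewritten as above and annotated (safelen, simduid,
   force_vectorize) so the vectorizer can exploit it later.  */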
static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  int safelen_int = INT_MAX;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMDUID_);
  tree n1, n2;

  if (safelen)
    {
      poly_uint64 val;
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (!poly_int_tree_p (safelen, &val))
	safelen_int = 0;
      else
	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
      if (safelen_int == 1)
	safelen_int = 0;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;

  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMT_);
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      is_simt = safelen_int > 1;
    }
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
				 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
	n1 = fold_build_pointer_plus (n1, offset);
      else
	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
	simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
					unsigned_type_node, 0);
      if (simt_maxlane)
	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }

  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  if (fd->collapse > 1)
    {
      if (gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  gsi_prev (&gsi);
	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
	  gsi_next (&gsi);
	}
      else
	for (i = 0; i < fd->collapse; i++)
	  {
	    tree itype = TREE_TYPE (fd->loops[i].v);
	    if (POINTER_TYPE_P (itype))
	      itype = signed_type_for (itype);
	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  }
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);

      if (fd->collapse > 1)
	{
	  i = fd->collapse - 1;
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      t = fold_convert (sizetype, fd->loops[i].step);
	      t = fold_build_pointer_plus (fd->loops[i].v, t);
	    }
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);

	  for (i = fd->collapse - 1; i > 0; i--)
	    {
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
	      if (POINTER_TYPE_P (itype2))
		itype2 = signed_type_for (itype2);
	      t = fold_convert (itype2, fd->loops[i - 1].step);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      t = build3 (COND_EXPR, itype2,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  build_int_cst (itype2, 0), t);
	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
	      else
		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);

	      t = fold_convert (itype, fd->loops[i].n1);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      t = build3 (COND_EXPR, itype,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  fd->loops[i].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	    }
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  t = fold_convert (type, n2);
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  tree v = fd->loop.v;
  if (DECL_P (v) && TREE_ADDRESSABLE (v))
    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
				     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability
	= profile_probability::guessed_always ().apply_scale (7, 8);
      BRANCH_EDGE (entry_bb)->probability
	= FALLTHRU_EDGE (entry_bb)->probability.invert ();
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop)
    {
      struct loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
	{
	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
	  cfun->has_simduid_loops = true;
	}
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	 the loop.  */
      if ((flag_tree_loop_vectorize
	   || !global_options_set.x_flag_tree_loop_vectorize)
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  cfun->has_force_vectorize_loops = true;
	}
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}
/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */
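/* As an illustration only (hypothetical user code, not from this file):
   a taskloop such as

     #pragma omp taskloop num_tasks(8)
     for (int i = 0; i < n; i++)
       work (i);

   (WORK and N are made-up names) reaches here as an outer GIMPLE_OMP_FOR
   wrapping GIMPLE_OMP_TASK wrapping an inner GIMPLE_OMP_FOR.  The outer
   loop expanded below merely materializes the bounds into the task's
   _looptemp_ clauses for the GOMP_taskloop call.  */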
static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (innerc);
      for (i = 1; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	}
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS,
						region->entry));
}
/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.  */
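/* As an illustration only (hypothetical names, not from this file):
   conceptually each task created by GOMP_taskloop{,_ull} receives one
   contiguous range, and the inner loop expanded below degenerates to

     for (V = task_start; V cond task_end; V += STEP)
       BODY;

   where task_start and task_end stand for the two _LOOPTEMP_
   temporaries filled in by the runtime.  */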
static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      struct loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop.  */
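/* As an illustration only (hypothetical user code, not from this file):
   an OpenACC loop reaching this routine could be

     #pragma acc parallel loop gang vector
     for (int i = 0; i < n; i++)
       a[i] += b[i];

   (A, B, N are made-up names).  The gang/vector clauses become the GWV
   mask, and the IFN_GOACC_LOOP_* internal calls described above are only
   resolved to target-specific code later by oacc_device_lower.  */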
5109 expand_oacc_for (struct omp_region
*region
, struct omp_for_data
*fd
)
5111 tree v
= fd
->loop
.v
;
5112 enum tree_code cond_code
= fd
->loop
.cond_code
;
5113 enum tree_code plus_code
= PLUS_EXPR
;
5115 tree chunk_size
= integer_minus_one_node
;
5116 tree gwv
= integer_zero_node
;
5117 tree iter_type
= TREE_TYPE (v
);
5118 tree diff_type
= iter_type
;
5119 tree plus_type
= iter_type
;
5120 struct oacc_collapse
*counts
= NULL
;
5122 gcc_checking_assert (gimple_omp_for_kind (fd
->for_stmt
)
5123 == GF_OMP_FOR_KIND_OACC_LOOP
);
5124 gcc_assert (!gimple_omp_for_combined_into_p (fd
->for_stmt
));
5125 gcc_assert (cond_code
== LT_EXPR
|| cond_code
== GT_EXPR
);
5127 if (POINTER_TYPE_P (iter_type
))
5129 plus_code
= POINTER_PLUS_EXPR
;
5130 plus_type
= sizetype
;
5132 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
5133 diff_type
= signed_type_for (diff_type
);
5134 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
5135 diff_type
= integer_type_node
;
5137 basic_block entry_bb
= region
->entry
; /* BB ending in OMP_FOR */
5138 basic_block exit_bb
= region
->exit
; /* BB ending in OMP_RETURN */
5139 basic_block cont_bb
= region
->cont
; /* BB ending in OMP_CONTINUE */
5140 basic_block bottom_bb
= NULL
;
5142 /* entry_bb has two sucessors; the branch edge is to the exit
5143 block, fallthrough edge to body. */
5144 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2
5145 && BRANCH_EDGE (entry_bb
)->dest
== exit_bb
);
5147 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5148 body_bb, or to a block whose only successor is the body_bb. Its
5149 fallthrough successor is the final block (same as the branch
5150 successor of the entry_bb). */
5153 basic_block body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
5154 basic_block bed
= BRANCH_EDGE (cont_bb
)->dest
;
5156 gcc_assert (FALLTHRU_EDGE (cont_bb
)->dest
== exit_bb
);
5157 gcc_assert (bed
== body_bb
|| single_succ_edge (bed
)->dest
== body_bb
);
5160 gcc_assert (!gimple_in_ssa_p (cfun
));
5162 /* The exit block only has entry_bb and cont_bb as predecessors. */
5163 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1 + (cont_bb
!= NULL
));
5166 tree chunk_max
= NULL_TREE
;
5168 tree step
= create_tmp_var (diff_type
, ".step");
5169 bool up
= cond_code
== LT_EXPR
;
5170 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
5171 bool chunking
= !gimple_in_ssa_p (cfun
);
5175 tree tile_size
= NULL_TREE
;
5176 tree element_s
= NULL_TREE
;
5177 tree e_bound
= NULL_TREE
, e_offset
= NULL_TREE
, e_step
= NULL_TREE
;
5178 basic_block elem_body_bb
= NULL
;
5179 basic_block elem_cont_bb
= NULL
;
5181 /* SSA instances. */
5182 tree offset_incr
= NULL_TREE
;
5183 tree offset_init
= NULL_TREE
;
5185 gimple_stmt_iterator gsi
;
5191 edge split
, be
, fte
;
5193 /* Split the end of entry_bb to create head_bb. */
5194 split
= split_block (entry_bb
, last_stmt (entry_bb
));
5195 basic_block head_bb
= split
->dest
;
5196 entry_bb
= split
->src
;
5198 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5199 gsi
= gsi_last_nondebug_bb (entry_bb
);
5200 gomp_for
*for_stmt
= as_a
<gomp_for
*> (gsi_stmt (gsi
));
5201 loc
= gimple_location (for_stmt
);
5203 if (gimple_in_ssa_p (cfun
))
5205 offset_init
= gimple_omp_for_index (for_stmt
, 0);
5206 gcc_assert (integer_zerop (fd
->loop
.n1
));
5207 /* The SSA parallelizer does gang parallelism. */
5208 gwv
= build_int_cst (integer_type_node
, GOMP_DIM_MASK (GOMP_DIM_GANG
));
5211 if (fd
->collapse
> 1 || fd
->tiling
)
5213 gcc_assert (!gimple_in_ssa_p (cfun
) && up
);
5214 counts
= XALLOCAVEC (struct oacc_collapse
, fd
->collapse
);
5215 tree total
= expand_oacc_collapse_init (fd
, &gsi
, counts
,
5216 TREE_TYPE (fd
->loop
.n2
), loc
);
5218 if (SSA_VAR_P (fd
->loop
.n2
))
5220 total
= force_gimple_operand_gsi (&gsi
, total
, false, NULL_TREE
,
5221 true, GSI_SAME_STMT
);
5222 ass
= gimple_build_assign (fd
->loop
.n2
, total
);
5223 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5227 tree b
= fd
->loop
.n1
;
5228 tree e
= fd
->loop
.n2
;
5229 tree s
= fd
->loop
.step
;
5231 b
= force_gimple_operand_gsi (&gsi
, b
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5232 e
= force_gimple_operand_gsi (&gsi
, e
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5234 /* Convert the step, avoiding possible unsigned->signed overflow. */
5235 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
5237 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
5238 s
= fold_convert (diff_type
, s
);
5240 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
5241 s
= force_gimple_operand_gsi (&gsi
, s
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5244 chunk_size
= integer_zero_node
;
5245 expr
= fold_convert (diff_type
, chunk_size
);
5246 chunk_size
= force_gimple_operand_gsi (&gsi
, expr
, true,
5247 NULL_TREE
, true, GSI_SAME_STMT
);
5251 /* Determine the tile size and element step,
5252 modify the outer loop step size. */
5253 tile_size
= create_tmp_var (diff_type
, ".tile_size");
5254 expr
= build_int_cst (diff_type
, 1);
5255 for (int ix
= 0; ix
< fd
->collapse
; ix
++)
5256 expr
= fold_build2 (MULT_EXPR
, diff_type
, counts
[ix
].tile
, expr
);
5257 expr
= force_gimple_operand_gsi (&gsi
, expr
, true,
5258 NULL_TREE
, true, GSI_SAME_STMT
);
5259 ass
= gimple_build_assign (tile_size
, expr
);
5260 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5262 element_s
= create_tmp_var (diff_type
, ".element_s");
5263 ass
= gimple_build_assign (element_s
, s
);
5264 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5266 expr
= fold_build2 (MULT_EXPR
, diff_type
, s
, tile_size
);
5267 s
= force_gimple_operand_gsi (&gsi
, expr
, true,
5268 NULL_TREE
, true, GSI_SAME_STMT
);
5271 /* Determine the range, avoiding possible unsigned->signed overflow. */
5272 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
5273 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
5274 fold_convert (plus_type
, negating
? b
: e
),
5275 fold_convert (plus_type
, negating
? e
: b
));
5276 expr
= fold_convert (diff_type
, expr
);
5278 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
5279 tree range
= force_gimple_operand_gsi (&gsi
, expr
, true,
5280 NULL_TREE
, true, GSI_SAME_STMT
);
5282 chunk_no
= build_int_cst (diff_type
, 0);
5285 gcc_assert (!gimple_in_ssa_p (cfun
));
5288 chunk_max
= create_tmp_var (diff_type
, ".chunk_max");
5289 chunk_no
= create_tmp_var (diff_type
, ".chunk_no");
5291 ass
= gimple_build_assign (chunk_no
, expr
);
5292 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5294 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5295 build_int_cst (integer_type_node
,
5296 IFN_GOACC_LOOP_CHUNKS
),
5297 dir
, range
, s
, chunk_size
, gwv
);
5298 gimple_call_set_lhs (call
, chunk_max
);
5299 gimple_set_location (call
, loc
);
5300 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5303 chunk_size
= chunk_no
;
5305 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5306 build_int_cst (integer_type_node
,
5307 IFN_GOACC_LOOP_STEP
),
5308 dir
, range
, s
, chunk_size
, gwv
);
5309 gimple_call_set_lhs (call
, step
);
5310 gimple_set_location (call
, loc
);
5311 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5313 /* Remove the GIMPLE_OMP_FOR. */
5314 gsi_remove (&gsi
, true);
5316 /* Fixup edges from head_bb. */
5317 be
= BRANCH_EDGE (head_bb
);
5318 fte
= FALLTHRU_EDGE (head_bb
);
5319 be
->flags
|= EDGE_FALSE_VALUE
;
5320 fte
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5322 basic_block body_bb
= fte
->dest
;
5324 if (gimple_in_ssa_p (cfun
))
5326 gsi
= gsi_last_nondebug_bb (cont_bb
);
5327 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5329 offset
= gimple_omp_continue_control_use (cont_stmt
);
5330 offset_incr
= gimple_omp_continue_control_def (cont_stmt
);
5334 offset
= create_tmp_var (diff_type
, ".offset");
5335 offset_init
= offset_incr
= offset
;
5337 bound
= create_tmp_var (TREE_TYPE (offset
), ".bound");
5339 /* Loop offset & bound go into head_bb. */
5340 gsi
= gsi_start_bb (head_bb
);
5342 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5343 build_int_cst (integer_type_node
,
5344 IFN_GOACC_LOOP_OFFSET
),
5346 chunk_size
, gwv
, chunk_no
);
5347 gimple_call_set_lhs (call
, offset_init
);
5348 gimple_set_location (call
, loc
);
5349 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5351 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5352 build_int_cst (integer_type_node
,
5353 IFN_GOACC_LOOP_BOUND
),
5355 chunk_size
, gwv
, offset_init
);
5356 gimple_call_set_lhs (call
, bound
);
5357 gimple_set_location (call
, loc
);
5358 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5360 expr
= build2 (cond_code
, boolean_type_node
, offset_init
, bound
);
5361 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5362 GSI_CONTINUE_LINKING
);
5364 /* V assignment goes into body_bb. */
5365 if (!gimple_in_ssa_p (cfun
))
5367 gsi
= gsi_start_bb (body_bb
);
5369 expr
= build2 (plus_code
, iter_type
, b
,
5370 fold_convert (plus_type
, offset
));
5371 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5372 true, GSI_SAME_STMT
);
5373 ass
= gimple_build_assign (v
, expr
);
5374 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5376 if (fd
->collapse
> 1 || fd
->tiling
)
5377 expand_oacc_collapse_vars (fd
, false, &gsi
, counts
, v
);
5381 /* Determine the range of the element loop -- usually simply
5382 the tile_size, but could be smaller if the final
5383 iteration of the outer loop is a partial tile. */
5384 tree e_range
= create_tmp_var (diff_type
, ".e_range");
5386 expr
= build2 (MIN_EXPR
, diff_type
,
5387 build2 (MINUS_EXPR
, diff_type
, bound
, offset
),
5388 build2 (MULT_EXPR
, diff_type
, tile_size
,
5390 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5391 true, GSI_SAME_STMT
);
5392 ass
= gimple_build_assign (e_range
, expr
);
5393 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5395 /* Determine bound, offset & step of inner loop. */
5396 e_bound
= create_tmp_var (diff_type
, ".e_bound");
5397 e_offset
= create_tmp_var (diff_type
, ".e_offset");
5398 e_step
= create_tmp_var (diff_type
, ".e_step");
5400 /* Mark these as element loops. */
5401 tree t
, e_gwv
= integer_minus_one_node
;
5402 tree chunk
= build_int_cst (diff_type
, 0); /* Never chunked. */
5404 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_OFFSET
);
5405 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5406 element_s
, chunk
, e_gwv
, chunk
);
5407 gimple_call_set_lhs (call
, e_offset
);
5408 gimple_set_location (call
, loc
);
5409 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5411 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_BOUND
);
5412 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5413 element_s
, chunk
, e_gwv
, e_offset
);
5414 gimple_call_set_lhs (call
, e_bound
);
5415 gimple_set_location (call
, loc
);
5416 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5418 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_STEP
);
5419 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6, t
, dir
, e_range
,
5420 element_s
, chunk
, e_gwv
);
5421 gimple_call_set_lhs (call
, e_step
);
5422 gimple_set_location (call
, loc
);
5423 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5425 /* Add test and split block. */
5426 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5427 stmt
= gimple_build_cond_empty (expr
);
5428 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5429 split
= split_block (body_bb
, stmt
);
5430 elem_body_bb
= split
->dest
;
5431 if (cont_bb
== body_bb
)
5432 cont_bb
= elem_body_bb
;
5433 body_bb
= split
->src
;
5435 split
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5437 /* Initialize the user's loop vars. */
5438 gsi
= gsi_start_bb (elem_body_bb
);
5439 expand_oacc_collapse_vars (fd
, true, &gsi
, counts
, e_offset
);
5443 /* Loop increment goes into cont_bb. If this is not a loop, we
5444 will have spawned threads as if it was, and each one will
5445 execute one iteration. The specification is not explicit about
5446 whether such constructs are ill-formed or not, and they can
5447 occur, especially when noreturn routines are involved. */
5450 gsi
= gsi_last_nondebug_bb (cont_bb
);
5451 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5452 loc
= gimple_location (cont_stmt
);
5456 /* Insert element loop increment and test. */
5457 expr
= build2 (PLUS_EXPR
, diff_type
, e_offset
, e_step
);
5458 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5459 true, GSI_SAME_STMT
);
5460 ass
= gimple_build_assign (e_offset
, expr
);
5461 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5462 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5464 stmt
= gimple_build_cond_empty (expr
);
5465 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5466 split
= split_block (cont_bb
, stmt
);
5467 elem_cont_bb
= split
->src
;
5468 cont_bb
= split
->dest
;
5470 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5471 split
->probability
= profile_probability::unlikely ().guessed ();
5473 = make_edge (elem_cont_bb
, elem_body_bb
, EDGE_TRUE_VALUE
);
5474 latch_edge
->probability
= profile_probability::likely ().guessed ();
5476 edge skip_edge
= make_edge (body_bb
, cont_bb
, EDGE_FALSE_VALUE
);
5477 skip_edge
->probability
= profile_probability::unlikely ().guessed ();
5478 edge loop_entry_edge
= EDGE_SUCC (body_bb
, 1 - skip_edge
->dest_idx
);
5479 loop_entry_edge
->probability
5480 = profile_probability::likely ().guessed ();
5482 gsi
= gsi_for_stmt (cont_stmt
);
      /* Increment offset.  */
      if (gimple_in_ssa_p (cfun))
	expr = build2 (plus_code, iter_type, offset,
		       fold_convert (plus_type, step));
      else
	expr = build2 (PLUS_EXPR, diff_type, offset, step);
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (offset_incr, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);

      /* Remove the GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      /* Fixup edges from cont_bb.  */
      be = BRANCH_EDGE (cont_bb);
      fte = FALLTHRU_EDGE (cont_bb);
      be->flags |= EDGE_TRUE_VALUE;
      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
      if (chunking)
	{
	  /* Split the beginning of exit_bb to make bottom_bb.  We
	     need to insert a nop at the start, because splitting is
	     after a stmt, not before.  */
	  gsi = gsi_start_bb (exit_bb);
	  stmt = gimple_build_nop ();
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (exit_bb, stmt);
	  bottom_bb = split->src;
	  exit_bb = split->dest;
	  gsi = gsi_last_bb (bottom_bb);

	  /* Chunk increment and test goes into bottom_bb.  */
	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
			 build_int_cst (diff_type, 1));
	  ass = gimple_build_assign (chunk_no, expr);
	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);

	  /* Chunk test at end of bottom_bb.  */
	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
			    GSI_CONTINUE_LINKING);

	  /* Fixup edges from bottom_bb.  */
	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  split->probability = profile_probability::unlikely ().guessed ();
	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
	  latch_edge->probability = profile_probability::likely ().guessed ();
	}
    }
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  loc = gimple_location (gsi_stmt (gsi));

  if (!gimple_in_ssa_p (cfun))
    {
      /* Insert the final value of V, in case it is live.  This is the
	 value for the only thread that survives past the join.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
    }

  /* Remove the OMP_RETURN.  */
  gsi_remove (&gsi, true);
  if (cont_bb)
    {
      /* We now have one, two or three nested loops.  Update the loop
	 structures.  */
      struct loop *parent = entry_bb->loop_father;
      struct loop *body = body_bb->loop_father;

      if (chunking)
	{
	  struct loop *chunk_loop = alloc_loop ();
	  chunk_loop->header = head_bb;
	  chunk_loop->latch = bottom_bb;
	  add_loop (chunk_loop, parent);
	  parent = chunk_loop;
	}
      else if (parent != body)
	{
	  gcc_assert (body->header == body_bb);
	  gcc_assert (body->latch == cont_bb
		      || single_pred (body->latch) == cont_bb);
	  parent = NULL;
	}

      if (parent)
	{
	  struct loop *body_loop = alloc_loop ();
	  body_loop->header = body_bb;
	  body_loop->latch = cont_bb;
	  add_loop (body_loop, parent);

	  if (fd->tiling)
	    {
	      /* Insert tiling's element loop.  */
	      struct loop *inner_loop = alloc_loop ();
	      inner_loop->header = elem_body_bb;
	      inner_loop->latch = elem_cont_bb;
	      add_loop (inner_loop, body_loop);
	    }
	}
    }
}
/* Expand the OMP loop defined by REGION.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);
  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  fn_index = 3;
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
	      && !fd.ordered
	      && !fd.have_ordered)
	    {
	      fn_index = 3 + fd.sched_kind;
	      break;
	    }
	  /* FALLTHRU */
	default:
	  fn_index = fd.sched_kind;
	  break;
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 6;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

    If this is a combined parallel sections, replace the call to
    GOMP_sections_start with call to GOMP_sections_next.  */
static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_nondebug_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_nondebug_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_nondebug_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_nondebug_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_nondebug_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_nondebug_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_nondebug_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_nondebug_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}
/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.  */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  si = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}
/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_nondebug_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  call = build_call_expr_loc (loc, decl, 2, addr,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL,
					     seq_cst ? MEMMODEL_SEQ_CST
						     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
		      build_int_cst (TREE_TYPE (iaddr), 0));

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  struct loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

			GOMP_atomic_start ();
			*addr = rhs;
			GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, this appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
   *addr = stored_val;  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
			      stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}
/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
/* Mark the loops inside the kernels region starting at REGION_ENTRY and
   ending at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  struct loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  struct loop *single_outer = NULL;
  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (struct loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}
/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.  */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
			     tree fld_decl, int index, tree value)
{
  tree ref = build4 (ARRAY_REF, uint32_type_node,
		     build3 (COMPONENT_REF,
			     grid_attr_trees->kernel_dim_array_type,
			     range_var, fld_decl, NULL_TREE),
		     build_int_cst (integer_type_node, index),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}
/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}
/* Build target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subseqent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subseqent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}
/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   elements.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}
/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}
/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded, data_region;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      data_region = false;
      break;
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      data_region = true;
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
    {
      mark_loops_in_oacc_kernels_region (region->entry, region->exit);

      /* Further down, both OpenACC kernels and OpenACC parallel constructs
	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
    }
, dstidx
, num
;
6929 /* If the offloading region needs data sent from the parent
6930 function, then the very first statement (except possible
6931 tree profile counter updates) of the offloading body
6932 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6933 &.OMP_DATA_O is passed as an argument to the child function,
6934 we need to replace it with the argument as seen by the child
6937 In most cases, this will end up being the identity assignment
6938 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6939 a function call that has been inlined, the original PARM_DECL
6940 .OMP_DATA_I may have been converted into a different local
6941 variable. In which case, we need to keep the assignment. */
6942 tree data_arg
= gimple_omp_target_data_arg (entry_stmt
);
6945 basic_block entry_succ_bb
= single_succ (entry_bb
);
6946 gimple_stmt_iterator gsi
;
6948 gimple
*tgtcopy_stmt
= NULL
;
6949 tree sender
= TREE_VEC_ELT (data_arg
, 0);
6951 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
6953 gcc_assert (!gsi_end_p (gsi
));
6954 stmt
= gsi_stmt (gsi
);
6955 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6958 if (gimple_num_ops (stmt
) == 2)
6960 tree arg
= gimple_assign_rhs1 (stmt
);
6962 /* We're ignoring the subcode because we're
6963 effectively doing a STRIP_NOPS. */
6965 if (TREE_CODE (arg
) == ADDR_EXPR
6966 && TREE_OPERAND (arg
, 0) == sender
)
6968 tgtcopy_stmt
= stmt
;
6974 gcc_assert (tgtcopy_stmt
!= NULL
);
6975 arg
= DECL_ARGUMENTS (child_fn
);
6977 gcc_assert (gimple_assign_lhs (tgtcopy_stmt
) == arg
);
6978 gsi_remove (&gsi
, true);
6981 /* Declare local variables needed in CHILD_CFUN. */
6982 block
= DECL_INITIAL (child_fn
);
6983 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
6984 /* The gimplifier could record temporaries in the offloading block
6985 rather than in containing function's local_decls chain,
6986 which would mean cgraph missed finalizing them. Do it now. */
6987 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
6988 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
6989 varpool_node::finalize_decl (t
);
6990 DECL_SAVED_TREE (child_fn
) = NULL
;
6991 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6992 gimple_set_body (child_fn
, NULL
);
6993 TREE_USED (block
) = 1;
6995 /* Reset DECL_CONTEXT on function arguments. */
6996 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
6997 DECL_CONTEXT (t
) = child_fn
;
6999 /* Split ENTRY_BB at GIMPLE_*,
7000 so that it can be moved to the child function. */
7001 gsi
= gsi_last_nondebug_bb (entry_bb
);
7002 stmt
= gsi_stmt (gsi
);
7004 && gimple_code (stmt
) == gimple_code (entry_stmt
));
7005 e
= split_block (entry_bb
, stmt
);
7006 gsi_remove (&gsi
, true);
7008 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
7010 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7013 gsi
= gsi_last_nondebug_bb (exit_bb
);
7014 gcc_assert (!gsi_end_p (gsi
)
7015 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7016 stmt
= gimple_build_return (NULL
);
7017 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
7018 gsi_remove (&gsi
, true);
7021 /* Make sure to generate early debug for the function before
7022 outlining anything. */
7023 if (! gimple_in_ssa_p (cfun
))
7024 (*debug_hooks
->early_global_decl
) (cfun
->decl
);
7026 /* Move the offloading region into CHILD_CFUN. */
7028 block
= gimple_block (entry_stmt
);
7030 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
7032 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
7033 /* When the OMP expansion process cannot guarantee an up-to-date
7034 loop tree arrange for the child function to fixup loops. */
7035 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7036 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
7038 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7039 num
= vec_safe_length (child_cfun
->local_decls
);
7040 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
7042 t
= (*child_cfun
->local_decls
)[srcidx
];
7043 if (DECL_CONTEXT (t
) == cfun
->decl
)
7045 if (srcidx
!= dstidx
)
7046 (*child_cfun
->local_decls
)[dstidx
] = t
;
7050 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
7052 /* Inform the callgraph about the new function. */
7053 child_cfun
->curr_properties
= cfun
->curr_properties
;
7054 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
7055 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
7056 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
7057 node
->parallelized_function
= 1;
7058 cgraph_node::add_new_function (child_fn
, true);
7060 /* Add the new function to the offload table. */
7061 if (ENABLE_OFFLOADING
)
7064 DECL_PRESERVE_P (child_fn
) = 1;
7065 vec_safe_push (offload_funcs
, child_fn
);
7068 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
7069 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
7071 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7072 fixed in a following pass. */
7073 push_cfun (child_cfun
);
7075 assign_assembler_name_if_needed (child_fn
);
7076 cgraph_edge::rebuild_edges ();
7078 /* Some EH regions might become dead, see PR34608. If
7079 pass_cleanup_cfg isn't the first pass to happen with the
7080 new child, these dead EH edges might cause problems.
7081 Clean them up now. */
7082 if (flag_exceptions
)
7085 bool changed
= false;
7087 FOR_EACH_BB_FN (bb
, cfun
)
7088 changed
|= gimple_purge_dead_eh_edges (bb
);
7090 cleanup_tree_cfg ();
7092 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7093 verify_loop_structure ();
7096 if (dump_file
&& !gimple_in_ssa_p (cfun
))
7098 omp_any_child_fn_dumped
= true;
7099 dump_function_header (dump_file
, child_fn
, dump_flags
);
7100 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
7104 /* Emit a library call to launch the offloading region, or do data
7106 tree t1
, t2
, t3
, t4
, device
, cond
, depend
, c
, clauses
;
7107 enum built_in_function start_ix
;
7108 location_t clause_loc
;
7109 unsigned int flags_i
= 0;
7111 switch (gimple_omp_target_kind (entry_stmt
))
7113 case GF_OMP_TARGET_KIND_REGION
:
7114 start_ix
= BUILT_IN_GOMP_TARGET
;
7116 case GF_OMP_TARGET_KIND_DATA
:
7117 start_ix
= BUILT_IN_GOMP_TARGET_DATA
;
7119 case GF_OMP_TARGET_KIND_UPDATE
:
7120 start_ix
= BUILT_IN_GOMP_TARGET_UPDATE
;
7122 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7123 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7125 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7126 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7127 flags_i
|= GOMP_TARGET_FLAG_EXIT_DATA
;
7129 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7130 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7131 start_ix
= BUILT_IN_GOACC_PARALLEL
;
7133 case GF_OMP_TARGET_KIND_OACC_DATA
:
7134 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7135 start_ix
= BUILT_IN_GOACC_DATA_START
;
7137 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7138 start_ix
= BUILT_IN_GOACC_UPDATE
;
7140 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7141 start_ix
= BUILT_IN_GOACC_ENTER_EXIT_DATA
;
7143 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7144 start_ix
= BUILT_IN_GOACC_DECLARE
;
7150 clauses
= gimple_omp_target_clauses (entry_stmt
);
7152 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7153 library choose) and there is no conditional. */
7155 device
= build_int_cst (integer_type_node
, GOMP_DEVICE_ICV
);
7157 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
7159 cond
= OMP_CLAUSE_IF_EXPR (c
);
7161 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEVICE
);
7164 /* Even if we pass it to all library function calls, it is currently only
7165 defined/used for the OpenMP target ones. */
7166 gcc_checking_assert (start_ix
== BUILT_IN_GOMP_TARGET
7167 || start_ix
== BUILT_IN_GOMP_TARGET_DATA
7168 || start_ix
== BUILT_IN_GOMP_TARGET_UPDATE
7169 || start_ix
== BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
);
7171 device
= OMP_CLAUSE_DEVICE_ID (c
);
7172 clause_loc
= OMP_CLAUSE_LOCATION (c
);
7175 clause_loc
= gimple_location (entry_stmt
);
7177 c
= omp_find_clause (clauses
, OMP_CLAUSE_NOWAIT
);
7179 flags_i
|= GOMP_TARGET_FLAG_NOWAIT
;
7181 /* Ensure 'device' is of the correct type. */
7182 device
= fold_convert_loc (clause_loc
, integer_type_node
, device
);
7184 /* If we found the clause 'if (cond)', build
7185 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7188 cond
= gimple_boolify (cond
);
7190 basic_block cond_bb
, then_bb
, else_bb
;
7194 tmp_var
= create_tmp_var (TREE_TYPE (device
));
7196 e
= split_block_after_labels (new_bb
);
7199 gsi
= gsi_last_nondebug_bb (new_bb
);
7201 e
= split_block (new_bb
, gsi_stmt (gsi
));
7207 then_bb
= create_empty_bb (cond_bb
);
7208 else_bb
= create_empty_bb (then_bb
);
7209 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
7210 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
7212 stmt
= gimple_build_cond_empty (cond
);
7213 gsi
= gsi_last_bb (cond_bb
);
7214 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING);
      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, device);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var,
				  build_int_cst (integer_type_node,
						 GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      device = tmp_var;
      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);
      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					 true, GSI_SAME_STMT);
    }

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (t_async);

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
						integer_type_node,
						OMP_CLAUSE_WAIT_EXPR (c)));
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_nondebug_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}
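
/* To make the argument vector built above concrete: for a plain
   #pragma omp target (START_IX == BUILT_IN_GOMP_TARGET), the call emitted
   here has roughly the shape

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
		      flags, depend, num_args, args);

   This is a sketch only -- the operand names are illustrative -- and
   DEVICE may have been replaced above by GOMP_DEVICE_HOST_FALLBACK to
   force host-fallback execution.  */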
/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */
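
/* Schematically (a sketch, with type conversions omitted), each collapsed
   dimension

     for (V = N1; V cond N2; V += STEP)
       BODY;

   is rewritten below into the straight-line form

     V = N1 + THREADID * STEP;
     BODY;

   where THREADID is the HSA work-group, work-item, or absolute work-item
   id in that dimension, so each thread runs exactly one logical
   iteration.  */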
static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node,
						      dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
  /* Remove the omp for statement.  */
  gsi = gsi_last_nondebug_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_nondebug_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_nondebug_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}
/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument_decls.  */

struct grid_arg_decl_map
{
  tree old_arg;
  tree new_arg;
};
/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
   pertaining to kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}
/* If TARGET region contains a kernel body for loop, remove its region from the
   TARGET and expand it in HSA gridified kernel fashion.  */
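
/* As an illustration, the kind of construct this matches (a sketch, not
   the only accepted shape) is

     #pragma omp target
     #pragma omp teams distribute parallel for
     for (int i = 0; i < n; i++)
       a[i] = b[i];

   where lowering has already wrapped the loop nest in a
   GIMPLE_OMP_GRID_BODY region; the loop body then becomes the HSA kernel
   and the launch grid supplies the iteration space.  */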
static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock)
    = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* Make sure to generate early debug for the function before
     outlining anything.  */
  if (! gimple_in_ssa_p (cfun))
    (*debug_hooks->early_global_decl) (cfun->decl);

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_nondebug_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();
}
/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */
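
/* As a sketch of that ordering: given

     GIMPLE_OMP_PARALLEL        <- region P
       GIMPLE_OMP_FOR           <- region F == P->inner

   the recursion below expands F first, replacing the workshared loop with
   explicit iteration-space computation, and only then expands P, outlining
   the already-expanded body into a child function.  */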
static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;

  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).  */
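
/* A sketch of the tree this builds: for

     GIMPLE_OMP_PARALLEL        <- opens region P
       GIMPLE_OMP_FOR           <- opens region F as child of P
       GIMPLE_OMP_RETURN        <- closes F; PARENT becomes P again
     GIMPLE_OMP_RETURN          <- closes P

   the walk yields P with F chained on P->inner.  */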
static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	  ;
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}
/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}
/* Scan the CFG and build a tree of OMP regions rooted at
   root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}
/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}
/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}
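
/* For context (a sketch of the registration, which lives in passes.def
   rather than here):

     NEXT_PASS (pass_expand_omp);

   pass_expand_omp_ssa below instead picks up functions that reach SSA form
   still lacking PROP_gimple_eomp.  */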
namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE codes.  */
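
/* A rough sketch of the edges created below for a GIMPLE_OMP_FOR region
   (AB = EDGE_ABNORMAL, FT = EDGE_FALLTHRU):

     entry -> body            existing edge, flagged AB
     cont  -> body            AB, the loopback edge
     entry -> cont->next_bb   AB, loop body not executed at all
     cont  -> cont->next_bb   FT | AB

   the abnormal flags keep these edges from being split.  */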
bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"