/* Loop autoparallelization.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-general.h"
#include "omp-low.h"
#include "tree-ssa.h"
#include "params.h"
#include "params-enum.h"
#include "tree-ssa-alias.h"
#include "tree-eh.h"
#include "gomp-constants.h"
#include "tree-dfa.h"
/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if it is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
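/* A minimal usage sketch (illustrative; -ftree-parallelize-loops is the
   standard driver switch, everything else is an assumption of the example):
   compiling

     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   with -ftree-parallelize-loops=4 makes this pass outline the loop body into
   a new function and wrap it in the equivalent of

     #pragma omp parallel num_threads(4)
     #pragma omp for schedule(static)

   after which the generic omp expansion machinery takes over.  */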
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = x[i_28];
  sum_11 = D.1795_8 + sum_29;
  x[i_28] = i_28 + 3;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>

  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}

*/
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
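/* For example (numbers are just the defaults above, not a statement from the
   original sources): with MIN_PER_THREAD == 100 and four threads, a loop is
   only considered worth parallelizing when its estimated number of
   iterations is roughly at least 4 * 100 = 400, unless parallelization is
   forced with -floop-parallelize-all.  */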
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;		/* reduction statement.  */
  gimple *reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree reduc_addr;		/* The address of the reduction variable for
				   openacc reductions.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;
/* Return the reduction information of phi node PHI from REDUCTION_LIST, or
   NULL if PHI is not a registered reduction phi.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack *lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}
/* Multiply a vector VEC by a matrix MATRIX.
   MATRIX is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DEPENDENCE_RELATIONS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
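/* Worked example for the test above (illustrative, not from the original
   sources): for a single loop, loop_parallel_p below asks whether loop
   reversal, TRANS = [-1], is legal.  A dependence like a[i+1] = a[i] has
   distance vector (1); TRANS maps it to (-1), which is not lexicographically
   positive, so the reversal is rejected and the iterations are treated as
   dependent.  Distance-zero vectors map to (0) and survive the test.  */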
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack *parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
/* Return the result of the reduction described by STMT: the PHI result if
   STMT is a phi node, otherwise the assignment lhs.  */

static tree
reduc_stmt_res (gimple *stmt)
{
  return (gimple_code (stmt) == GIMPLE_PHI
	  ? gimple_phi_result (stmt)
	  : gimple_assign_lhs (stmt));
}
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;

  return 1;
}
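/* Example (illustrative): for the `sum' reduction from the file-header
   comment, with user initial value 1 and reduction_code PLUS_EXPR, the
   preheader phi argument 1 is replaced by the neutral element 0 computed by
   omp_reduction_init_op, reduc->init becomes 0, and the original 1 is
   remembered in reduc->initial_value so it can be folded back in after the
   threads join.  */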
/* Data carried by the walk_tree callback eliminate_local_variables_1.  */

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
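/* For illustration (all names invented for the example): after the walk
   above, a use of an addressable local `x' such as

     sum_5 = sum_3 + x;

   becomes

     sum_5 = sum_3 + *x.9_12;

   where x.9_12 is the canonical ssa name that take_address_of initialized
   to &x on the region entry edge.  */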
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      {
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (is_gimple_debug (gsi_stmt (gsi)))
	    {
	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
		has_debug_stmt = true;
	    }
	  else
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
      }

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Finds the ssa names used in the debug stmt STMT and replaces them with
   the duplicates stored in NAME_COPIES; the base decl of the bound variable
   is replaced using DECL_COPIES.  Returns true if the debug statement
   cannot be adjusted this way and should instead be removed by the
   caller.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
/* Describes the shared data structure used to pass values among the
   threads: STORE and LOAD are respectively the structure variable and the
   pointer it is accessed through; STORE_BB and LOAD_BB are the blocks the
   stores and loads are generated in.  */

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  if (reduc->reduc_addr == NULL_TREE)
    {
      load_struct = build_simple_mem_ref (clsn_data->load);
      t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

      addr = build_addr (t);
    }
  else
    {
      /* Set the address for the atomic store.  */
      addr = reduc->reduc_addr;

      /* Remove the non-atomic store '*addr = sum'.  */
      tree res = PHI_RESULT (reduc->keep_res);
      use_operand_p use_p;
      gimple *stmt;
      bool single_use_p = single_imm_use (res, &use_p, &stmt);
      gcc_assert (single_use_p);
      replace_uses_by (gimple_vdef (stmt),
		       gimple_vuse (stmt));
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
    }

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);

  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple *stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
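/* Illustrative result of the two statements built above (field and ssa
   names invented, mirroring the file-header example): for an incoming ssa
   name n_1 and its in-region duplicate n_2,

     .paral_data_store.32.n = n_1;       <-- generated in STORE_BB
     n_2 = .paral_data_load.33_52->n;    <-- generated in LOAD_BB  */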
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
		  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
			(ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Replace uses of NAME by VAL in block BB.  */

static void
replace_uses_in_bb_by (tree name, tree val, basic_block bb)
{
  gimple *use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      if (gimple_bb (use_stmt) != bb)
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>

   In unified diff format:

      <bb preheader>:
      ...
-     goto <bb header>
+     goto <bb newheader>

      <bb header>:
-     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
-     sum_a = PHI <sum_init (preheader), sum_b (latch)>
+     ivtmp_a = PHI <ivtmp_c (latch)>
+     sum_a = PHI <sum_c (latch)>
      ...
      use (ivtmp_a)
      ...
      sum_b = sum_a + sum_update
      ...
-     if (ivtmp_a < n)
-       goto <bb latch>;
+     goto <bb latch>;
+
+     <bb newheader>:
+     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
+     sum_c = PHI <sum_init (preheader), sum_b (latch)>
+     if (ivtmp_c < n + 1)
+       goto <bb header>;
      else
	goto <bb exit>;

      <bb latch>:
      ivtmp_b = ivtmp_a + 1;
-     goto <bb header>
+     goto <bb newheader>

+     <bb newexit>:
+     sum_y = PHI <sum_c (newheader)>

      <bb exit>:
-     sum_z = PHI <sum_b (cond[1]), ...>
+     sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.

   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal form after the transformation, which is required
     by create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  basic_block new_exit_block = NULL;
  if (!single_pred_p (exit->dest))
    {
      /* Create a new empty exit block, inbetween the new loop header and the
	 old exit block.  The function separate_decls_in_region needs this block
	 to insert code that is active on loop exit, but not any other path.  */
      new_exit_block = split_edge (exit);
    }

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      gphi *nphi = NULL;
      tree res_z = PHI_RESULT (phi);
      tree res_c;

      if (new_exit_block != NULL)
	{
	  /* Now that we have a new exit block, duplicate the phi of the old
	     exit block in the new exit block to preserve loop-closed ssa.  */
	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
	  tree res_y = copy_ssa_name (res_z, phi);
	  nphi = create_phi_node (res_y, new_exit_block);
	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
	}
      else
	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (virtual_operand_p (res_z))
	continue;

      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = (nphi != NULL
			 ? nphi
			 : phi);
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  checking_verify_ssa (true, true);
}
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of
   the loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true
   if transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains no phis.  */
  if (phi_nodes (loop->latch) != NULL)
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
	{
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
  if (nit_max >= type_max)
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
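/* Example of the overflow concern handled above (illustrative): for an iv of
   type unsigned char and nit == 255, nit + 1 wraps to 0, so the rewritten
   exit test `ivtmp_c < nit + 1' could never be true.  In such cases the
   function above refuses the transformation and gen_parallel_loop falls back
   to the body-duplicating transform_to_exit_first_loop below.  */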
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is
   the number of iterations of the loop.  REDUCTION_LIST describes the
   reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
2020 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2021 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2022 NEW_DATA is the variable that should be initialized from the argument
2023 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2024 that number is to be determined later. */
2027 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
2028 tree new_data
, unsigned n_threads
, location_t loc
,
2029 bool oacc_kernels_p
)
2031 gimple_stmt_iterator gsi
;
2032 basic_block for_bb
, ex_bb
, continue_bb
;
2034 gomp_parallel
*omp_par_stmt
;
2035 gimple
*omp_return_stmt1
, *omp_return_stmt2
;
2039 gomp_continue
*omp_cont_stmt
;
2040 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
2041 edge exit
, nexit
, guard
, end
, e
;
2045 gcc_checking_assert (lookup_attribute ("oacc kernels",
2046 DECL_ATTRIBUTES (cfun
->decl
)));
2047 /* Indicate to later processing that this is a parallelized OpenACC
2048 kernels construct. */
2049 DECL_ATTRIBUTES (cfun
->decl
)
2050 = tree_cons (get_identifier ("oacc kernels parallelized"),
2051 NULL_TREE
, DECL_ATTRIBUTES (cfun
->decl
));
2055 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2057 basic_block bb
= loop_preheader_edge (loop
)->src
;
2058 basic_block paral_bb
= single_pred (bb
);
2059 gsi
= gsi_last_bb (paral_bb
);
2061 gcc_checking_assert (n_threads
!= 0);
2062 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
2063 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
2064 = build_int_cst (integer_type_node
, n_threads
);
2065 omp_par_stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
2066 gimple_set_location (omp_par_stmt
, loc
);
2068 gsi_insert_after (&gsi
, omp_par_stmt
, GSI_NEW_STMT
);
2070 /* Initialize NEW_DATA. */
2073 gassign
*assign_stmt
;
2075 gsi
= gsi_after_labels (bb
);
2077 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
));
2078 assign_stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
2079 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
2081 assign_stmt
= gimple_build_assign (new_data
,
2082 fold_convert (TREE_TYPE (new_data
), param
));
2083 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
2086 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2087 bb
= split_loop_exit_edge (single_dom_exit (loop
));
2088 gsi
= gsi_last_bb (bb
);
2089 omp_return_stmt1
= gimple_build_omp_return (false);
2090 gimple_set_location (omp_return_stmt1
, loc
);
2091 gsi_insert_after (&gsi
, omp_return_stmt1
, GSI_NEW_STMT
);
2094 /* Extract data for GIMPLE_OMP_FOR. */
2095 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
2096 cond_stmt
= as_a
<gcond
*> (last_stmt (loop
->header
));
2098 cvar
= gimple_cond_lhs (cond_stmt
);
2099 cvar_base
= SSA_NAME_VAR (cvar
);
2100 phi
= SSA_NAME_DEF_STMT (cvar
);
2101 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
2102 initvar
= copy_ssa_name (cvar
);
2103 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
2105 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
2107 gsi
= gsi_last_nondebug_bb (loop
->latch
);
2108 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
2109 gsi_remove (&gsi
, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  /* FIXME: What is the probability?  */
  guard->probability = profile_probability::guessed_never ();
  /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
  loop->latch = split_edge (single_succ_edge (loop->latch));
  single_pred_edge (loop->latch)->flags = 0;
  end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
  rescan_loop_exit (end, true, false);

  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      gphi *phi = gpi.phi ();
      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple *def_stmt = SSA_NAME_DEF_STMT (def);

      /* If the exit phi is not connected to a header phi in the same loop, this
	 value is not modified in the loop, and we're done with this phi.  */
      if (!(gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == loop->header))
	{
	  locus = gimple_phi_arg_location_from_edge (phi, exit);
	  add_phi_arg (phi, def, guard, locus);
	  add_phi_arg (phi, def, end, locus);
	  continue;
	}

      gphi *stmt = as_a <gphi *> (def_stmt);
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  if (oacc_kernels_p)
    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
       omp-offload.c:execute_oacc_device_lower.  */
    t = build_omp_clause (loc, OMP_CLAUSE_GANG);
  else
    {
      t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
      int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
      enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type
	= (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
      switch (schedule_type)
	{
	case PARAM_PARLOOPS_SCHEDULE_KIND_static:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
	  break;
	case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
	  break;
	case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
	  break;
	case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
	  chunk_size = 0;
	  break;
	case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
	  chunk_size = 0;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (chunk_size != 0)
	OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
	  = build_int_cst (integer_type_node, chunk_size);
    }
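
  /* The switch above maps the --param parloops-schedule= setting onto the
     OMP schedule kinds; e.g. (illustrative) --param parloops-schedule=dynamic
     --param parloops-chunk-size=100 yields a schedule(dynamic, 100) clause,
     while the default is a static schedule with no chunk size.  */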

  for_stmt = gimple_build_omp_for (NULL,
				   (oacc_kernels_p
				    ? GF_OMP_FOR_KIND_OACC_LOOP
				    : GF_OMP_FOR_KIND_FOR),
				   t, 1, NULL);

  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  continue_bb = single_pred (loop->latch);
  gsi = gsi_last_bb (continue_bb);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}

/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel, which can be 0 if that number is to be determined
   later.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter,
		   bool oacc_kernels_p)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  location_t loc;
  gimple *cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;
     else
       goto transformed;

     transformed:
     DATA = ...;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY1;
     BODY2;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN	-- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN	-- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:
   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

     for (i = 0; i < n; ++i)
       for (j = 0; j < m; ++j)
	 ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (!oacc_kernels_p)
    {
      if (loop->inner)
	m_p_thread = 2;
      else
	m_p_thread = MIN_PER_THREAD;

      gcc_checking_assert (n_threads != 0);
      many_iterations_cond =
	fold_build2 (GE_EXPR, boolean_type_node,
		     nit, build_int_cst (type, m_p_thread * n_threads));

      many_iterations_cond
	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		       invert_truthvalue (unshare_expr (niter->may_be_zero)),
		       many_iterations_cond);
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      if (!is_gimple_condexpr (many_iterations_cond))
	{
	  many_iterations_cond
	    = force_gimple_operand (many_iterations_cond, &stmts,
				    true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
					      stmts);
	}

      initialize_original_copy_tables ();

      /* We assume that the loop usually iterates a lot.  */
      loop_version (loop, many_iterations_cond, NULL,
		    profile_probability::likely (),
		    profile_probability::unlikely (),
		    profile_probability::likely (),
		    profile_probability::unlikely (), true);
      update_ssa (TODO_update_ssa);
      free_original_copy_tables ();
    }

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move the
     entry of the loop directly before the exit check and increase the number of
     iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      if (oacc_kernels_p)
	n_threads = 1;

      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets disabled
	 to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }
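
  /* Both transforms leave the loop in a shape where the exit test is the
     first statement of the body, as GIMPLE_OMP_FOR expects; illustratively,

       loop { IV = phi (...); BODY; if (COND) break; }

     becomes

       loop { IV = phi (...); if (COND) break; BODY; }

     with the iteration count adjusted by one (alternative transform), or
     with the disabled tail of the last iteration duplicated at the loop
     exit (fallback).  */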

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  /* This rewrites the body in terms of new variables.  This has already
     been done for oacc_kernels_p in pass_lower_omp/lower_omp ().  */
  if (!oacc_kernels_p)
    {
      eliminate_local_variables (entry, exit);
      /* In the old loop, move all variables non-local to the loop to a
	 structure and back, and create separate decls for the variables used in
	 loop.  */
      separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
				&new_arg_struct, &clsn_data);
    }
  else
    {
      arg_struct = NULL_TREE;
      new_arg_struct = NULL_TREE;
      clsn_data.load = NULL_TREE;
      clsn_data.load_bb = exit->dest;
      clsn_data.store = NULL_TREE;
      clsn_data.store_bb = NULL;
    }

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
			n_threads, loc, oacc_kernels_p);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  free_numbers_of_iterations_estimates (cfun);
}

/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gphi_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple *reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;
  enum tree_code reduction_code;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0);
      fprintf (dump_file, "\n");
    }

  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
    {
      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      reduction_code = gimple_assign_rhs_code (def1);
    }
  else
    reduction_code = gimple_assign_rhs_code (reduc_stmt);

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = reduction_code;
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  auto_vec<gphi *, 4> double_reduc_phis;
  auto_vec<gimple *, 4> double_reduc_stmts;

  if (!stmt_vec_info_vec.exists ())
    init_stmt_vec_info_vec ();

  simple_loop_info = vect_analyze_loop_form (loop);
  if (simple_loop_info == NULL)
    goto gather_done;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      gimple *reduc_stmt
	= vect_force_simple_reduction (simple_loop_info, phi,
				       &double_reduc, true);
      if (!reduc_stmt)
	continue;

      if (double_reduc)
	{
	  if (loop->inner->inner != NULL)
	    continue;

	  double_reduc_phis.safe_push (phi);
	  double_reduc_stmts.safe_push (reduc_stmt);
	  continue;
	}

      build_new_reduction (reduction_list, reduc_stmt, phi);
    }
  destroy_loop_vec_info (simple_loop_info, true);

  if (!double_reduc_phis.is_empty ())
    {
      simple_loop_info = vect_analyze_loop_form (loop->inner);
      if (simple_loop_info)
	{
	  gphi *phi;
	  unsigned int i;

	  FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
	    {
	      affine_iv iv;
	      tree res = PHI_RESULT (phi);
	      bool double_reduc;

	      use_operand_p use_p;
	      gimple *inner_stmt;
	      bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	      gcc_assert (single_use_p);
	      if (gimple_code (inner_stmt) != GIMPLE_PHI)
		continue;
	      gphi *inner_phi = as_a <gphi *> (inner_stmt);
	      if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			     &iv, true))
		continue;

	      gimple *inner_reduc_stmt
		= vect_force_simple_reduction (simple_loop_info, inner_phi,
					       &double_reduc, true);
	      gcc_assert (!double_reduc);
	      if (inner_reduc_stmt == NULL)
		continue;

	      build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
	    }
	  destroy_loop_vec_info (simple_loop_info, true);
	}
    }

 gather_done:
  /* Release the claim on gimple_uid.  */
  free_stmt_vec_info_vec ();

  if (reduction_list->elements () == 0)
    return;

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
     now.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}

/* Try to initialize NITER for code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Return the default def of the first function argument.  */

static tree
get_omp_data_i_param (void)
{
  tree decl = DECL_ARGUMENTS (cfun->decl);
  gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
  return ssa_default_def (cfun, decl);
}

/* For PHI in loop header of LOOP, look for pattern:

   <bb preheader>:
   .omp_data_i = &.omp_data_arr;
   addr = .omp_data_i->sum;
   sum_a = *addr;

   <bb header>:
   sum_b = PHI <sum_a (preheader), sum_c (latch)>

   and return addr.  Otherwise, return NULL_TREE.  */

static tree
find_reduc_addr (struct loop *loop, gphi *phi)
{
  edge e = loop_preheader_edge (loop);
  tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
  gimple *stmt = SSA_NAME_DEF_STMT (arg);
  if (!gimple_assign_single_p (stmt))
    return NULL_TREE;
  tree memref = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (memref) != MEM_REF)
    return NULL_TREE;
  tree addr = TREE_OPERAND (memref, 0);

  gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
  if (!gimple_assign_single_p (stmt2))
    return NULL_TREE;
  tree compref = gimple_assign_rhs1 (stmt2);
  if (TREE_CODE (compref) != COMPONENT_REF)
    return NULL_TREE;
  tree addr2 = TREE_OPERAND (compref, 0);
  if (TREE_CODE (addr2) != MEM_REF)
    return NULL_TREE;
  addr2 = TREE_OPERAND (addr2, 0);
  if (TREE_CODE (addr2) != SSA_NAME
      || addr2 != get_omp_data_i_param ())
    return NULL_TREE;

  return addr;
}

/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list,
			   bool oacc_kernels_p)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  /* Try to get rid of exit phis.  */
  final_value_replacement_loop (loop);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is part of reduction pattern:\n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (red->keep_res != NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: reduction has multiple exit phis.\n");
	      return false;
	    }
	  red->keep_res = phi;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  if (oacc_kernels_p)
    {
      for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  tree def = PHI_RESULT (phi);
	  affine_iv iv;

	  if (!virtual_operand_p (def)
	      && !simple_iv (loop, loop, def, &iv, true))
	    {
	      tree addr = find_reduc_addr (loop, phi);
	      if (addr == NULL_TREE)
		return false;
	      struct reduction_info *red = reduction_phi (reduction_list, phi);
	      red->reduc_addr = addr;
	    }
	}
    }

  return true;
}

/* Return true if LOOP contains phis with ADDR_EXPR in args.  */

static bool
loop_has_phi_with_address_arg (struct loop *loop)
{
  basic_block *bbs = get_loop_body (loop);
  bool res = false;

  unsigned i, j;
  gphi_iterator gsi;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gphi *phi = gsi.phi ();
	for (j = 0; j < gimple_phi_num_args (phi); j++)
	  {
	    tree arg = gimple_phi_arg_def (phi, j);
	    if (TREE_CODE (arg) == ADDR_EXPR)
	      {
		/* This should be handled by eliminate_local_variables, but that
		   function currently ignores phis.  */
		res = true;
		goto end;
	      }
	  }
      }

 end:
  free (bbs);

  return res;
}
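
/* An example of such a phi (illustrative):

     p_1 = PHI <&a (2), &b (3)>

   Parallelizing the loop would require rewriting these address arguments,
   which eliminate_local_variables does not do for phis, so such loops are
   rejected.  */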

/* Return true if memory ref REF (corresponding to the stmt at GSI in
   REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
   or the statements in REGIONS_BB[I + n].  REF_IS_STORE indicates if REF is a
   store.  Ignore conflicts with SKIP_STMT.  */

static bool
ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
			   bool ref_is_store, vec<basic_block> region_bbs,
			   unsigned int i, gimple *skip_stmt)
{
  basic_block bb = region_bbs[i];
  gsi_next (&gsi);

  while (true)
    {
      for (; !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (stmt == skip_stmt)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "skipping reduction store: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      continue;
	    }

	  if (!gimple_vdef (stmt)
	      && !gimple_vuse (stmt))
	    continue;

	  if (gimple_code (stmt) == GIMPLE_RETURN)
	    continue;

	  if (ref_is_store)
	    {
	      if (ref_maybe_used_by_stmt_p (stmt, ref))
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Stmt ");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  return true;
		}
	    }
	  else
	    {
	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Stmt ");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  return true;
		}
	    }
	}

      i++;
      if (i == region_bbs.length ())
	break;
      bb = region_bbs[i];
      gsi = gsi_start_bb (bb);
    }

  return false;
}
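
/* Note the checks above are asymmetric: a store REF conflicts with any
   later statement that may read the location (ref_maybe_used_by_stmt_p),
   while a load REF conflicts with any later statement that may clobber it
   (stmt_may_clobber_ref_p_1); two loads of the same location never
   conflict.  */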

/* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
   in parallel with REGION_BBS containing the loop.  Return the stores of
   reduction results in REDUCTION_STORES.  */

static bool
oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
		      reduction_info_table_type *reduction_list,
		      bitmap reduction_stores)
{
  tree omp_data_i = get_omp_data_i_param ();

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      if (bitmap_bit_p (in_loop_bbs, bb->index))
	continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple *skip_stmt = NULL;

	  if (is_gimple_debug (stmt)
	      || gimple_code (stmt) == GIMPLE_COND)
	    continue;

	  ao_ref ref;
	  bool ref_is_store = false;
	  if (gimple_assign_load_p (stmt))
	    {
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree base = get_base_address (rhs);
	      if (TREE_CODE (base) == MEM_REF
		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
		continue;

	      tree lhs = gimple_assign_lhs (stmt);
	      if (TREE_CODE (lhs) == SSA_NAME
		  && has_single_use (lhs))
		{
		  use_operand_p use_p;
		  gimple *use_stmt;
		  single_imm_use (lhs, &use_p, &use_stmt);
		  if (gimple_code (use_stmt) == GIMPLE_PHI)
		    {
		      struct reduction_info *red;
		      red = reduction_phi (reduction_list, use_stmt);
		      tree val = PHI_RESULT (red->keep_res);
		      if (has_single_use (val))
			{
			  single_imm_use (val, &use_p, &use_stmt);
			  if (gimple_store_p (use_stmt))
			    {
			      unsigned int id
				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
			      bitmap_set_bit (reduction_stores, id);
			      skip_stmt = use_stmt;
			      if (dump_file)
				{
				  fprintf (dump_file, "found reduction load: ");
				  print_gimple_stmt (dump_file, stmt, 0);
				}
			    }
			}
		    }
		}

	      ao_ref_init (&ref, rhs);
	    }
	  else if (gimple_store_p (stmt))
	    {
	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
	      ref_is_store = true;
	    }
	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
	    continue;
	  else if (!gimple_has_side_effects (stmt)
		   && !gimple_could_trap_p (stmt)
		   && !stmt_could_throw_p (stmt)
		   && !gimple_vdef (stmt)
		   && !gimple_vuse (stmt))
	    continue;
	  else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
	    continue;
	  else if (gimple_code (stmt) == GIMPLE_RETURN)
	    continue;
	  else
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      return false;
	    }

	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
					 i, skip_stmt))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      return false;
	    }
	}
    }

  return true;
}

/* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
   gang_pos == 0, except when the stores are REDUCTION_STORES.  Return true
   if any changes were made.  */

static bool
oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
			     bitmap reduction_stores)
{
  tree gang_pos = NULL_TREE;
  bool changed = false;

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      if (bitmap_bit_p (in_loop_bbs, bb->index))
	continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (!gimple_store_p (stmt))
	    {
	      /* Update gsi to point to next stmt.  */
	      gsi_next (&gsi);
	      continue;
	    }

	  if (bitmap_bit_p (reduction_stores,
			    SSA_NAME_VERSION (gimple_vdef (stmt))))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file,
			   "skipped reduction store for single-gang"
			   " neutering: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}

	      /* Update gsi to point to next stmt.  */
	      gsi_next (&gsi);
	      continue;
	    }

	  changed = true;

	  if (gang_pos == NULL_TREE)
	    {
	      tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
	      gcall *gang_single
		= gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
	      gang_pos = make_ssa_name (integer_type_node);
	      gimple_call_set_lhs (gang_single, gang_pos);
	      gimple_stmt_iterator start
		= gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
	      gimple_set_vuse (gang_single, vuse);
	      gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "found store that needs single-gang neutering: ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  {
	    /* Split block before store.  */
	    gimple_stmt_iterator gsi2 = gsi;
	    gsi_prev (&gsi2);
	    edge e;
	    if (gsi_end_p (gsi2))
	      {
		e = split_block_after_labels (bb);
		gsi2 = gsi_last_bb (bb);
	      }
	    else
	      e = split_block (bb, gsi_stmt (gsi2));
	    basic_block bb2 = e->dest;

	    /* Split block after store.  */
	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
	    basic_block bb3 = e2->dest;

	    gimple *cond
	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
				   NULL_TREE, NULL_TREE);
	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);

	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
	    /* FIXME: What is the probability?  */
	    e3->probability = profile_probability::guessed_never ();
	    e->flags = EDGE_TRUE_VALUE;

	    tree vdef = gimple_vdef (stmt);
	    tree vuse = gimple_vuse (stmt);

	    tree phi_res = copy_ssa_name (vdef);
	    gphi *new_phi = create_phi_node (phi_res, bb3);
	    replace_uses_by (vdef, phi_res);
	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);

	    /* Update gsi to point to next stmt.  */
	    bb = bb3;
	    gsi = gsi_start_bb (bb);
	  }
	}
    }

  return changed;
}
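
/* Illustrative sketch of the neutering performed above: a store

     *ptr = x;

   in the entry/exit part of the region becomes

     pos = GOACC_DIM_POS (GOMP_DIM_GANG);
     ...
     if (pos == 0)
       *ptr = x;

   with a virtual phi merging the store's vdef at the join block, so that
   only gang 0 executes the store.  */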

/* Return true if the statements before and after the LOOP can be executed in
   parallel with the function containing the loop.  Resolve conflicting stores
   outside LOOP by guarding them such that only a single gang executes them.  */

static bool
oacc_entry_exit_ok (struct loop *loop,
		    reduction_info_table_type *reduction_list)
{
  basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
  vec<basic_block> region_bbs
    = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
  bitmap_clear (in_loop_bbs);
  for (unsigned int i = 0; i < loop->num_nodes; i++)
    bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);

  bitmap reduction_stores = BITMAP_ALLOC (NULL);
  bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
				   reduction_stores);

  if (res)
    {
      bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
						  reduction_stores);
      if (changed)
	{
	  free_dominance_info (CDI_DOMINATORS);
	  calculate_dominance_info (CDI_DOMINATORS);
	}
    }

  region_bbs.release ();
  free (loop_bbs);

  BITMAP_FREE (in_loop_bbs);
  BITMAP_FREE (reduction_stores);

  return res;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

static bool
parallelize_loops (bool oacc_kernels_p)
{
  unsigned n_threads;
  bool changed = false;
  struct loop *loop;
  struct loop *skip_loop = NULL;
  struct tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  source_location loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (!oacc_kernels_p
      && parallelized_function_p (cfun->decl))
    return false;

  /* Do not parallelize loops in offloaded functions.  */
  if (!oacc_kernels_p
      && oacc_get_fn_attrib (cfun->decl) != NULL)
    return false;

  if (cfun->has_nonlocal_label)
    return false;

  /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
     the argument to -ftree-parallelize-loops.  */
  if (oacc_kernels_p)
    n_threads = 0;
  else
    n_threads = flag_tree_parallelize_loops;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
	{
	  if (!loop->in_oacc_kernels_region
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Skipping loop %d as inner loop of parallelized loop\n",
		     loop->num);

	  skip_loop = loop->inner;
	  continue;
	}
      else
	skip_loop = NULL;

      reduction_list.empty ();

      if (oacc_kernels_p)
	{
	  if (!loop->in_oacc_kernels_region)
	    continue;

	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
	  if (loop->inner)
	    skip_loop = loop->inner;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Trying loop %d with header bb %d in oacc kernels"
		     " region\n", loop->num, loop->header->index);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = likely_max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && !oacc_kernels_p
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
	continue;

      if (loop_has_phi_with_address_arg (loop))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      if (oacc_kernels_p
	  && !oacc_entry_exit_ok (loop, &reduction_list))
	{
	  if (dump_file)
	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
	  continue;
	}

      changed = true;
      skip_loop = loop->inner;

      loop_loc = find_loop_location (loop);
      if (loop->inner)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			 "parallelizing outer loop %d\n", loop->num);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			 "parallelizing inner loop %d\n", loop->num);

      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc, oacc_kernels_p);
    }

  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

/* Parallelization.  */

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
      oacc_kernels_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    if (oacc_kernels_p)
      return flag_openacc;
    else
      return flag_tree_parallelize_loops > 1;
  }

  virtual unsigned int execute (function *);
  opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      oacc_kernels_p = param;
    }

 private:
  bool oacc_kernels_p;
}; // class pass_parallelize_loops
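
/* The pass is instantiated twice in passes.def: once with oacc_kernels_p
   false, gated on -ftree-parallelize-loops, and once with it true inside
   the OpenACC kernels pipeline (see set_pass_param above).  */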

unsigned int
pass_parallelize_loops::execute (function *fun)
{
  tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  if (nthreads == NULL_TREE)
    return 0;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL
			 | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;

  if (!in_loop_pipeline)
    {
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      scev_initialize ();
    }

  unsigned int todo = 0;
  if (parallelize_loops (oacc_kernels_p))
    {
      fun->curr_properties &= ~(PROP_gimple_eomp);

      checking_verify_loop_structure ();

      todo |= TODO_update_ssa;
    }

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}