1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "tree-data-ref.h"
28 #include "tree-scalar-evolution.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-pass.h"
31 #include "langhooks.h"
32 #include "tree-vectorizer.h"
33 #include "tree-hasher.h"
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
42 The most of the complexity is in bringing the code into shape expected
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common reduction patterns for outer loops.
59 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
62 currently we use vect_force_simple_reduction() to detect reduction patterns.
63 The code transformation will be introduced by an example.
70 for (i = 0; i < N; i++)
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
84 sum_11 = D.1795_8 + sum_29;
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
96 after reduction transformation (only relevant parts):
104 # Storing the initial value given by the user. #
106 .paral_data_store.32.sum.27 = 1;
108 #pragma omp parallel num_threads(4)
110 #pragma omp for schedule(static)
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
116 # sum.27_29 = PHI <sum.27_11, 0>
118 sum.27_11 = D.1827_8 + sum.27_29;
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
126 # Creating the atomic operation is done at
127 create_call_for_reduction_1() #
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
138 The value computed by the threads is loaded from the
142 .paral_data_load.33_52 = &.paral_data_store.32;
143 sum_37 = .paral_data_load.33_52->sum.27;
144 sum_43 = D.1795_41 + sum_37;
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
156 /* Minimal number of iterations of a loop that should be executed in each
158 #define MIN_PER_THREAD 100
160 /* Element of the hashtable, representing a
161 reduction in the current loop. */
162 struct reduction_info
164 gimple reduc_stmt
; /* reduction statement. */
165 gimple reduc_phi
; /* The phi node defining the reduction. */
166 enum tree_code reduction_code
;/* code for the reduction operation. */
167 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
169 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
170 of the reduction variable when existing the loop. */
171 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
172 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
173 tree init
; /* reduction initialization value. */
174 gimple new_phi
; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
180 /* Reduction info hashtable helpers. */
182 struct reduction_hasher
: typed_free_remove
<reduction_info
>
184 typedef reduction_info value_type
;
185 typedef reduction_info compare_type
;
186 static inline hashval_t
hash (const value_type
*);
187 static inline bool equal (const value_type
*, const compare_type
*);
190 /* Equality and hash functions for hashtab code. */
193 reduction_hasher::equal (const value_type
*a
, const compare_type
*b
)
195 return (a
->reduc_phi
== b
->reduc_phi
);
199 reduction_hasher::hash (const value_type
*a
)
201 return a
->reduc_version
;
204 typedef hash_table
<reduction_hasher
> reduction_info_table_type
;
207 static struct reduction_info
*
208 reduction_phi (reduction_info_table_type reduction_list
, gimple phi
)
210 struct reduction_info tmpred
, *red
;
212 if (reduction_list
.elements () == 0 || phi
== NULL
)
215 tmpred
.reduc_phi
= phi
;
216 tmpred
.reduc_version
= gimple_uid (phi
);
217 red
= reduction_list
.find (&tmpred
);
222 /* Element of hashtable of names to copy. */
224 struct name_to_copy_elt
226 unsigned version
; /* The version of the name to copy. */
227 tree new_name
; /* The new name used in the copy. */
228 tree field
; /* The field of the structure used to pass the
232 /* Name copies hashtable helpers. */
234 struct name_to_copy_hasher
: typed_free_remove
<name_to_copy_elt
>
236 typedef name_to_copy_elt value_type
;
237 typedef name_to_copy_elt compare_type
;
238 static inline hashval_t
hash (const value_type
*);
239 static inline bool equal (const value_type
*, const compare_type
*);
242 /* Equality and hash functions for hashtab code. */
245 name_to_copy_hasher::equal (const value_type
*a
, const compare_type
*b
)
247 return a
->version
== b
->version
;
251 name_to_copy_hasher::hash (const value_type
*a
)
253 return (hashval_t
) a
->version
;
256 typedef hash_table
<name_to_copy_hasher
> name_to_copy_table_type
;
258 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
259 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
260 represents the denominator for every element in the matrix. */
261 typedef struct lambda_trans_matrix_s
263 lambda_matrix matrix
;
267 } *lambda_trans_matrix
;
268 #define LTM_MATRIX(T) ((T)->matrix)
269 #define LTM_ROWSIZE(T) ((T)->rowsize)
270 #define LTM_COLSIZE(T) ((T)->colsize)
271 #define LTM_DENOMINATOR(T) ((T)->denominator)
273 /* Allocate a new transformation matrix. */
275 static lambda_trans_matrix
276 lambda_trans_matrix_new (int colsize
, int rowsize
,
277 struct obstack
* lambda_obstack
)
279 lambda_trans_matrix ret
;
281 ret
= (lambda_trans_matrix
)
282 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
283 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
284 LTM_ROWSIZE (ret
) = rowsize
;
285 LTM_COLSIZE (ret
) = colsize
;
286 LTM_DENOMINATOR (ret
) = 1;
290 /* Multiply a vector VEC by a matrix MAT.
291 MAT is an M*N matrix, and VEC is a vector with length N. The result
292 is stored in DEST which must be a vector of length M. */
295 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
296 lambda_vector vec
, lambda_vector dest
)
300 lambda_vector_clear (dest
, m
);
301 for (i
= 0; i
< m
; i
++)
302 for (j
= 0; j
< n
; j
++)
303 dest
[i
] += matrix
[i
][j
] * vec
[j
];
306 /* Return true if TRANS is a legal transformation matrix that respects
307 the dependence vectors in DISTS and DIRS. The conservative answer
310 "Wolfe proves that a unimodular transformation represented by the
311 matrix T is legal when applied to a loop nest with a set of
312 lexicographically non-negative distance vectors RDG if and only if
313 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
314 i.e.: if and only if it transforms the lexicographically positive
315 distance vectors to lexicographically positive vectors. Note that
316 a unimodular matrix must transform the zero vector (and only it) to
317 the zero vector." S.Muchnick. */
320 lambda_transform_legal_p (lambda_trans_matrix trans
,
322 vec
<ddr_p
> dependence_relations
)
325 lambda_vector distres
;
326 struct data_dependence_relation
*ddr
;
328 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
329 && LTM_ROWSIZE (trans
) == nb_loops
);
331 /* When there are no dependences, the transformation is correct. */
332 if (dependence_relations
.length () == 0)
335 ddr
= dependence_relations
[0];
339 /* When there is an unknown relation in the dependence_relations, we
340 know that it is no worth looking at this loop nest: give up. */
341 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
344 distres
= lambda_vector_new (nb_loops
);
346 /* For each distance vector in the dependence graph. */
347 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
349 /* Don't care about relations for which we know that there is no
350 dependence, nor about read-read (aka. output-dependences):
351 these data accesses can happen in any order. */
352 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
353 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
356 /* Conservatively answer: "this transformation is not valid". */
357 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
360 /* If the dependence could not be captured by a distance vector,
361 conservatively answer that the transform is not valid. */
362 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
365 /* Compute trans.dist_vect */
366 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
368 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
369 DDR_DIST_VECT (ddr
, j
), distres
);
371 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
378 /* Data dependency analysis. Returns true if the iterations of LOOP
379 are independent on each other (that is, if we can execute them
383 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
385 vec
<loop_p
> loop_nest
;
386 vec
<ddr_p
> dependence_relations
;
387 vec
<data_reference_p
> datarefs
;
388 lambda_trans_matrix trans
;
391 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
393 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
395 fprintf (dump_file
, "loop is innermost\n");
397 fprintf (dump_file
, "loop NOT innermost\n");
400 /* Check for problems with dependences. If the loop can be reversed,
401 the iterations are independent. */
402 datarefs
.create (10);
403 dependence_relations
.create (10 * 10);
404 loop_nest
.create (3);
405 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
406 &dependence_relations
))
408 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
409 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
413 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
414 dump_data_dependence_relations (dump_file
, dependence_relations
);
416 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
417 LTM_MATRIX (trans
)[0][0] = -1;
419 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
422 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
423 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
425 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
427 " FAILED: data dependencies exist across iterations\n");
430 loop_nest
.release ();
431 free_dependence_relations (dependence_relations
);
432 free_data_refs (datarefs
);
437 /* Return true when LOOP contains basic blocks marked with the
438 BB_IRREDUCIBLE_LOOP flag. */
441 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
444 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
447 for (i
= 0; i
< loop
->num_nodes
; i
++)
448 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
457 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
458 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
459 to their addresses that can be reused. The address of OBJ is known to
460 be invariant in the whole function. Other needed statements are placed
464 take_address_of (tree obj
, tree type
, edge entry
,
465 int_tree_htab_type decl_address
, gimple_stmt_iterator
*gsi
)
468 int_tree_map
**dslot
;
469 struct int_tree_map ielt
, *nielt
;
470 tree
*var_p
, name
, addr
;
474 /* Since the address of OBJ is invariant, the trees may be shared.
475 Avoid rewriting unrelated parts of the code. */
476 obj
= unshare_expr (obj
);
478 handled_component_p (*var_p
);
479 var_p
= &TREE_OPERAND (*var_p
, 0))
482 /* Canonicalize the access to base on a MEM_REF. */
484 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
486 /* Assign a canonical SSA name to the address of the base decl used
487 in the address and share it for all accesses and addresses based
489 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
491 dslot
= decl_address
.find_slot_with_hash (&ielt
, uid
, INSERT
);
496 addr
= TREE_OPERAND (*var_p
, 0);
498 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
500 name
= make_temp_ssa_name (TREE_TYPE (addr
), NULL
, obj_name
);
502 name
= make_ssa_name (TREE_TYPE (addr
), NULL
);
503 stmt
= gimple_build_assign (name
, addr
);
504 gsi_insert_on_edge_immediate (entry
, stmt
);
506 nielt
= XNEW (struct int_tree_map
);
514 /* Express the address in terms of the canonical SSA name. */
515 TREE_OPERAND (*var_p
, 0) = name
;
517 return build_fold_addr_expr_with_type (obj
, type
);
519 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
520 &stmts
, true, NULL_TREE
);
521 if (!gimple_seq_empty_p (stmts
))
522 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
524 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
526 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
528 if (!gimple_seq_empty_p (stmts
))
529 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
535 /* Callback for htab_traverse. Create the initialization statement
536 for reduction described in SLOT, and place it at the preheader of
537 the loop described in DATA. */
540 initialize_reductions (reduction_info
**slot
, struct loop
*loop
)
543 tree bvar
, type
, arg
;
546 struct reduction_info
*const reduc
= *slot
;
548 /* Create initialization in preheader:
549 reduction_variable = initialization value of reduction. */
551 /* In the phi node at the header, replace the argument coming
552 from the preheader with the reduction initialization value. */
554 /* Create a new variable to initialize the reduction. */
555 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
556 bvar
= create_tmp_var (type
, "reduction");
558 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
559 OMP_CLAUSE_REDUCTION
);
560 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
561 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
563 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
566 /* Replace the argument representing the initialization value
567 with the initialization value for the reduction (neutral
568 element for the particular operation, e.g. 0 for PLUS_EXPR,
569 1 for MULT_EXPR, etc).
570 Keep the old value in a new variable "reduction_initial",
571 that will be taken in consideration after the parallel
572 computing is done. */
574 e
= loop_preheader_edge (loop
);
575 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
576 /* Create new variable to hold the initial value. */
578 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
579 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
580 reduc
->initial_value
= arg
;
586 struct walk_stmt_info info
;
588 int_tree_htab_type decl_address
;
589 gimple_stmt_iterator
*gsi
;
594 /* Eliminates references to local variables in *TP out of the single
595 entry single exit region starting at DTA->ENTRY.
596 DECL_ADDRESS contains addresses of the references that had their
597 address taken already. If the expression is changed, CHANGED is
598 set to true. Callback for walk_tree. */
601 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
603 struct elv_data
*const dta
= (struct elv_data
*) data
;
604 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
610 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
613 type
= TREE_TYPE (t
);
614 addr_type
= build_pointer_type (type
);
615 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
617 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
623 *tp
= build_simple_mem_ref (addr
);
629 if (TREE_CODE (t
) == ADDR_EXPR
)
631 /* ADDR_EXPR may appear in two contexts:
632 -- as a gimple operand, when the address taken is a function invariant
633 -- as gimple rhs, when the resulting address in not a function
635 We do not need to do anything special in the latter case (the base of
636 the memory reference whose address is taken may be replaced in the
637 DECL_P case). The former case is more complicated, as we need to
638 ensure that the new address is still a gimple operand. Thus, it
639 is not sufficient to replace just the base of the memory reference --
640 we need to move the whole computation of the address out of the
642 if (!is_gimple_val (t
))
646 obj
= TREE_OPERAND (t
, 0);
647 var
= get_base_address (obj
);
648 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
651 addr_type
= TREE_TYPE (t
);
652 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
654 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
671 /* Moves the references to local variables in STMT at *GSI out of the single
672 entry single exit region starting at ENTRY. DECL_ADDRESS contains
673 addresses of the references that had their address taken
677 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
678 int_tree_htab_type decl_address
)
681 gimple stmt
= gsi_stmt (*gsi
);
683 memset (&dta
.info
, '\0', sizeof (dta
.info
));
685 dta
.decl_address
= decl_address
;
689 if (gimple_debug_bind_p (stmt
))
692 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
693 eliminate_local_variables_1
, &dta
.info
, NULL
);
696 gimple_debug_bind_reset_value (stmt
);
700 else if (gimple_clobber_p (stmt
))
702 stmt
= gimple_build_nop ();
703 gsi_replace (gsi
, stmt
, false);
709 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
716 /* Eliminates the references to local variables from the single entry
717 single exit region between the ENTRY and EXIT edges.
720 1) Taking address of a local variable -- these are moved out of the
721 region (and temporary variable is created to hold the address if
724 2) Dereferencing a local variable -- these are replaced with indirect
728 eliminate_local_variables (edge entry
, edge exit
)
731 vec
<basic_block
> body
;
734 gimple_stmt_iterator gsi
;
735 bool has_debug_stmt
= false;
736 int_tree_htab_type decl_address
;
737 decl_address
.create (10);
738 basic_block entry_bb
= entry
->src
;
739 basic_block exit_bb
= exit
->dest
;
741 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
743 FOR_EACH_VEC_ELT (body
, i
, bb
)
744 if (bb
!= entry_bb
&& bb
!= exit_bb
)
745 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
746 if (is_gimple_debug (gsi_stmt (gsi
)))
748 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
749 has_debug_stmt
= true;
752 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
755 FOR_EACH_VEC_ELT (body
, i
, bb
)
756 if (bb
!= entry_bb
&& bb
!= exit_bb
)
757 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
758 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
759 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
761 decl_address
.dispose ();
765 /* Returns true if expression EXPR is not defined between ENTRY and
766 EXIT, i.e. if all its operands are defined outside of the region. */
769 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
771 basic_block entry_bb
= entry
->src
;
772 basic_block exit_bb
= exit
->dest
;
775 if (is_gimple_min_invariant (expr
))
778 if (TREE_CODE (expr
) == SSA_NAME
)
780 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
782 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
783 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
792 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
793 The copies are stored to NAME_COPIES, if NAME was already duplicated,
794 its duplicate stored in NAME_COPIES is returned.
796 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
797 duplicated, storing the copies in DECL_COPIES. */
800 separate_decls_in_region_name (tree name
, name_to_copy_table_type name_copies
,
801 int_tree_htab_type decl_copies
, bool copy_name_p
)
803 tree copy
, var
, var_copy
;
804 unsigned idx
, uid
, nuid
;
805 struct int_tree_map ielt
, *nielt
;
806 struct name_to_copy_elt elt
, *nelt
;
807 name_to_copy_elt
**slot
;
808 int_tree_map
**dslot
;
810 if (TREE_CODE (name
) != SSA_NAME
)
813 idx
= SSA_NAME_VERSION (name
);
815 slot
= name_copies
.find_slot_with_hash (&elt
, idx
,
816 copy_name_p
? INSERT
: NO_INSERT
);
818 return (*slot
)->new_name
;
822 copy
= duplicate_ssa_name (name
, NULL
);
823 nelt
= XNEW (struct name_to_copy_elt
);
825 nelt
->new_name
= copy
;
826 nelt
->field
= NULL_TREE
;
835 var
= SSA_NAME_VAR (name
);
839 uid
= DECL_UID (var
);
841 dslot
= decl_copies
.find_slot_with_hash (&ielt
, uid
, INSERT
);
844 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
845 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
846 nielt
= XNEW (struct int_tree_map
);
848 nielt
->to
= var_copy
;
851 /* Ensure that when we meet this decl next time, we won't duplicate
853 nuid
= DECL_UID (var_copy
);
855 dslot
= decl_copies
.find_slot_with_hash (&ielt
, nuid
, INSERT
);
856 gcc_assert (!*dslot
);
857 nielt
= XNEW (struct int_tree_map
);
859 nielt
->to
= var_copy
;
863 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
865 replace_ssa_name_symbol (copy
, var_copy
);
869 /* Finds the ssa names used in STMT that are defined outside the
870 region between ENTRY and EXIT and replaces such ssa names with
871 their duplicates. The duplicates are stored to NAME_COPIES. Base
872 decls of all ssa names used in STMT (including those defined in
873 LOOP) are replaced with the new temporary variables; the
874 replacement decls are stored in DECL_COPIES. */
877 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
878 name_to_copy_table_type name_copies
,
879 int_tree_htab_type decl_copies
)
887 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
889 name
= DEF_FROM_PTR (def
);
890 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
891 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
893 gcc_assert (copy
== name
);
896 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
898 name
= USE_FROM_PTR (use
);
899 if (TREE_CODE (name
) != SSA_NAME
)
902 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
903 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
909 /* Finds the ssa names used in STMT that are defined outside the
910 region between ENTRY and EXIT and replaces such ssa names with
911 their duplicates. The duplicates are stored to NAME_COPIES. Base
912 decls of all ssa names used in STMT (including those defined in
913 LOOP) are replaced with the new temporary variables; the
914 replacement decls are stored in DECL_COPIES. */
917 separate_decls_in_region_debug (gimple stmt
,
918 name_to_copy_table_type name_copies
,
919 int_tree_htab_type decl_copies
)
924 struct int_tree_map ielt
;
925 struct name_to_copy_elt elt
;
926 name_to_copy_elt
**slot
;
927 int_tree_map
**dslot
;
929 if (gimple_debug_bind_p (stmt
))
930 var
= gimple_debug_bind_get_var (stmt
);
931 else if (gimple_debug_source_bind_p (stmt
))
932 var
= gimple_debug_source_bind_get_var (stmt
);
935 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
937 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
938 ielt
.uid
= DECL_UID (var
);
939 dslot
= decl_copies
.find_slot_with_hash (&ielt
, ielt
.uid
, NO_INSERT
);
942 if (gimple_debug_bind_p (stmt
))
943 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
944 else if (gimple_debug_source_bind_p (stmt
))
945 gimple_debug_source_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
947 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
949 name
= USE_FROM_PTR (use
);
950 if (TREE_CODE (name
) != SSA_NAME
)
953 elt
.version
= SSA_NAME_VERSION (name
);
954 slot
= name_copies
.find_slot_with_hash (&elt
, elt
.version
, NO_INSERT
);
957 gimple_debug_bind_reset_value (stmt
);
962 SET_USE (use
, (*slot
)->new_name
);
968 /* Callback for htab_traverse. Adds a field corresponding to the reduction
969 specified in SLOT. The type is passed in DATA. */
972 add_field_for_reduction (reduction_info
**slot
, tree type
)
975 struct reduction_info
*const red
= *slot
;
976 tree var
= gimple_assign_lhs (red
->reduc_stmt
);
977 tree field
= build_decl (gimple_location (red
->reduc_stmt
), FIELD_DECL
,
978 SSA_NAME_IDENTIFIER (var
), TREE_TYPE (var
));
980 insert_field_into_struct (type
, field
);
987 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
988 described in SLOT. The type is passed in DATA. */
991 add_field_for_name (name_to_copy_elt
**slot
, tree type
)
993 struct name_to_copy_elt
*const elt
= *slot
;
994 tree name
= ssa_name (elt
->version
);
995 tree field
= build_decl (UNKNOWN_LOCATION
,
996 FIELD_DECL
, SSA_NAME_IDENTIFIER (name
),
999 insert_field_into_struct (type
, field
);
1005 /* Callback for htab_traverse. A local result is the intermediate result
1006 computed by a single
1007 thread, or the initial value in case no iteration was executed.
1008 This function creates a phi node reflecting these values.
1009 The phi's result will be stored in NEW_PHI field of the
1010 reduction's data structure. */
1013 create_phi_for_local_result (reduction_info
**slot
, struct loop
*loop
)
1015 struct reduction_info
*const reduc
= *slot
;
1018 basic_block store_bb
;
1020 source_location locus
;
1022 /* STORE_BB is the block where the phi
1023 should be stored. It is the destination of the loop exit.
1024 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1025 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1027 /* STORE_BB has two predecessors. One coming from the loop
1028 (the reduction's result is computed at the loop),
1029 and another coming from a block preceding the loop,
1031 are executed (the initial value should be taken). */
1032 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1033 e
= EDGE_PRED (store_bb
, 1);
1035 e
= EDGE_PRED (store_bb
, 0);
1036 local_res
= copy_ssa_name (gimple_assign_lhs (reduc
->reduc_stmt
), NULL
);
1037 locus
= gimple_location (reduc
->reduc_stmt
);
1038 new_phi
= create_phi_node (local_res
, store_bb
);
1039 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1040 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1041 FALLTHRU_EDGE (loop
->latch
), locus
);
1042 reduc
->new_phi
= new_phi
;
1052 basic_block store_bb
;
1053 basic_block load_bb
;
1056 /* Callback for htab_traverse. Create an atomic instruction for the
1057 reduction described in SLOT.
1058 DATA annotates the place in memory the atomic operation relates to,
1059 and the basic block it needs to be generated in. */
1062 create_call_for_reduction_1 (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1064 struct reduction_info
*const reduc
= *slot
;
1065 gimple_stmt_iterator gsi
;
1066 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1071 tree t
, addr
, ref
, x
;
1072 tree tmp_load
, name
;
1075 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1076 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1078 addr
= build_addr (t
, current_function_decl
);
1080 /* Create phi node. */
1081 bb
= clsn_data
->load_bb
;
1083 e
= split_block (bb
, t
);
1086 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
1087 tmp_load
= make_ssa_name (tmp_load
, NULL
);
1088 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1089 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1090 gsi
= gsi_start_bb (new_bb
);
1091 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1093 e
= split_block (new_bb
, load
);
1095 gsi
= gsi_start_bb (new_bb
);
1097 x
= fold_build2 (reduc
->reduction_code
,
1098 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1099 PHI_RESULT (reduc
->new_phi
));
1101 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1102 GSI_CONTINUE_LINKING
);
1104 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1108 /* Create the atomic operation at the join point of the threads.
1109 REDUCTION_LIST describes the reductions in the LOOP.
1110 LD_ST_DATA describes the shared data structure where
1111 shared data is stored in and loaded from. */
1113 create_call_for_reduction (struct loop
*loop
,
1114 reduction_info_table_type reduction_list
,
1115 struct clsn_data
*ld_st_data
)
1117 reduction_list
.traverse
<struct loop
*, create_phi_for_local_result
> (loop
);
1118 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1119 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1121 .traverse
<struct clsn_data
*, create_call_for_reduction_1
> (ld_st_data
);
1124 /* Callback for htab_traverse. Loads the final reduction value at the
1125 join point of all threads, and inserts it in the right place. */
1128 create_loads_for_reductions (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1130 struct reduction_info
*const red
= *slot
;
1132 gimple_stmt_iterator gsi
;
1133 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1138 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1139 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1140 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1144 name
= PHI_RESULT (red
->keep_res
);
1145 stmt
= gimple_build_assign (name
, x
);
1146 SSA_NAME_DEF_STMT (name
) = stmt
;
1148 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1150 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1151 !gsi_end_p (gsi
); gsi_next (&gsi
))
1152 if (gsi_stmt (gsi
) == red
->keep_res
)
1154 remove_phi_node (&gsi
, false);
1160 /* Load the reduction result that was stored in LD_ST_DATA.
1161 REDUCTION_LIST describes the list of reductions that the
1162 loads should be generated for. */
1164 create_final_loads_for_reduction (reduction_info_table_type reduction_list
,
1165 struct clsn_data
*ld_st_data
)
1167 gimple_stmt_iterator gsi
;
1171 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1172 t
= build_fold_addr_expr (ld_st_data
->store
);
1173 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1175 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1176 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
1179 .traverse
<struct clsn_data
*, create_loads_for_reductions
> (ld_st_data
);
1183 /* Callback for htab_traverse. Store the neutral value for the
1184 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1185 1 for MULT_EXPR, etc. into the reduction field.
1186 The reduction is specified in SLOT. The store information is
/* NOTE(review): the tail of the comment above and several code lines
   (opening brace, declarations of t/stmt, trailing return) were dropped
   by the extraction — embedded numbering jumps 1186 -> 1190.  */
1190 create_stores_for_reduction (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1192 struct reduction_info
*const red
= *slot
;
1195 gimple_stmt_iterator gsi
;
1196 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
/* Store red->initial_value (the operation's neutral element) into this
   reduction's field of the shared struct, at the end of store_bb.  */
1198 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1199 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1200 stmt
= gimple_build_assign (t
, red
->initial_value
);
1201 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1206 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1207 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1208 specified in SLOT. */
/* NOTE(review): extraction dropped lines (1208 -> 1211, 1214 -> 1217,
   1218 -> 1221) — opening brace and declarations of t/stmt/load_struct
   are missing from this view.  */
1211 create_loads_and_stores_for_name (name_to_copy_elt
**slot
,
1212 struct clsn_data
*clsn_data
)
1214 struct name_to_copy_elt
*const elt
= *slot
;
1217 gimple_stmt_iterator gsi
;
1218 tree type
= TREE_TYPE (elt
->new_name
);
/* Store: write the original SSA name's value into its field of the
   shared struct at the end of store_bb.  */
1221 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1222 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1223 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1224 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
/* Load: read the field back through the load pointer in load_bb and
   define the duplicate SSA name elt->new_name with it.  */
1226 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1227 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1228 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1229 stmt
= gimple_build_assign (elt
->new_name
, t
);
1230 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
1231 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1236 /* Moves all the variables used in LOOP and defined outside of it (including
1237 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1238 name) to a structure created for this purpose. The code
1246 is transformed this way:
/* NOTE(review): the worked example that originally appeared between
   lines 1246 and 1261 of the comment was dropped by the extraction,
   as were many code lines below (embedded numbering is non-contiguous
   throughout).  Verify against upstream GCC tree-parloops.c.  */
1261 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1262 pointer `new' is intentionally not initialized (the loop will be split to a
1263 separate function later, and `new' will be initialized from its arguments).
1264 LD_ST_DATA holds information about the shared data structure used to pass
1265 information among the threads. It is initialized here, and
1266 gen_parallel_loop will pass it to create_call_for_reduction that
1267 needs this information. REDUCTION_LIST describes the reductions
1271 separate_decls_in_region (edge entry
, edge exit
,
1272 reduction_info_table_type reduction_list
,
1273 tree
*arg_struct
, tree
*new_arg_struct
,
1274 struct clsn_data
*ld_st_data
)
/* Split the entry edge so there is a dedicated block (bb0) before the
   region entry (bb1) in which to emit the stores to the struct.  */
1277 basic_block bb1
= split_edge (entry
);
1278 basic_block bb0
= single_pred (bb1
);
1279 name_to_copy_table_type name_copies
;
1280 name_copies
.create (10);
1281 int_tree_htab_type decl_copies
;
1282 decl_copies
.create (10);
1284 tree type
, type_name
, nvar
;
1285 gimple_stmt_iterator gsi
;
1286 struct clsn_data clsn_data
;
1287 vec
<basic_block
> body
;
1290 basic_block entry_bb
= bb1
;
1291 basic_block exit_bb
= exit
->dest
;
1292 bool has_debug_stmt
= false;
1294 entry
= single_succ_edge (entry_bb
);
1295 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
/* First pass: collect the decls/names used in every PHI and statement
   of the region (excluding the entry/exit blocks themselves).  */
1297 FOR_EACH_VEC_ELT (body
, i
, bb
)
1299 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1301 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1302 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1303 name_copies
, decl_copies
);
1305 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1307 gimple stmt
= gsi_stmt (gsi
);
1309 if (is_gimple_debug (stmt
))
1310 has_debug_stmt
= true;
1312 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1313 name_copies
, decl_copies
);
1318 /* Now process debug bind stmts. We must not create decls while
1319 processing debug stmts, so we defer their processing so as to
1320 make sure we will have debug info for as many variables as
1321 possible (all of those that were dealt with in the loop above),
1322 and discard those for which we know there's nothing we can
1325 FOR_EACH_VEC_ELT (body
, i
, bb
)
1326 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1328 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1330 gimple stmt
= gsi_stmt (gsi
);
1332 if (is_gimple_debug (stmt
))
1334 if (separate_decls_in_region_debug (stmt
, name_copies
,
/* Debug stmts that cannot be rewritten are removed.  */
1337 gsi_remove (&gsi
, true);
1348 if (name_copies
.elements () == 0 && reduction_list
.elements () == 0)
1350 /* It may happen that there is nothing to copy (if there are only
1351 loop carried and external variables in the loop). */
1353 *new_arg_struct
= NULL
;
1357 /* Create the type for the structure to store the ssa names to. */
1358 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1359 type_name
= build_decl (UNKNOWN_LOCATION
,
1360 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1362 TYPE_NAME (type
) = type_name
;
/* One field per copied name, plus one per reduction.  */
1364 name_copies
.traverse
<tree
, add_field_for_name
> (type
);
1365 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1367 /* Create the fields for reductions. */
1368 reduction_list
.traverse
<tree
, add_field_for_reduction
> (type
);
1372 /* Create the loads and stores. */
1373 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1374 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1375 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1377 ld_st_data
->store
= *arg_struct
;
1378 ld_st_data
->load
= *new_arg_struct
;
1379 ld_st_data
->store_bb
= bb0
;
1380 ld_st_data
->load_bb
= bb1
;
1383 .traverse
<struct clsn_data
*, create_loads_and_stores_for_name
>
1386 /* Load the calculation from memory (after the join of the threads). */
1388 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1391 .traverse
<struct clsn_data
*, create_stores_for_reduction
>
1393 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1394 clsn_data
.load_bb
= exit
->dest
;
1395 clsn_data
.store
= ld_st_data
->store
;
1396 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1400 decl_copies
.dispose ();
1401 name_copies
.dispose ();
1404 /* Bitmap containing uids of functions created by parallelization. We cannot
1405 allocate it from the default obstack, as it must live across compilation
1406 of several functions; we make it gc allocated instead. */
1408 static GTY(()) bitmap parallelized_functions
;
1410 /* Returns true if FN was created by create_loop_fn. */
/* NOTE(review): lines were dropped by the extraction (the function's
   return type, braces, and the early `return false;`).  */
1413 parallelized_function_p (tree fn
)
/* Fast negative check: nothing was parallelized yet, or FN is a
   user-written (non-artificial) function.  */
1415 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1418 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1421 /* Creates and returns an empty function that will receive the body of
1422 a parallelized loop. */
/* NOTE(review): extraction gaps (1425 -> 1429, missing buf/tname
   declarations around line 1434, missing trailing `return decl;`).
   Verify against upstream before editing.  */
1425 create_loop_fn (location_t loc
)
1429 tree decl
, type
, name
, t
;
1430 struct function
*act_cfun
= cfun
;
1431 static unsigned loopfn_num
;
/* Build a unique private assembler name "<fn>.$loopfn.<N>".  */
1433 loc
= LOCATION_LOCUS (loc
);
1434 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1435 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1436 clean_symbol_name (tname
);
1437 name
= get_identifier (tname
);
1438 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1440 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1441 if (!parallelized_functions
)
1442 parallelized_functions
= BITMAP_GGC_ALLOC ();
1443 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
/* Mark the decl: static, used, artificial, not inlinable, local.  */
1445 TREE_STATIC (decl
) = 1;
1446 TREE_USED (decl
) = 1;
1447 DECL_ARTIFICIAL (decl
) = 1;
1448 DECL_IGNORED_P (decl
) = 0;
1449 TREE_PUBLIC (decl
) = 0;
1450 DECL_UNINLINABLE (decl
) = 1;
1451 DECL_EXTERNAL (decl
) = 0;
1452 DECL_CONTEXT (decl
) = NULL_TREE
;
1453 DECL_INITIAL (decl
) = make_node (BLOCK
);
/* void result.  */
1455 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1456 DECL_ARTIFICIAL (t
) = 1;
1457 DECL_IGNORED_P (t
) = 1;
1458 DECL_RESULT (decl
) = t
;
/* Single pointer parameter ".paral_data_param" carrying the shared
   data struct.  */
1460 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1462 DECL_ARTIFICIAL (t
) = 1;
1463 DECL_ARG_TYPE (t
) = ptr_type_node
;
1464 DECL_CONTEXT (t
) = decl
;
1466 DECL_ARGUMENTS (decl
) = t
;
1468 allocate_struct_function (decl
, false);
1470 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1472 set_cfun (act_cfun
);
1477 /* Moves the exit condition of LOOP to the beginning of its header, and
1478 duplicates the part of the last iteration that gets disabled to the
1479 exit of the loop. NIT is the number of iterations of the loop
1480 (used to initialize the variables in the duplicated part).
1482 TODO: the common case is that latch of the loop is empty and immediately
1483 follows the loop exit. In this case, it would be better not to copy the
1484 body of the loop, but only move the entry of the loop directly before the
1485 exit check and increase the number of iterations of the loop by one.
1486 This may need some additional preconditioning in case NIT = ~0.
1487 REDUCTION_LIST describes the reductions in LOOP. */
/* NOTE(review): many lines are missing from this view (embedded
   numbering jumps throughout; e.g. NIT parameter, locals n/ok/nit_1,
   gcc_asserts, body of the exit-phi loop).  Verify against upstream
   GCC tree-parloops.c before editing.  */
1490 transform_to_exit_first_loop (struct loop
*loop
,
1491 reduction_info_table_type reduction_list
,
1494 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1497 edge exit
= single_dom_exit (loop
), hpred
;
1498 tree control
, control_name
, res
, t
;
1499 gimple phi
, nphi
, cond_stmt
, stmt
, cond_nit
;
1500 gimple_stmt_iterator gsi
;
/* Separate the header's labels from its body so the exit test can be
   hoisted to the top.  */
1503 split_block_after_labels (loop
->header
);
1504 orig_header
= single_succ (loop
->header
);
1505 hpred
= single_succ_edge (loop
->header
);
1507 cond_stmt
= last_stmt (exit
->src
);
1508 control
= gimple_cond_lhs (cond_stmt
);
1509 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1511 /* Make sure that we have phi nodes on exit for all loop header phis
1512 (create_parallel_loop requires that). */
1513 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1515 phi
= gsi_stmt (gsi
);
1516 res
= PHI_RESULT (phi
);
1517 t
= copy_ssa_name (res
, phi
);
1518 SET_PHI_RESULT (phi
, t
);
1519 nphi
= create_phi_node (res
, orig_header
);
1520 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
/* If the control variable's phi was renamed, retarget the exit
   condition to the new name.  */
1524 gimple_cond_set_lhs (cond_stmt
, t
);
1525 update_stmt (cond_stmt
);
/* Duplicate the tail of the last iteration past the exit check.  */
1530 bbs
= get_loop_body_in_dom_order (loop
);
1532 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1534 nbbs
= XNEWVEC (basic_block
, n
);
1535 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1542 /* Other than reductions, the only gimple reg that should be copied
1543 out of the loop is the control variable. */
1544 exit
= single_dom_exit (loop
);
1545 control_name
= NULL_TREE
;
1546 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1548 phi
= gsi_stmt (gsi
);
1549 res
= PHI_RESULT (phi
);
1550 if (virtual_operand_p (res
))
1556 /* Check if it is a part of reduction. If it is,
1557 keep the phi at the reduction's keep_res field. The
1558 PHI_RESULT of this phi is the resulting value of the reduction
1559 variable when exiting the loop. */
1561 if (reduction_list
.elements () > 0)
1563 struct reduction_info
*red
;
1565 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1566 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1569 red
->keep_res
= phi
;
/* Any other real phi on the exit must be the control variable.  */
1574 gcc_assert (control_name
== NULL_TREE
1575 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1577 remove_phi_node (&gsi
, false);
1579 gcc_assert (control_name
!= NULL_TREE
);
1581 /* Initialize the control variable to number of iterations
1582 according to the rhs of the exit condition. */
1583 gsi
= gsi_after_labels (ex_bb
);
1584 cond_nit
= last_stmt (exit
->src
);
1585 nit_1
= gimple_cond_rhs (cond_nit
);
1586 nit_1
= force_gimple_operand_gsi (&gsi
,
1587 fold_convert (TREE_TYPE (control_name
), nit_1
),
1588 false, NULL_TREE
, false, GSI_SAME_STMT
);
1589 stmt
= gimple_build_assign (control_name
, nit_1
);
1590 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1591 SSA_NAME_DEF_STMT (control_name
) = stmt
;
1594 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1595 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1596 NEW_DATA is the variable that should be initialized from the argument
1597 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1598 basic block containing GIMPLE_OMP_PARALLEL tree. */
/* NOTE(review): the extraction dropped lines throughout this function
   (locals t/param/def, several statement tails, the final return).
   Verify against upstream GCC tree-parloops.c before editing.  */
1601 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1602 tree new_data
, unsigned n_threads
, location_t loc
)
1604 gimple_stmt_iterator gsi
;
1605 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1607 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1608 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1609 edge exit
, nexit
, guard
, end
, e
;
1611 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1612 bb
= loop_preheader_edge (loop
)->src
;
1613 paral_bb
= single_pred (bb
);
1614 gsi
= gsi_last_bb (paral_bb
);
/* num_threads clause with the requested thread count.  */
1616 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1617 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1618 = build_int_cst (integer_type_node
, n_threads
);
1619 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1620 gimple_set_location (stmt
, loc
);
1622 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1624 /* Initialize NEW_DATA. */
1627 gsi
= gsi_after_labels (bb
);
1629 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1630 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1631 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1632 SSA_NAME_DEF_STMT (param
) = stmt
;
1634 stmt
= gimple_build_assign (new_data
,
1635 fold_convert (TREE_TYPE (new_data
), param
));
1636 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1637 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1640 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1641 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1642 gsi
= gsi_last_bb (bb
);
1643 stmt
= gimple_build_omp_return (false);
1644 gimple_set_location (stmt
, loc
);
1645 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1647 /* Extract data for GIMPLE_OMP_FOR. */
1648 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1649 cond_stmt
= last_stmt (loop
->header
);
1651 cvar
= gimple_cond_lhs (cond_stmt
);
1652 cvar_base
= SSA_NAME_VAR (cvar
);
1653 phi
= SSA_NAME_DEF_STMT (cvar
);
1654 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1655 initvar
= copy_ssa_name (cvar
, NULL
);
1656 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1658 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
/* The increment of the control variable moves into GIMPLE_OMP_FOR;
   drop its defining statement from the latch.  */
1660 gsi
= gsi_last_nondebug_bb (loop
->latch
);
1661 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1662 gsi_remove (&gsi
, true);
/* Restructure the CFG: dedicated for_bb before the loop, ex_bb after
   it, with a guard edge bypassing the body.  */
1665 for_bb
= split_edge (loop_preheader_edge (loop
));
1666 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1667 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1668 gcc_assert (exit
== single_dom_exit (loop
));
1670 guard
= make_edge (for_bb
, ex_bb
, 0);
1671 single_succ_edge (loop
->latch
)->flags
= 0;
1672 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1673 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1675 source_location locus
;
1677 phi
= gsi_stmt (gsi
);
1678 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
/* Feed the exit phi from both the guard edge (value before the loop)
   and the end edge (value from the last iteration).  */
1680 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1681 locus
= gimple_phi_arg_location_from_edge (stmt
,
1682 loop_preheader_edge (loop
));
1683 add_phi_arg (phi
, def
, guard
, locus
);
1685 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1686 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1687 add_phi_arg (phi
, def
, end
, locus
);
1689 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1690 PENDING_STMT (e
) = NULL
;
1692 /* Emit GIMPLE_OMP_FOR. */
1693 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1694 type
= TREE_TYPE (cvar
);
1695 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
1696 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1698 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
1699 gimple_set_location (for_stmt
, loc
);
1700 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1701 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1702 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1703 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1704 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1706 build_int_cst (type
, 1)));
1708 gsi
= gsi_last_bb (for_bb
);
1709 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1710 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1712 /* Emit GIMPLE_OMP_CONTINUE. */
1713 gsi
= gsi_last_bb (loop
->latch
);
1714 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1715 gimple_set_location (stmt
, loc
);
1716 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1717 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1719 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1720 gsi
= gsi_last_bb (ex_bb
);
1721 stmt
= gimple_build_omp_return (true);
1722 gimple_set_location (stmt
, loc
);
1723 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1725 /* After the above dom info is hosed. Re-compute it. */
1726 free_dominance_info (CDI_DOMINATORS
);
1727 calculate_dominance_info (CDI_DOMINATORS
);
1732 /* Generates code to execute the iterations of LOOP in N_THREADS
1733 threads in parallel.
1735 NITER describes number of iterations of LOOP.
1736 REDUCTION_LIST describes the reductions existent in the LOOP. */
/* NOTE(review): the extraction dropped lines throughout (locals such
   as stmts/prob/loc/entry/exit/cond_stmt, pieces of the big worked
   example in the comment, the loop-iterator declaration).  Verify
   against upstream GCC tree-parloops.c before editing.  */
1739 gen_parallel_loop (struct loop
*loop
, reduction_info_table_type reduction_list
,
1740 unsigned n_threads
, struct tree_niter_desc
*niter
)
1743 tree many_iterations_cond
, type
, nit
;
1744 tree arg_struct
, new_arg_struct
;
1746 basic_block parallel_head
;
1748 struct clsn_data clsn_data
;
1752 unsigned int m_p_thread
=2;
/* The remainder of the original header comment sketches the code
   shape before and after the transformation (partially dropped by
   the extraction).  */
1756 ---------------------------------------------------------------------
1759 IV = phi (INIT, IV + STEP)
1765 ---------------------------------------------------------------------
1767 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1768 we generate the following code:
1770 ---------------------------------------------------------------------
1773 || NITER < MIN_PER_THREAD * N_THREADS)
1777 store all local loop-invariant variables used in body of the loop to DATA.
1778 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1779 load the variables from DATA.
1780 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1783 GIMPLE_OMP_CONTINUE;
1784 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1785 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1791 IV = phi (INIT, IV + STEP)
1802 /* Create two versions of the loop -- in the old one, we know that the
1803 number of iterations is large enough, and we will transform it into the
1804 loop that will be split to loop_fn, the new one will be used for the
1805 remaining iterations. */
1807 /* We should compute a better number-of-iterations value for outer loops.
1810 for (i = 0; i < n; ++i)
1811 for (j = 0; j < m; ++j)
1814 we should compute nit = n * m, not nit = n.
1815 Also may_be_zero handling would need to be adjusted. */
1817 type
= TREE_TYPE (niter
->niter
);
1818 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1821 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1826 m_p_thread
=MIN_PER_THREAD
;
/* Guard: only take the parallel version when NITER is large enough
   to pay for the threading overhead and cannot be zero.  */
1828 many_iterations_cond
=
1829 fold_build2 (GE_EXPR
, boolean_type_node
,
1830 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
1832 many_iterations_cond
1833 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1834 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1835 many_iterations_cond
);
1836 many_iterations_cond
1837 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1839 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1840 if (!is_gimple_condexpr (many_iterations_cond
))
1842 many_iterations_cond
1843 = force_gimple_operand (many_iterations_cond
, &stmts
,
1846 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1849 initialize_original_copy_tables ();
1851 /* We assume that the loop usually iterates a lot. */
1852 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1853 loop_version (loop
, many_iterations_cond
, NULL
,
1854 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1855 update_ssa (TODO_update_ssa
);
1856 free_original_copy_tables ();
1858 /* Base all the induction variables in LOOP on a single control one. */
1859 canonicalize_loop_ivs (loop
, &nit
, true);
1861 /* Ensure that the exit condition is the first statement in the loop. */
1862 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1864 /* Generate initializations for reductions. */
1865 if (reduction_list
.elements () > 0)
1866 reduction_list
.traverse
<struct loop
*, initialize_reductions
> (loop
);
1868 /* Eliminate the references to local variables from the loop. */
1869 gcc_assert (single_exit (loop
));
1870 entry
= loop_preheader_edge (loop
);
1871 exit
= single_dom_exit (loop
);
1873 eliminate_local_variables (entry
, exit
);
1874 /* In the old loop, move all variables non-local to the loop to a structure
1875 and back, and create separate decls for the variables used in loop. */
1876 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1877 &new_arg_struct
, &clsn_data
);
1879 /* Create the parallel constructs. */
1880 loc
= UNKNOWN_LOCATION
;
1881 cond_stmt
= last_stmt (loop
->header
);
1883 loc
= gimple_location (cond_stmt
);
1884 parallel_head
= create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
1885 new_arg_struct
, n_threads
, loc
);
1886 if (reduction_list
.elements () > 0)
1887 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1891 /* Cancel the loop (it is simpler to do it here rather than to teach the
1892 expander to do it). */
1893 cancel_loop_tree (loop
);
1895 /* Free loop bound estimations that could contain references to
1896 removed statements. */
1897 FOR_EACH_LOOP (li
, loop
, 0)
1898 free_numbers_of_iterations_estimates_loop (loop
);
1900 /* Expand the parallel constructs. We do it directly here instead of running
1901 a separate expand_omp pass, since it is more efficient, and less likely to
1902 cause troubles with further analyses not being able to deal with the
1905 omp_expand_local (parallel_head
);
1908 /* Returns true when LOOP contains vector phi nodes. */
/* NOTE(review): extraction dropped lines (return statements, the
   `free (bbs)` cleanup and braces around lines 1921-1927 are missing
   from this view).  */
1911 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1914 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1915 gimple_stmt_iterator gsi
;
/* Scan every phi of every block for a VECTOR_TYPE result.  */
1918 for (i
= 0; i
< loop
->num_nodes
; i
++)
1919 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1920 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1929 /* Create a reduction_info struct, initialize it with REDUC_STMT
1930 and PHI, insert it to the REDUCTION_LIST. */
/* NOTE(review): braces and a few lines were dropped by the extraction
   (numbering jumps 1934 -> 1936, 1946 -> 1949).  */
1933 build_new_reduction (reduction_info_table_type reduction_list
,
1934 gimple reduc_stmt
, gimple phi
)
1936 reduction_info
**slot
;
1937 struct reduction_info
*new_reduction
;
1939 gcc_assert (reduc_stmt
);
1941 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1944 "Detected reduction. reduction stmt is: \n");
1945 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1946 fprintf (dump_file
, "\n");
/* Allocate and fill the record, keyed by the phi result's SSA
   version, then insert it into the hash table.  */
1949 new_reduction
= XCNEW (struct reduction_info
);
1951 new_reduction
->reduc_stmt
= reduc_stmt
;
1952 new_reduction
->reduc_phi
= phi
;
1953 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
1954 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1955 slot
= reduction_list
.find_slot (new_reduction
, INSERT
);
1956 *slot
= new_reduction
;
1959 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
/* NOTE(review): the opening brace and `return 1;` were dropped by the
   extraction (numbering jumps 1962 -> 1964, 1965 -> 1969).  */
1962 set_reduc_phi_uids (reduction_info
**slot
, void *data ATTRIBUTE_UNUSED
)
1964 struct reduction_info
*const red
= *slot
;
/* Tag the phi with its reduction version so it can be recognized
   later via gimple_uid.  */
1965 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1969 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
/* NOTE(review): extraction dropped lines (braces, the double_reduc
   local and the vect_force_simple_reduction argument tail around
   lines 1992-1995).  Verify against upstream before editing.  */
1972 gather_scalar_reductions (loop_p loop
, reduction_info_table_type reduction_list
)
1974 gimple_stmt_iterator gsi
;
1975 loop_vec_info simple_loop_info
;
1977 simple_loop_info
= vect_analyze_loop_form (loop
);
/* Every header phi that is not a simple induction variable is a
   candidate reduction; let the vectorizer confirm it.  */
1979 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1981 gimple phi
= gsi_stmt (gsi
);
1983 tree res
= PHI_RESULT (phi
);
1986 if (virtual_operand_p (res
))
1989 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1990 && simple_loop_info
)
1992 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
1995 if (reduc_stmt
&& !double_reduc
)
1996 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
1999 destroy_loop_vec_info (simple_loop_info
, true);
2001 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2002 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2004 reduction_list
.traverse
<void *, set_reduc_phi_uids
> (NULL
);
2007 /* Try to initialize NITER for code generation part. */
/* NOTE(review): extraction dropped lines (opening brace, the
   `return false;` / `return true;` results around lines 2013-2026).  */
2010 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2012 edge exit
= single_dom_exit (loop
);
2016 /* We need to know # of iterations, and there should be no uses of values
2017 defined inside loop outside of it, unless the values are invariants of
2019 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2021 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2022 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2029 /* Try to initialize REDUCTION_LIST for code generation part.
2030 REDUCTION_LIST describes the reductions. */
/* NOTE(review): many lines are missing from this view (braces,
   `return false;`/`return true;` paths, the reduc_phi declaration,
   several fprintf tails).  Verify against upstream GCC
   tree-parloops.c before editing.  */
2033 try_create_reduction_list (loop_p loop
,
2034 reduction_info_table_type reduction_list
)
2036 edge exit
= single_dom_exit (loop
);
2037 gimple_stmt_iterator gsi
;
2041 gather_scalar_reductions (loop
, reduction_list
);
/* Every real value live across the exit must belong to a recognized
   reduction; otherwise the loop cannot be parallelized.  */
2044 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2046 gimple phi
= gsi_stmt (gsi
);
2047 struct reduction_info
*red
;
2048 imm_use_iterator imm_iter
;
2049 use_operand_p use_p
;
2051 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2053 if (!virtual_operand_p (val
))
2055 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2057 fprintf (dump_file
, "phi is ");
2058 print_gimple_stmt (dump_file
, phi
, 0, 0);
2059 fprintf (dump_file
, "arg of phi to exit: value ");
2060 print_generic_expr (dump_file
, val
, 0);
2061 fprintf (dump_file
, " used outside loop\n");
2063 " checking if it a part of reduction pattern: \n");
2065 if (reduction_list
.elements () == 0)
2067 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2069 " FAILED: it is not a part of reduction.\n");
/* Find the in-loop phi that defines the exiting value.  */
2073 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2075 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2076 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2078 reduc_phi
= USE_STMT (use_p
);
2082 red
= reduction_phi (reduction_list
, reduc_phi
);
2085 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2087 " FAILED: it is not a part of reduction.\n");
2090 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2092 fprintf (dump_file
, "reduction phi is ");
2093 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2094 fprintf (dump_file
, "reduction stmt is ");
2095 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2100 /* The iterations of the loop may communicate only through bivs whose
2101 iteration space can be distributed efficiently. */
2102 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2104 gimple phi
= gsi_stmt (gsi
);
2105 tree def
= PHI_RESULT (phi
);
2108 if (!virtual_operand_p (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2110 struct reduction_info
*red
;
2112 red
= reduction_phi (reduction_list
, phi
);
2115 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2117 " FAILED: scalar dependency between iterations\n");
2127 /* Detect parallel loops and generate parallel code using libgomp
2128 primitives. Returns true if some loop was parallelized, false
/* NOTE(review): many lines dropped by the extraction (loop iterator
   `li` / `loop` declarations, loop_loc declaration, early returns,
   `changed = true;`, the final `return changed;`, and several brace
   lines).  Verify against upstream GCC tree-parloops.c.  */
2132 parallelize_loops (void)
2134 unsigned n_threads
= flag_tree_parallelize_loops
;
2135 bool changed
= false;
2137 struct tree_niter_desc niter_desc
;
2139 reduction_info_table_type reduction_list
;
2140 struct obstack parloop_obstack
;
2141 HOST_WIDE_INT estimated
;
2144 /* Do not parallelize loops in the functions created by parallelization. */
2145 if (parallelized_function_p (cfun
->decl
))
2147 if (cfun
->has_nonlocal_label
)
2150 gcc_obstack_init (&parloop_obstack
);
2151 reduction_list
.create (10);
2152 init_stmt_vec_info_vec ();
/* Consider every loop; the reduction table is reused (emptied) for
   each candidate.  */
2154 FOR_EACH_LOOP (li
, loop
, 0)
2156 reduction_list
.empty ();
2157 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2159 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2161 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2163 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2166 /* If we use autopar in graphite pass, we use its marked dependency
2167 checking results. */
2168 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2171 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2175 if (!single_dom_exit (loop
))
2178 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2179 fprintf (dump_file
, "loop is !single_dom_exit\n");
2184 if (/* And of course, the loop must be parallelizable. */
2185 !can_duplicate_loop_p (loop
)
2186 || loop_has_blocks_with_irreducible_flag (loop
)
2187 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2188 /* FIXME: the check for vector phi nodes could be removed. */
2189 || loop_has_vector_phi_nodes (loop
))
/* Profitability: skip loops whose estimated trip count is too small
   to share among the threads, and loops in cold code.  */
2192 estimated
= estimated_stmt_executions_int (loop
);
2193 if (estimated
== -1)
2194 estimated
= max_stmt_executions_int (loop
);
2195 /* FIXME: Bypass this check as graphite doesn't update the
2196 count and frequency correctly now. */
2197 if (!flag_loop_parallelize_all
2198 && ((estimated
!= -1
2199 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2200 /* Do not bother with loops in cold areas. */
2201 || optimize_loop_nest_for_size_p (loop
)))
2204 if (!try_get_loop_niter (loop
, &niter_desc
))
2207 if (!try_create_reduction_list (loop
, reduction_list
))
2210 if (!flag_loop_parallelize_all
2211 && !loop_parallel_p (loop
, &parloop_obstack
))
2215 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2218 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2220 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2221 loop_loc
= find_loop_location (loop
);
2222 if (loop_loc
!= UNKNOWN_LOC
)
2223 fprintf (dump_file
, "\nloop at %s:%d: ",
2224 LOC_FILE (loop_loc
), LOC_LINE (loop_loc
));
2226 gen_parallel_loop (loop
, reduction_list
,
2227 n_threads
, &niter_desc
);
2230 free_stmt_vec_info_vec ();
2231 reduction_list
.dispose ();
2232 obstack_free (&parloop_obstack
, NULL
);
2234 /* Parallelization will cause new function calls to be inserted through
2235 which local variables will escape. Reset the points-to solution
2238 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2243 #include "gt-tree-parloops.h"