/* Loop autoparallelization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if it is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.

parloop
{
  int sum = 1;

  for (i = 0; i < N; i++)
    {
      x[i] = i + 3;
      sum += x[i];
    }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;

exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);

after reduction transformation (only relevant parts):

parloop
{
  ...

  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

    #pragma omp for schedule(static)

      # The neutral element corresponding to the particular
      reduction's operation, e.g. 0 for PLUS_EXPR,
      1 for MULT_EXPR, etc. replaces the user's initial value.  #

      # sum.27_29 = PHI <sum.27_11, 0>

      sum.27_11 = D.1827_8 + sum.27_29;

      GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
    create_call_for_reduction_1() #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
    create_loads_for_reductions().
    The value computed by the threads is loaded from the
    shared struct.  #

  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

exit_bb:

  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);
}
*/
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
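
/* For illustration (an added note, not from the original sources): with
   -ftree-parallelize-loops=4, the profitability check in
   parallelize_loops rejects a loop whose estimated iteration count (as
   returned by estimated_stmt_executions_int) does not exceed
   N_THREADS * MIN_PER_THREAD = 400, so e.g.

     for (i = 0; i < 400; i++)
       a[i] = b[i] + c[i];

   is left alone, while a loop running 401 or more iterations passes
   this particular test.  */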
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table <reduction_hasher> reduction_info_table_type;
static struct reduction_info *
reduction_phi (reduction_info_table_type reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list.elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list.find (&tmpred);

  return red;
}
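
/* An added note (not in the original sources): the lookup above hashes
   on gimple_uid (PHI) rather than on SSA_NAME_VERSION, because
   set_reduc_phi_uids copies each reduction's reduc_version into the uid
   of its phi statement once the vectorizer is done with the uids; see
   gather_scalar_reductions below.  */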
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}
/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
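
/* A worked example (added for illustration): for M = N = 2,

     MATRIX = { { 0, 1 },
                { 1, 0 } }     VEC = { 2, 3 }

   the loops above compute DEST = { 3, 2 }; each DEST[i] is the dot
   product of row i of MATRIX with VEC.  */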
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<loop_p> loop_nest;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs.create (10);
  dependence_relations.create (10 * 10);
  loop_nest.create (3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  loop_nest.release ();
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
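
/* Added illustration of what the reversal test above accepts and
   rejects:

     for (i = 0; i < n; i++)      -- iterations independent:
       a[i] = b[i] + c[i];        -- loop_parallel_p returns true

     for (i = 1; i < n; i++)      -- a[i] depends on a[i-1]:
       a[i] = a[i-1] + b[i];      -- loop_parallel_p returns false  */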
/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  int_tree_map **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      name = make_temp_ssa_name (TREE_TYPE (addr), NULL,
				 get_name (TREE_OPERAND
					     (TREE_OPERAND (*var_p, 0), 0)));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */
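
/* Added sketch (not from the original sources) of the two kinds of
   rewrites for a local variable x used in the region:

     1)  p_1 = &x;	becomes		addr_2 = &x;	(on edge ENTRY)
					p_1 = addr_2;

     2)  x = 5;		becomes		addr_2 = &x;	(on edge ENTRY)
					*addr_2 = 5;

   so that the region itself no longer mentions x directly.  */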

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  vec<basic_block> body;
  body.create (3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address;
  decl_address.create (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  decl_address.dispose ();
  body.release ();
}
/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies.find_slot_with_hash (&elt, idx,
					  copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  Returns true if the
   debug statement STMT should be removed.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type name_copies,
				int_tree_htab_type decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt,
				      ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
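
/* In the running example at the top of the file, the phi created here
   is

     # sum.27_56 = PHI <sum.27_11, 0>

   merging the value a thread computed with the neutral element used
   when that thread executed no iteration.  */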
struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
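
/* In the running example at the top of the file, this callback emits

     #pragma omp atomic_load
     D.1839_59 = *&.paral_data_load.33_51->reduction.23;
     D.1840_60 = sum.27_56 + D.1839_59;
     #pragma omp atomic_store (D.1840_60);

   at the join point of the threads.  */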
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  reduction_list
    .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
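
/* In the running example, the assignment emitted here is

     .paral_data_load.33_52 = &.paral_data_store.32;

   and the traversal above then emits the per-reduction load

     sum_37 = .paral_data_load.33_52->sum.27;

   via create_loads_for_reductions.  */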
/* Callback for htab_traverse.  Store the initial value given by the user
   for the reduction into the reduction field (inside the loop the neutral
   element for the particular operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc., has replaced it; see initialize_reductions).
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
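
/* In the running example, the store emitted here is

     .paral_data_store.32.sum.27 = 1;

   i.e. the user's initial value of the reduction variable is kept in
   the shared structure while the loop itself starts from the neutral
   element.  */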
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;

   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  vec<basic_block> body;
  body.create (3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  body.release ();

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list.traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
		  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  reduction_list
	    .traverse <struct clsn_data *, create_stores_for_reduction>
		      (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}
/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */
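
/* Schematically (an added sketch, not from the original sources):

     loop:
       BODY1;
       if (COND)
	 goto out;
       BODY2;
       goto loop;

   becomes

     loop:
       if (COND)
	 goto out;
       BODY1;
       BODY2;
       goto loop;
     out:
       BODY1';   -- the duplicated part of the last iteration

   where BODY1' re-executes the statements of BODY1 that the rotated
   exit test would otherwise skip on the final iteration.  */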

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name),
						  nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above, dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existing in LOOP.  */

static void
gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY1;
     BODY2;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:
   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

     for (i = 0; i < n; ++i)
       for (j = 0; j < m; ++j)
	 ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (!loop->inner)
    m_p_thread = MIN_PER_THREAD;

  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (reduction_list.elements () > 0)
    reduction_list.traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
					new_arg_struct, n_threads, loc);
  if (reduction_list.elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gimple_stmt_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type reduction_list,
		     gimple reduc_stmt, gimple phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = reduction_list.find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}
/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							   phi, true,
							   &double_reduc);
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
}
/* Try to initialize NITER for code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is a part of reduction pattern:\n");
	    }
	  if (reduction_list.elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  reduction_info_table_type reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list.create (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",
		     loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",
		     loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOC)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	}
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  reduction_list.dispose ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

#include "gt-tree-parloops.h"