/* Loop autoparallelization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
/* This pass tries to distribute the iterations of loops into several
   threads.  The implementation is straightforward -- for each loop we test
   whether its iterations are independent, and if it is the case (and some
   additional conditions regarding profitability and correctness are
   satisfied), we add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let
   the omp expansion machinery do its job.

   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.

parloop
{
  int sum = 1;

  for (i = 0; i < N; i++)
    {
      x[i] = i + 3;
      sum += x[i];
    }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = x[i_28];
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;

exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....

  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>

  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #

  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}
*/
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
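
/* A small illustrative calculation (the numbers are examples only, not
   taken from any particular target): with MIN_PER_THREAD == 100 and four
   threads requested, parallelize_loops below skips any loop whose
   estimated number of executed statements does not exceed
   4 * 100 = 400, since the cost of spawning and joining the threads
   would likely outweigh the benefit of running so few iterations in
   parallel.  */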
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
/* Equality and hash functions for hashtab code.  */

static int
reduction_info_eq (const void *aa, const void *bb)
{
  const struct reduction_info *a = (const struct reduction_info *) aa;
  const struct reduction_info *b = (const struct reduction_info *) bb;

  return (a->reduc_phi == b->reduc_phi);
}

static hashval_t
reduction_info_hash (const void *aa)
{
  const struct reduction_info *a = (const struct reduction_info *) aa;

  return a->reduc_version;
}
/* Return the reduction information of the reduction phi node PHI, or
   NULL if PHI does not describe a known reduction.  */

static struct reduction_info *
reduction_phi (htab_t reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (htab_elements (reduction_list) == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = (struct reduction_info *) htab_find (reduction_list, &tmpred);

  return red;
}
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
/* Equality and hash functions for hashtab code.  */

static int
name_to_copy_elt_eq (const void *aa, const void *bb)
{
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
  const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;

  return a->version == b->version;
}

static hashval_t
name_to_copy_elt_hash (const void *aa)
{
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;

  return (hashval_t) a->version;
}
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
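
/* For instance, for a single loop the 1x1 matrix whose only entry is -1
   (with denominator 1) represents reversing the iteration order;
   loop_parallel_p below builds exactly this matrix to test whether the
   loop may be executed backwards, and hence in any order.  */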
/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack *lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}
/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */
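
/* A worked example (illustrative, not from any dump): for a loop nest
   carrying the distance vector d = (1, 0), the interchange matrix

       T = | 0 1 |
           | 1 0 |

   yields T.d = (0, 1), which is lexicographically positive, so the
   interchange respects this dependence; the reversal matrix T = -I
   yields T.d = (-1, 0), which is not, so reversal would be rejected.  */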
static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is no worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack *parloop_obstack)
{
  vec<loop_p> loop_nest;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs.create (10);
  dependence_relations.create (10 * 10);
  loop_nest.create (3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  loop_nest.release ();
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
		 gimple_stmt_iterator *gsi)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      name = make_temp_ssa_name (TREE_TYPE (addr), NULL,
				 get_name (TREE_OPERAND
					     (TREE_OPERAND (*var_p, 0), 0)));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  htab_t decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				htab_t decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  vec<basic_block> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  body.create (3);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  htab_delete (decl_address);
  body.release ();
}
/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Finds the ssa names used in the debug statement STMT that were
   duplicated by separate_decls_in_region_stmt and replaces them with
   the duplicates from NAME_COPIES; base decls are replaced with their
   copies from DECL_COPIES.  Returns true if the debug statement should
   be removed because no replacement is available.  */

static bool
separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
				htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt,
				      ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = htab_find_slot_with_hash (name_copies, &elt, elt.version,
				       NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
    }

  return false;
}
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

static int
add_field_for_reduction (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  tree const type = (tree) data;
  tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
  tree field = build_decl (gimple_location (red->reduc_stmt),
			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

static int
add_field_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  tree type = (tree) data;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */

static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data)
{
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */

static void
create_final_loads_for_reduction (htab_t reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
}
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

static int
create_stores_for_reduction (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

static int
create_loads_and_stores_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  vec<basic_block> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  body.create (3);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  body.release ();

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
			 type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
			 ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */
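
/* As a source-level sketch (not taken from an actual dump), the effect is
   roughly to turn

     i = 0;
     do { BODY; i++; } while (i < n);      <- exit test at the end

   into

     i = 0;
     while (i < n) { BODY; i++; }          <- exit test first

   with the statements of the partially executed last iteration duplicated
   past the loop, which is the shape GIMPLE_OMP_FOR expects.  */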
static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name),
						  nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above, dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

     for (i = 0; i < n; ++i)
       for (j = 0; j < m; ++j)
	 ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (loop->inner)
    m_p_thread = 2;
  else
    m_p_thread = MIN_PER_THREAD;

  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
					new_arg_struct, n_threads, loc);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gimple_stmt_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
{
  void **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
  *slot = new_reduction;
}
/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

static int
set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							   phi, true,
							   &double_reduc);
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
}
/* Try to initialize NITER for code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi = NULL;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is a part of reduction pattern:\n");
	    }
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list = htab_create (10, reduction_info_hash,
				reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",
		     loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",
		     loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOC)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	}
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
#ifdef ENABLE_CHECKING
      verify_flow_info ();
      verify_loop_structure ();
      verify_loop_closed_ssa (true);
#endif
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

#include "gt-tree-parloops.h"