1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "gimple-ssa.h"
29 #include "tree-phinodes.h"
30 #include "ssa-iterators.h"
31 #include "tree-ssanames.h"
32 #include "tree-ssa-loop-ivopts.h"
33 #include "tree-ssa-loop-manip.h"
34 #include "tree-ssa-loop-niter.h"
35 #include "tree-ssa-loop.h"
36 #include "tree-into-ssa.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "gimple-pretty-print.h"
41 #include "tree-pass.h"
42 #include "langhooks.h"
43 #include "tree-vectorizer.h"
44 #include "tree-hasher.h"
45 #include "tree-parloops.h"
48 /* This pass tries to distribute iterations of loops into several threads.
49 The implementation is straightforward -- for each loop we test whether its
50 iterations are independent, and if it is the case (and some additional
51 conditions regarding profitability and correctness are satisfied), we
52 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
55 The most of the complexity is in bringing the code into shape expected
57 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
58 variable and that the exit test is at the start of the loop body
59 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
60 variables by accesses through pointers, and breaking up ssa chains
61 by storing the values incoming to the parallelized loop to a structure
62 passed to the new function as an argument (something similar is done
63 in omp gimplification, unfortunately only a small part of the code
67 -- if there are several parallelizable loops in a function, it may be
68 possible to generate the threads just once (using synchronization to
69 ensure that cross-loop dependences are obeyed).
70 -- handling of common reduction patterns for outer loops.
72 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
75 currently we use vect_force_simple_reduction() to detect reduction patterns.
76 The code transformation will be introduced by an example.
83 for (i = 0; i < N; i++)
93 # sum_29 = PHI <sum_11(5), 1(3)>
94 # i_28 = PHI <i_12(5), 0(3)>
97 sum_11 = D.1795_8 + sum_29;
105 # sum_21 = PHI <sum_11(4)>
106 printf (&"%d"[0], sum_21);
109 after reduction transformation (only relevant parts):
117 # Storing the initial value given by the user. #
119 .paral_data_store.32.sum.27 = 1;
121 #pragma omp parallel num_threads(4)
123 #pragma omp for schedule(static)
125 # The neutral element corresponding to the particular
126 reduction's operation, e.g. 0 for PLUS_EXPR,
127 1 for MULT_EXPR, etc. replaces the user's initial value. #
129 # sum.27_29 = PHI <sum.27_11, 0>
131 sum.27_11 = D.1827_8 + sum.27_29;
135 # Adding this reduction phi is done at create_phi_for_local_result() #
136 # sum.27_56 = PHI <sum.27_11, 0>
139 # Creating the atomic operation is done at
140 create_call_for_reduction_1() #
142 #pragma omp atomic_load
143 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
144 D.1840_60 = sum.27_56 + D.1839_59;
145 #pragma omp atomic_store (D.1840_60);
149 # collecting the result after the join of the threads is done at
150 create_loads_for_reductions().
151 The value computed by the threads is loaded from the
155 .paral_data_load.33_52 = &.paral_data_store.32;
156 sum_37 = .paral_data_load.33_52->sum.27;
157 sum_43 = D.1795_41 + sum_37;
160 # sum_21 = PHI <sum_43, sum_26>
161 printf (&"%d"[0], sum_21);
169 /* Minimal number of iterations of a loop that should be executed in each
171 #define MIN_PER_THREAD 100
/* NOTE(review): this region is a damaged extraction of a numbered source
   listing -- the leading integers on each line are the listing's own line
   numbers, statements are split mid-token across lines, and some lines
   (braces, trailing members) are missing.  Comments only below; the code
   text is left byte-identical.  */
173 /* Element of the hashtable, representing a
174 reduction in the current loop. */
/* One reduction recognized in the loop being parallelized.  Instances are
   stored in a hash_table keyed on reduc_version (see reduction_hasher
   below).  */
175 struct reduction_info
177 gimple reduc_stmt
; /* reduction statement. */
178 gimple reduc_phi
; /* The phi node defining the reduction. */
179 enum tree_code reduction_code
;/* code for the reduction operation. */
180 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
182 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
183 of the reduction variable when existing the loop. */
184 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
185 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
186 tree init
; /* reduction initialization value. */
187 gimple new_phi
; /* (helper field) Newly created phi node whose result
188 will be passed to the atomic operation. Represents
189 the local result each thread computed for the reduction
193 /* Reduction info hashtable helpers. */
/* Hash traits for reduction_info; typed_free_remove frees entries on
   removal.  The hash/equal member definitions follow this declaration.  */
195 struct reduction_hasher
: typed_free_remove
<reduction_info
>
197 typedef reduction_info value_type
;
198 typedef reduction_info compare_type
;
199 static inline hashval_t
hash (const value_type
*);
200 static inline bool equal (const value_type
*, const compare_type
*);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; return-type lines and braces are missing.  Comments only.  */
203 /* Equality and hash functions for hashtab code. */
/* Two table entries match when they describe the same reduction phi.  */
206 reduction_hasher::equal (const value_type
*a
, const compare_type
*b
)
208 return (a
->reduc_phi
== b
->reduc_phi
);
/* Hash on the cached reduc_version (per the struct comment, the uid of the
   original reduc_phi), so lookups can be keyed without dereferencing the
   phi.  */
212 reduction_hasher::hash (const value_type
*a
)
214 return a
->reduc_version
;
/* Table of all reductions in the current loop, using the traits above.  */
217 typedef hash_table
<reduction_hasher
> reduction_info_table_type
;
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; braces and the return statement(s) are missing.  */
/* Looks PHI up in REDUCTION_LIST: a temporary key is built with
   reduc_phi = PHI and reduc_version = gimple_uid (PHI), then find () is
   called.  The found entry (red) is presumably returned, and NULL when the
   table is empty or PHI is NULL -- the returns were lost in extraction;
   confirm against the full source.  */
220 static struct reduction_info
*
221 reduction_phi (reduction_info_table_type reduction_list
, gimple phi
)
223 struct reduction_info tmpred
, *red
;
225 if (reduction_list
.elements () == 0 || phi
== NULL
)
228 tmpred
.reduc_phi
= phi
;
229 tmpred
.reduc_version
= gimple_uid (phi
);
230 red
= reduction_list
.find (&tmpred
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; braces and some trailing text are missing.  */
235 /* Element of hashtable of names to copy. */
/* Maps an SSA name version to its duplicate (and to the structure field
   used to pass its value to the spawned function).  */
237 struct name_to_copy_elt
239 unsigned version
; /* The version of the name to copy. */
240 tree new_name
; /* The new name used in the copy. */
241 tree field
; /* The field of the structure used to pass the
245 /* Name copies hashtable helpers. */
/* Hash traits for name_to_copy_elt, parallel in structure to
   reduction_hasher above.  */
247 struct name_to_copy_hasher
: typed_free_remove
<name_to_copy_elt
>
249 typedef name_to_copy_elt value_type
;
250 typedef name_to_copy_elt compare_type
;
251 static inline hashval_t
hash (const value_type
*);
252 static inline bool equal (const value_type
*, const compare_type
*);
255 /* Equality and hash functions for hashtab code. */
/* Entries are equal exactly when they refer to the same SSA version.  */
258 name_to_copy_hasher::equal (const value_type
*a
, const compare_type
*b
)
260 return a
->version
== b
->version
;
/* The SSA version number itself is the hash value.  */
264 name_to_copy_hasher::hash (const value_type
*a
)
266 return (hashval_t
) a
->version
;
269 typedef hash_table
<name_to_copy_hasher
> name_to_copy_table_type
;
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the rowsize/colsize/denominator members and braces are
   missing (the LTM_* macros below show those members exist).  */
271 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
272 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
273 represents the denominator for every element in the matrix. */
274 typedef struct lambda_trans_matrix_s
276 lambda_matrix matrix
;
280 } *lambda_trans_matrix
;
281 #define LTM_MATRIX(T) ((T)->matrix)
282 #define LTM_ROWSIZE(T) ((T)->rowsize)
283 #define LTM_COLSIZE(T) ((T)->colsize)
284 #define LTM_DENOMINATOR(T) ((T)->denominator)
286 /* Allocate a new transformation matrix. */
/* Obstack-allocates the struct and its ROWSIZE x COLSIZE matrix, sets the
   denominator to 1 (i.e. an integer matrix).  The `return ret;` line was
   lost in extraction.  Note the parameter order is (colsize, rowsize).  */
288 static lambda_trans_matrix
289 lambda_trans_matrix_new (int colsize
, int rowsize
,
290 struct obstack
* lambda_obstack
)
292 lambda_trans_matrix ret
;
294 ret
= (lambda_trans_matrix
)
295 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
296 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
297 LTM_ROWSIZE (ret
) = rowsize
;
298 LTM_COLSIZE (ret
) = colsize
;
299 LTM_DENOMINATOR (ret
) = 1;
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, the `int i, j;` declarations and
   braces are missing.  */
303 /* Multiply a vector VEC by a matrix MAT.
304 MAT is an M*N matrix, and VEC is a vector with length N. The result
305 is stored in DEST which must be a vector of length M. */
/* DEST is cleared first, then dest[i] = sum over j of matrix[i][j]*vec[j]
   -- a plain dense matrix-vector product.  */
308 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
309 lambda_vector vec
, lambda_vector dest
)
313 lambda_vector_clear (dest
, m
);
314 for (i
= 0; i
< m
; i
++)
315 for (j
= 0; j
< n
; j
++)
316 dest
[i
] += matrix
[i
][j
] * vec
[j
];
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the nb_loops parameter line, loop-index declarations,
   return statements, `continue;` and braces are missing (nb_loops, i and j
   are used below but never declared in what survived).  */
319 /* Return true if TRANS is a legal transformation matrix that respects
320 the dependence vectors in DISTS and DIRS. The conservative answer
323 "Wolfe proves that a unimodular transformation represented by the
324 matrix T is legal when applied to a loop nest with a set of
325 lexicographically non-negative distance vectors RDG if and only if
326 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
327 i.e.: if and only if it transforms the lexicographically positive
328 distance vectors to lexicographically positive vectors. Note that
329 a unimodular matrix must transform the zero vector (and only it) to
330 the zero vector." S.Muchnick. */
333 lambda_transform_legal_p (lambda_trans_matrix trans
,
335 vec
<ddr_p
> dependence_relations
)
338 lambda_vector distres
;
339 struct data_dependence_relation
*ddr
;
341 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
342 && LTM_ROWSIZE (trans
) == nb_loops
);
344 /* When there are no dependences, the transformation is correct. */
345 if (dependence_relations
.length () == 0)
348 ddr
= dependence_relations
[0];
352 /* When there is an unknown relation in the dependence_relations, we
353 know that it is no worth looking at this loop nest: give up. */
354 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
357 distres
= lambda_vector_new (nb_loops
);
359 /* For each distance vector in the dependence graph. */
360 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
362 /* Don't care about relations for which we know that there is no
363 dependence, nor about read-read (aka. output-dependences):
364 these data accesses can happen in any order. */
365 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
366 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
369 /* Conservatively answer: "this transformation is not valid". */
370 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
373 /* If the dependence could not be captured by a distance vector,
374 conservatively answer that the transform is not valid. */
375 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
378 /* Compute trans.dist_vect */
/* Each distance vector is multiplied by TRANS; per the Wolfe criterion
   quoted above, every product must stay lexicographically positive.  */
379 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
381 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
382 DDR_DIST_VECT (ddr
, j
), distres
);
384 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, the inner/innermost conditional, the
   failure-path control flow and the final return are missing.  */
391 /* Data dependency analysis. Returns true if the iterations of LOOP
392 are independent on each other (that is, if we can execute them
/* Strategy visible below: compute data dependences for LOOP, then test
   legality of the 1x1 transform [-1] (loop reversal) -- if the loop can be
   reversed its iterations are independent.  All three vecs are released on
   exit.  */
396 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
398 vec
<loop_p
> loop_nest
;
399 vec
<ddr_p
> dependence_relations
;
400 vec
<data_reference_p
> datarefs
;
401 lambda_trans_matrix trans
;
404 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
406 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
408 fprintf (dump_file
, "loop is innermost\n");
410 fprintf (dump_file
, "loop NOT innermost\n");
413 /* Check for problems with dependences. If the loop can be reversed,
414 the iterations are independent. */
415 datarefs
.create (10);
416 dependence_relations
.create (10 * 10);
417 loop_nest
.create (3);
418 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
419 &dependence_relations
))
421 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
422 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
427 dump_data_dependence_relations (dump_file
, dependence_relations
);
/* 1x1 matrix [-1]: legality of reversing the (single) loop dimension.  */
429 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
)
;
430 LTM_MATRIX (trans
)[0][0] = -1;
432 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
435 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
436 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
438 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
440 " FAILED: data dependencies exist across iterations\n");
443 loop_nest
.release ();
444 free_dependence_relations (dependence_relations
);
445 free_data_refs (datarefs
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, the `unsigned i;` declaration, the
   return statements and the free of BBS are missing.  */
450 /* Return true when LOOP contains basic blocks marked with the
451 BB_IRREDUCIBLE_LOOP flag. */
/* Scans every block of LOOP (dominator order) for the flag; presumably
   returns true on the first hit and false otherwise, and frees the bbs
   array -- confirm against the full source.  */
454 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
457 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
460 for (i
= 0; i
< loop
->num_nodes
; i
++)
461 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, several declarations (uid, stmt,
   stmts, obj_name), the DECL_P fast path, hash-slot hit/miss branches and
   braces are missing.  Reconstruct only from the full source.  */
470 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
471 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
472 to their addresses that can be reused. The address of OBJ is known to
473 be invariant in the whole function. Other needed statements are placed
477 take_address_of (tree obj
, tree type
, edge entry
,
478 int_tree_htab_type decl_address
, gimple_stmt_iterator
*gsi
)
481 int_tree_map
**dslot
;
482 struct int_tree_map ielt
, *nielt
;
483 tree
*var_p
, name
, addr
;
487 /* Since the address of OBJ is invariant, the trees may be shared.
488 Avoid rewriting unrelated parts of the code. */
489 obj
= unshare_expr (obj
);
/* Walks down handled components to the base of the reference (loop header
   lost in extraction).  */
491 handled_component_p (*var_p
);
492 var_p
= &TREE_OPERAND (*var_p
, 0))
495 /* Canonicalize the access to base on a MEM_REF. */
497 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
499 /* Assign a canonical SSA name to the address of the base decl used
500 in the address and share it for all accesses and addresses based
502 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
504 dslot
= decl_address
.find_slot_with_hash (&ielt
, uid
, INSERT
);
/* Slot-miss path (condition lost): build the SSA name for the address and
   materialize the assignment on the ENTRY edge, then cache it.  */
509 addr
= TREE_OPERAND (*var_p
, 0);
511 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
513 name
= make_temp_ssa_name (TREE_TYPE (addr
), NULL
, obj_name
);
515 name
= make_ssa_name (TREE_TYPE (addr
), NULL
);
516 stmt
= gimple_build_assign (name
, addr
);
517 gsi_insert_on_edge_immediate (entry
, stmt
);
519 nielt
= XNEW (struct int_tree_map
);
527 /* Express the address in terms of the canonical SSA name. */
528 TREE_OPERAND (*var_p
, 0) = name
;
530 return build_fold_addr_expr_with_type (obj
, type
);
/* Plain-decl path: gimplify &obj directly, inserting helper statements at
   GSI; a type conversion is appended when the address type differs.  */
532 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
533 &stmts
, true, NULL_TREE
);
534 if (!gimple_seq_empty_p (stmts
))
535 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
537 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
539 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
541 if (!gimple_seq_empty_p (stmts
))
542 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, declarations of c/init/e, the
   `return 1;` traversal-continue value and braces are missing.  */
548 /* Callback for htab_traverse. Create the initialization statement
549 for reduction described in SLOT, and place it at the preheader of
550 the loop described in DATA. */
553 initialize_reductions (reduction_info
**slot
, struct loop
*loop
)
556 tree bvar
, type
, arg
;
559 struct reduction_info
*const reduc
= *slot
;
561 /* Create initialization in preheader:
562 reduction_variable = initialization value of reduction. */
564 /* In the phi node at the header, replace the argument coming
565 from the preheader with the reduction initialization value. */
567 /* Create a new variable to initialize the reduction. */
568 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
569 bvar
= create_tmp_var (type
, "reduction");
/* An OMP_CLAUSE_REDUCTION clause is built only so omp_reduction_init can
   compute the operation's neutral element.  */
571 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
572 OMP_CLAUSE_REDUCTION
);
573 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
574 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
576 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
579 /* Replace the argument representing the initialization value
580 with the initialization value for the reduction (neutral
581 element for the particular operation, e.g. 0 for PLUS_EXPR,
582 1 for MULT_EXPR, etc).
583 Keep the old value in a new variable "reduction_initial",
584 that will be taken in consideration after the parallel
585 computing is done. */
587 e
= loop_preheader_edge (loop
);
588 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
589 /* Create new variable to hold the initial value. */
591 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
592 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
/* The user's original initial value is remembered so it can be folded back
   in after the parallel part (see create_loads_for_reductions).  */
593 reduc
->initial_value
= arg
;
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers.  The field declarations just below apparently belong to
   `struct elv_data` (named in the cast further down); its header line,
   the `entry`/`changed`/`reset` members and braces are missing.  `info`
   is presumably the first member so the walk_stmt_info pointer can be cast
   back to elv_data -- confirm against the full source.  */
599 struct walk_stmt_info info
;
601 int_tree_htab_type decl_address
;
602 gimple_stmt_iterator
*gsi
;
607 /* Eliminates references to local variables in *TP out of the single
608 entry single exit region starting at DTA->ENTRY.
609 DECL_ADDRESS contains addresses of the references that had their
610 address taken already. If the expression is changed, CHANGED is
611 set to true. Callback for walk_tree. */
/* NOTE(review): the DECL_P/memory-reference dispatch, the *walk_subtrees
   updates and all early returns were lost in extraction.  */
614 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
616 struct elv_data
*const dta
= (struct elv_data
*) data
;
617 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
623 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
/* Local decl: rewrite the reference as *(&decl) via take_address_of.  */
626 type
= TREE_TYPE (t
);
627 addr_type
= build_pointer_type (type
);
628 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
630 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
636 *tp
= build_simple_mem_ref (addr
);
642 if (TREE_CODE (t
) == ADDR_EXPR
)
644 /* ADDR_EXPR may appear in two contexts:
645 -- as a gimple operand, when the address taken is a function invariant
646 -- as gimple rhs, when the resulting address in not a function
648 We do not need to do anything special in the latter case (the base of
649 the memory reference whose address is taken may be replaced in the
650 DECL_P case). The former case is more complicated, as we need to
651 ensure that the new address is still a gimple operand. Thus, it
652 is not sufficient to replace just the base of the memory reference --
653 we need to move the whole computation of the address out of the
655 if (!is_gimple_val (t
))
659 obj
= TREE_OPERAND (t
, 0);
660 var
= get_base_address (obj
);
661 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
664 addr_type
= TREE_TYPE (t
);
665 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
667 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, the `struct elv_data dta;`
   declaration, the dta.changed checks/update_stmt calls and braces are
   missing.  */
684 /* Moves the references to local variables in STMT at *GSI out of the single
685 entry single exit region starting at ENTRY. DECL_ADDRESS contains
686 addresses of the references that had their address taken
690 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
691 int_tree_htab_type decl_address
)
694 gimple stmt
= gsi_stmt (*gsi
);
/* dta is the elv_data walk context; its embedded walk_stmt_info is zeroed
   before use.  */
696 memset (&dta
.info
, '\0', sizeof (dta
.info
));
698 dta
.decl_address
= decl_address
;
/* Debug binds: only walk the bound value, resetting it if the rewrite
   produced something invalid for debug info.  */
702 if (gimple_debug_bind_p (stmt
))
705 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
706 eliminate_local_variables_1
, &dta
.info
, NULL
);
709 gimple_debug_bind_reset_value (stmt
);
/* Clobbers of locals become no-ops: the locals will live in the shared
   structure, so the clobber is dropped.  */
713 else if (gimple_clobber_p (stmt
))
715 stmt
= gimple_build_nop ();
716 gsi_replace (gsi
, stmt
, false);
722 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, `unsigned i; basic_block bb;`
   declarations, body.release () and braces are missing.  */
729 /* Eliminates the references to local variables from the single entry
730 single exit region between the ENTRY and EXIT edges.
733 1) Taking address of a local variable -- these are moved out of the
734 region (and temporary variable is created to hold the address if
737 2) Dereferencing a local variable -- these are replaced with indirect
741 eliminate_local_variables (edge entry
, edge exit
)
744 vec
<basic_block
> body
;
747 gimple_stmt_iterator gsi
;
748 bool has_debug_stmt
= false;
749 int_tree_htab_type decl_address
;
750 decl_address
.create (10);
751 basic_block entry_bb
= entry
->src
;
752 basic_block exit_bb
= exit
->dest
;
754 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
/* First pass handles real statements (debug binds are only noted), second
   pass handles the debug binds -- so they see the final decl_address map.  */
756 FOR_EACH_VEC_ELT (body
, i
, bb
)
757 if (bb
!= entry_bb
&& bb
!= exit_bb
)
758 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
759 if (is_gimple_debug (gsi_stmt (gsi
)))
761 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
762 has_debug_stmt
= true;
765 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
768 FOR_EACH_VEC_ELT (body
, i
, bb
)
769 if (bb
!= entry_bb
&& bb
!= exit_bb
)
770 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
771 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
772 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
774 decl_address
.dispose ();
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, the `basic_block def_bb;`
   declaration, the return statements and braces are missing.  */
778 /* Returns true if expression EXPR is not defined between ENTRY and
779 EXIT, i.e. if all its operands are defined outside of the region. */
/* For an SSA_NAME, inspects the defining statement's block: a definition
   dominated by ENTRY but not by EXIT lies inside the region.  Minimal
   invariants are trivially region-invariant.  */
782 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
784 basic_block entry_bb
= entry
->src
;
785 basic_block exit_bb
= exit
->dest
;
788 if (is_gimple_min_invariant (expr
))
791 if (TREE_CODE (expr
) == SSA_NAME
)
793 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
795 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
796 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, slot-hit/miss conditionals, several
   returns and braces are missing.  Read control flow with care.  */
805 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
806 The copies are stored to NAME_COPIES, if NAME was already duplicated,
807 its duplicate stored in NAME_COPIES is returned.
809 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
810 duplicated, storing the copies in DECL_COPIES. */
813 separate_decls_in_region_name (tree name
, name_to_copy_table_type name_copies
,
814 int_tree_htab_type decl_copies
, bool copy_name_p
)
816 tree copy
, var
, var_copy
;
817 unsigned idx
, uid
, nuid
;
818 struct int_tree_map ielt
, *nielt
;
819 struct name_to_copy_elt elt
, *nelt
;
820 name_to_copy_elt
**slot
;
821 int_tree_map
**dslot
;
823 if (TREE_CODE (name
) != SSA_NAME
)
826 idx
= SSA_NAME_VERSION (name
);
/* INSERT only when a copy is wanted; an existing entry short-circuits to
   its cached new_name.  */
828 slot
= name_copies
.find_slot_with_hash (&elt
, idx
,
829 copy_name_p
? INSERT
: NO_INSERT
);
831 return (*slot
)->new_name
;
835 copy
= duplicate_ssa_name (name
, NULL
);
836 nelt
= XNEW (struct name_to_copy_elt
);
838 nelt
->new_name
= copy
;
839 nelt
->field
= NULL_TREE
;
/* Base-decl duplication (done regardless of copy_name_p, per the header
   comment above).  */
848 var
= SSA_NAME_VAR (name
);
852 uid
= DECL_UID (var
);
854 dslot
= decl_copies
.find_slot_with_hash (&ielt
, uid
, INSERT
);
857 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
858 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
859 nielt
= XNEW (struct int_tree_map
);
861 nielt
->to
= var_copy
;
864 /* Ensure that when we meet this decl next time, we won't duplicate
/* The copy is also registered under its own uid, mapping to itself.  */
866 nuid
= DECL_UID (var_copy
);
868 dslot
= decl_copies
.find_slot_with_hash (&ielt
, nuid
, INSERT
);
869 gcc_assert (!*dslot
);
870 nielt
= XNEW (struct int_tree_map
);
872 nielt
->to
= var_copy
;
876 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
878 replace_ssa_name_symbol (copy
, var_copy
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, operand-iterator declarations
   (use/def/oi), local tree declarations, the SET_USE for copied uses and
   braces are missing.  */
882 /* Finds the ssa names used in STMT that are defined outside the
883 region between ENTRY and EXIT and replaces such ssa names with
884 their duplicates. The duplicates are stored to NAME_COPIES. Base
885 decls of all ssa names used in STMT (including those defined in
886 LOOP) are replaced with the new temporary variables; the
887 replacement decls are stored in DECL_COPIES. */
890 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
891 name_to_copy_table_type name_copies
,
892 int_tree_htab_type decl_copies
)
/* Defs inside the region are never renamed (hence the copy == name
   assert); only their base decls are remapped.  */
900 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
902 name
= DEF_FROM_PTR (def
);
903 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
904 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
906 gcc_assert (copy
== name
);
/* Uses whose definition is region-invariant get a fresh name (copy_name_p
   true).  */
909 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
911 name
= USE_FROM_PTR (use
);
912 if (TREE_CODE (name
) != SSA_NAME
)
915 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
916 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, `use_operand_p use; ssa_op_iter oi;
   tree var, name;` declarations, several early returns and braces are
   missing.  The header comment below is a duplicate of the _stmt variant's;
   this function handles debug bind / source-bind statements.  */
922 /* Finds the ssa names used in STMT that are defined outside the
923 region between ENTRY and EXIT and replaces such ssa names with
924 their duplicates. The duplicates are stored to NAME_COPIES. Base
925 decls of all ssa names used in STMT (including those defined in
926 LOOP) are replaced with the new temporary variables; the
927 replacement decls are stored in DECL_COPIES. */
930 separate_decls_in_region_debug (gimple stmt
,
931 name_to_copy_table_type name_copies
,
932 int_tree_htab_type decl_copies
)
937 struct int_tree_map ielt
;
938 struct name_to_copy_elt elt
;
939 name_to_copy_elt
**slot
;
940 int_tree_map
**dslot
;
942 if (gimple_debug_bind_p (stmt
))
943 var
= gimple_debug_bind_get_var (stmt
)
;
944 else if (gimple_debug_source_bind_p (stmt
))
945 var
= gimple_debug_source_bind_get_var (stmt
);
948 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
950 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
/* Remap the bound variable to its decl copy (NO_INSERT: the decl must have
   been seen by the non-debug pass already).  */
951 ielt
.uid
= DECL_UID (var
);
952 dslot
= decl_copies
.find_slot_with_hash (&ielt
, ielt
.uid
, NO_INSERT
);
955 if (gimple_debug_bind_p (stmt
))
956 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
957 else if (gimple_debug_source_bind_p (stmt
))
958 gimple_debug_source_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
/* Remap SSA uses; a use with no recorded copy resets the bind value
   (the debug info cannot be preserved).  */
960 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
962 name
= USE_FROM_PTR (use
);
963 if (TREE_CODE (name
) != SSA_NAME
)
966 elt
.version
= SSA_NAME_VERSION (name
);
967 slot
= name_copies
.find_slot_with_hash (&elt
, elt
.version
, NO_INSERT
);
970 gimple_debug_bind_reset_value (stmt
);
975 SET_USE (use
, (*slot
)->new_name
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type lines, the `red->field = field;` /
   `elt->field = field;` assignments, traversal-continue returns and braces
   are missing.  */
981 /* Callback for htab_traverse. Adds a field corresponding to the reduction
982 specified in SLOT. The type is passed in DATA. */
985 add_field_for_reduction (reduction_info
**slot
, tree type
)
988 struct reduction_info
*const red
= *slot
;
989 tree var
= gimple_assign_lhs (red
->reduc_stmt
);
990 tree field
= build_decl (gimple_location (red
->reduc_stmt
), FIELD_DECL
,
991 SSA_NAME_IDENTIFIER (var
), TREE_TYPE (var
));
993 insert_field_into_struct (type
, field
);
1000 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1001 described in SLOT. The type is passed in DATA. */
1004 add_field_for_name (name_to_copy_elt
**slot
, tree type
)
1006 struct name_to_copy_elt
*const elt
= *slot
;
1007 tree name
= ssa_name (elt
->version
);
1008 tree field
= build_decl (UNKNOWN_LOCATION
,
1009 FIELD_DECL
, SSA_NAME_IDENTIFIER (name
),
1012 insert_field_into_struct (type
, field
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, declarations of e/new_phi/local_res,
   the traversal-continue return and braces are missing.  */
1018 /* Callback for htab_traverse. A local result is the intermediate result
1019 computed by a single
1020 thread, or the initial value in case no iteration was executed.
1021 This function creates a phi node reflecting these values.
1022 The phi's result will be stored in NEW_PHI field of the
1023 reduction's data structure. */
1026 create_phi_for_local_result (reduction_info
**slot
, struct loop
*loop
)
1028 struct reduction_info
*const reduc
= *slot
;
1031 basic_block store_bb
;
1033 source_location locus
;
1035 /* STORE_BB is the block where the phi
1036 should be stored. It is the destination of the loop exit.
1037 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1038 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1040 /* STORE_BB has two predecessors. One coming from the loop
1041 (the reduction's result is computed at the loop),
1042 and another coming from a block preceding the loop,
1044 are executed (the initial value should be taken). */
/* Pick E as the non-loop predecessor edge, whichever slot it occupies.  */
1045 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1046 e
= EDGE_PRED (store_bb
, 1);
1048 e
= EDGE_PRED (store_bb
, 0);
1049 local_res
= copy_ssa_name (gimple_assign_lhs (reduc
->reduc_stmt
), NULL
);
1050 locus
= gimple_location (reduc
->reduc_stmt
);
1051 new_phi
= create_phi_node (local_res
, store_bb
);
/* Phi args: neutral init value on the bypass edge, the thread's computed
   value on the loop-exit edge.  */
1052 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1053 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1054 FALLTHRU_EDGE (loop
->latch
), locus
);
1055 reduc
->new_phi
= new_phi
;
/* NOTE(review): the two basic_block fields below apparently belong to
   `struct clsn_data` (used by the callbacks that follow); the struct
   header and its store/load tree members are missing from the
   extraction.  */
1065 basic_block store_bb
;
1066 basic_block load_bb
;
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, declarations of load_struct/bb/
   new_bb/e/load, the `ref = tmp_load;` link between the atomic load and the
   folded expression, the traversal-continue return and braces are
   missing.  */
1069 /* Callback for htab_traverse. Create an atomic instruction for the
1070 reduction described in SLOT.
1071 DATA annotates the place in memory the atomic operation relates to,
1072 and the basic block it needs to be generated in. */
1075 create_call_for_reduction_1 (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1077 struct reduction_info
*const reduc
= *slot
;
1078 gimple_stmt_iterator gsi
;
1079 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1084 tree t
, addr
, ref
, x
;
1085 tree tmp_load
, name
;
/* Address of this reduction's field in the shared parloop structure.  */
1088 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1089 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1091 addr
= build_addr (t
, current_function_decl
);
1093 /* Create phi node. */
1094 bb
= clsn_data
->load_bb
;
1096 e
= split_block (bb
, t
);
/* Emit: atomic_load into tmp_load; combine with the thread-local phi
   result; atomic_store the combined value.  Blocks are split so the
   atomic pair sits in its own region.  */
1099 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
1100 tmp_load
= make_ssa_name (tmp_load
, NULL
);
1101 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1102 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1103 gsi
= gsi_start_bb (new_bb
);
1104 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1106 e
= split_block (new_bb
, load
);
1108 gsi
= gsi_start_bb (new_bb
);
1110 x
= fold_build2 (reduc
->reduction_code
,
1111 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1112 PHI_RESULT (reduc
->new_phi
));
1114 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1115 GSI_CONTINUE_LINKING
);
1117 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, the receiver of the second
   `.traverse` call (presumably reduction_list) and braces are missing.  */
1121 /* Create the atomic operation at the join point of the threads.
1122 REDUCTION_LIST describes the reductions in the LOOP.
1123 LD_ST_DATA describes the shared data structure where
1124 shared data is stored in and loaded from. */
1126 create_call_for_reduction (struct loop
*loop
,
1127 reduction_info_table_type reduction_list
,
1128 struct clsn_data
*ld_st_data
)
/* First give every reduction its per-thread result phi, then emit the
   atomic update for each at the join point.  */
1130 reduction_list
.traverse
<struct loop
*, create_phi_for_local_result
> (loop
);
1131 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1132 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1134 .traverse
<struct clsn_data
*, create_call_for_reduction_1
> (ld_st_data
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, declarations of load_struct/name/x/
   stmt, the folding of the user's initial value into `x`, loop-body braces
   and the traversal-continue return are missing.  */
1137 /* Callback for htab_traverse. Loads the final reduction value at the
1138 join point of all threads, and inserts it in the right place. */
1141 create_loads_for_reductions (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1143 struct reduction_info
*const red
= *slot
;
1145 gimple_stmt_iterator gsi
;
1146 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
/* Read the reduction's field back out of the shared structure and assign
   it to the value the rest of the function uses (the old exit phi's
   result).  */
1151 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1152 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1153 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1157 name
= PHI_RESULT (red
->keep_res
);
1158 stmt
= gimple_build_assign (name
, x
);
1159 SSA_NAME_DEF_STMT (name
) = stmt
;
1161 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
/* The old exit phi (keep_res) is now redundant -- find and remove it.  */
1163 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1164 !gsi_end_p (gsi
); gsi_next (&gsi
))
1165 if (gsi_stmt (gsi
) == red
->keep_res
)
1167 remove_phi_node (&gsi
, false);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, `gimple stmt; tree t;` declarations,
   the receiver of the `.traverse` call (presumably reduction_list) and
   braces are missing.  */
1173 /* Load the reduction result that was stored in LD_ST_DATA.
1174 REDUCTION_LIST describes the list of reductions that the
1175 loads should be generated for. */
1177 create_final_loads_for_reduction (reduction_info_table_type reduction_list
,
1178 struct clsn_data
*ld_st_data
)
1180 gimple_stmt_iterator gsi
;
/* Materialize load = &store so the per-reduction callbacks can address the
   shared structure, then emit one load per reduction.  */
1184 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1185 t
= build_fold_addr_expr (ld_st_data
->store
);
1186 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1188 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1189 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
1192 .traverse
<struct clsn_data
*, create_loads_for_reductions
> (ld_st_data
);
/* NOTE(review): damaged listing extraction -- leading integers are listing
   line numbers; the return-type line, `tree t; gimple stmt;` declarations,
   the traversal-continue return and braces are missing.  */
1196 /* Callback for htab_traverse. Store the neutral value for the
1197 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1198 1 for MULT_EXPR, etc. into the reduction field.
1199 The reduction is specified in SLOT. The store information is
1203 create_stores_for_reduction (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1205 struct reduction_info
*const red
= *slot
;
1208 gimple_stmt_iterator gsi
;
1209 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
/* store.field = initial_value, appended at the end of STORE_BB.  Note:
   despite the header comment, the value stored here is red->initial_value
   (the user's original initial value saved by initialize_reductions), not
   red->init.  */
1211 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1212 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1213 stmt
= gimple_build_assign (t
, red
->initial_value
);
1214 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1219 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1220 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1221 specified in SLOT. */
1224 create_loads_and_stores_for_name (name_to_copy_elt
**slot
,
1225 struct clsn_data
*clsn_data
)
1227 struct name_to_copy_elt
*const elt
= *slot
;
1230 gimple_stmt_iterator gsi
;
1231 tree type
= TREE_TYPE (elt
->new_name
);
1234 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1235 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1236 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1237 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1239 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1240 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1241 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1242 stmt
= gimple_build_assign (elt
->new_name
, t
);
1243 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
1244 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1249 /* Moves all the variables used in LOOP and defined outside of it (including
1250 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1251 name) to a structure created for this purpose. The code
1259 is transformed this way:
1274 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1275 pointer `new' is intentionally not initialized (the loop will be split to a
1276 separate function later, and `new' will be initialized from its arguments).
1277 LD_ST_DATA holds information about the shared data structure used to pass
1278 information among the threads. It is initialized here, and
1279 gen_parallel_loop will pass it to create_call_for_reduction that
1280 needs this information. REDUCTION_LIST describes the reductions
1284 separate_decls_in_region (edge entry
, edge exit
,
1285 reduction_info_table_type reduction_list
,
1286 tree
*arg_struct
, tree
*new_arg_struct
,
1287 struct clsn_data
*ld_st_data
)
1290 basic_block bb1
= split_edge (entry
);
1291 basic_block bb0
= single_pred (bb1
);
1292 name_to_copy_table_type name_copies
;
1293 name_copies
.create (10);
1294 int_tree_htab_type decl_copies
;
1295 decl_copies
.create (10);
1297 tree type
, type_name
, nvar
;
1298 gimple_stmt_iterator gsi
;
1299 struct clsn_data clsn_data
;
1300 vec
<basic_block
> body
;
1303 basic_block entry_bb
= bb1
;
1304 basic_block exit_bb
= exit
->dest
;
1305 bool has_debug_stmt
= false;
1307 entry
= single_succ_edge (entry_bb
);
1308 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1310 FOR_EACH_VEC_ELT (body
, i
, bb
)
1312 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1314 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1315 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1316 name_copies
, decl_copies
);
1318 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1320 gimple stmt
= gsi_stmt (gsi
);
1322 if (is_gimple_debug (stmt
))
1323 has_debug_stmt
= true;
1325 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1326 name_copies
, decl_copies
);
1331 /* Now process debug bind stmts. We must not create decls while
1332 processing debug stmts, so we defer their processing so as to
1333 make sure we will have debug info for as many variables as
1334 possible (all of those that were dealt with in the loop above),
1335 and discard those for which we know there's nothing we can
1338 FOR_EACH_VEC_ELT (body
, i
, bb
)
1339 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1341 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1343 gimple stmt
= gsi_stmt (gsi
);
1345 if (is_gimple_debug (stmt
))
1347 if (separate_decls_in_region_debug (stmt
, name_copies
,
1350 gsi_remove (&gsi
, true);
1361 if (name_copies
.elements () == 0 && reduction_list
.elements () == 0)
1363 /* It may happen that there is nothing to copy (if there are only
1364 loop carried and external variables in the loop). */
1366 *new_arg_struct
= NULL
;
1370 /* Create the type for the structure to store the ssa names to. */
1371 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1372 type_name
= build_decl (UNKNOWN_LOCATION
,
1373 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1375 TYPE_NAME (type
) = type_name
;
1377 name_copies
.traverse
<tree
, add_field_for_name
> (type
);
1378 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1380 /* Create the fields for reductions. */
1381 reduction_list
.traverse
<tree
, add_field_for_reduction
> (type
);
1385 /* Create the loads and stores. */
1386 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1387 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1388 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1390 ld_st_data
->store
= *arg_struct
;
1391 ld_st_data
->load
= *new_arg_struct
;
1392 ld_st_data
->store_bb
= bb0
;
1393 ld_st_data
->load_bb
= bb1
;
1396 .traverse
<struct clsn_data
*, create_loads_and_stores_for_name
>
1399 /* Load the calculation from memory (after the join of the threads). */
1401 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1404 .traverse
<struct clsn_data
*, create_stores_for_reduction
>
1406 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1407 clsn_data
.load_bb
= exit
->dest
;
1408 clsn_data
.store
= ld_st_data
->store
;
1409 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1413 decl_copies
.dispose ();
1414 name_copies
.dispose ();
1417 /* Bitmap containing uids of functions created by parallelization. We cannot
1418 allocate it from the default obstack, as it must live across compilation
1419 of several functions; we make it gc allocated instead. */
1421 static GTY(()) bitmap parallelized_functions
;
1423 /* Returns true if FN was created by create_loop_fn. */
1426 parallelized_function_p (tree fn
)
1428 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1431 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1434 /* Creates and returns an empty function that will receive the body of
1435 a parallelized loop. */
1438 create_loop_fn (location_t loc
)
1442 tree decl
, type
, name
, t
;
1443 struct function
*act_cfun
= cfun
;
1444 static unsigned loopfn_num
;
1446 loc
= LOCATION_LOCUS (loc
);
1447 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1448 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1449 clean_symbol_name (tname
);
1450 name
= get_identifier (tname
);
1451 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1453 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1454 if (!parallelized_functions
)
1455 parallelized_functions
= BITMAP_GGC_ALLOC ();
1456 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
1458 TREE_STATIC (decl
) = 1;
1459 TREE_USED (decl
) = 1;
1460 DECL_ARTIFICIAL (decl
) = 1;
1461 DECL_IGNORED_P (decl
) = 0;
1462 TREE_PUBLIC (decl
) = 0;
1463 DECL_UNINLINABLE (decl
) = 1;
1464 DECL_EXTERNAL (decl
) = 0;
1465 DECL_CONTEXT (decl
) = NULL_TREE
;
1466 DECL_INITIAL (decl
) = make_node (BLOCK
);
1468 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1469 DECL_ARTIFICIAL (t
) = 1;
1470 DECL_IGNORED_P (t
) = 1;
1471 DECL_RESULT (decl
) = t
;
1473 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1475 DECL_ARTIFICIAL (t
) = 1;
1476 DECL_ARG_TYPE (t
) = ptr_type_node
;
1477 DECL_CONTEXT (t
) = decl
;
1479 DECL_ARGUMENTS (decl
) = t
;
1481 allocate_struct_function (decl
, false);
1483 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1485 set_cfun (act_cfun
);
1490 /* Moves the exit condition of LOOP to the beginning of its header, and
1491 duplicates the part of the last iteration that gets disabled to the
1492 exit of the loop. NIT is the number of iterations of the loop
1493 (used to initialize the variables in the duplicated part).
1495 TODO: the common case is that latch of the loop is empty and immediately
1496 follows the loop exit. In this case, it would be better not to copy the
1497 body of the loop, but only move the entry of the loop directly before the
1498 exit check and increase the number of iterations of the loop by one.
1499 This may need some additional preconditioning in case NIT = ~0.
1500 REDUCTION_LIST describes the reductions in LOOP. */
1503 transform_to_exit_first_loop (struct loop
*loop
,
1504 reduction_info_table_type reduction_list
,
1507 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1510 edge exit
= single_dom_exit (loop
), hpred
;
1511 tree control
, control_name
, res
, t
;
1512 gimple phi
, nphi
, cond_stmt
, stmt
, cond_nit
;
1513 gimple_stmt_iterator gsi
;
1516 split_block_after_labels (loop
->header
);
1517 orig_header
= single_succ (loop
->header
);
1518 hpred
= single_succ_edge (loop
->header
);
1520 cond_stmt
= last_stmt (exit
->src
);
1521 control
= gimple_cond_lhs (cond_stmt
);
1522 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1524 /* Make sure that we have phi nodes on exit for all loop header phis
1525 (create_parallel_loop requires that). */
1526 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1528 phi
= gsi_stmt (gsi
);
1529 res
= PHI_RESULT (phi
);
1530 t
= copy_ssa_name (res
, phi
);
1531 SET_PHI_RESULT (phi
, t
);
1532 nphi
= create_phi_node (res
, orig_header
);
1533 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1537 gimple_cond_set_lhs (cond_stmt
, t
);
1538 update_stmt (cond_stmt
);
1543 bbs
= get_loop_body_in_dom_order (loop
);
1545 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1547 nbbs
= XNEWVEC (basic_block
, n
);
1548 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1555 /* Other than reductions, the only gimple reg that should be copied
1556 out of the loop is the control variable. */
1557 exit
= single_dom_exit (loop
);
1558 control_name
= NULL_TREE
;
1559 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1561 phi
= gsi_stmt (gsi
);
1562 res
= PHI_RESULT (phi
);
1563 if (virtual_operand_p (res
))
1569 /* Check if it is a part of reduction. If it is,
1570 keep the phi at the reduction's keep_res field. The
1571 PHI_RESULT of this phi is the resulting value of the reduction
1572 variable when exiting the loop. */
1574 if (reduction_list
.elements () > 0)
1576 struct reduction_info
*red
;
1578 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1579 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1582 red
->keep_res
= phi
;
1587 gcc_assert (control_name
== NULL_TREE
1588 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1590 remove_phi_node (&gsi
, false);
1592 gcc_assert (control_name
!= NULL_TREE
);
1594 /* Initialize the control variable to number of iterations
1595 according to the rhs of the exit condition. */
1596 gsi
= gsi_after_labels (ex_bb
);
1597 cond_nit
= last_stmt (exit
->src
);
1598 nit_1
= gimple_cond_rhs (cond_nit
);
1599 nit_1
= force_gimple_operand_gsi (&gsi
,
1600 fold_convert (TREE_TYPE (control_name
), nit_1
),
1601 false, NULL_TREE
, false, GSI_SAME_STMT
);
1602 stmt
= gimple_build_assign (control_name
, nit_1
);
1603 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1604 SSA_NAME_DEF_STMT (control_name
) = stmt
;
1607 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1608 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1609 NEW_DATA is the variable that should be initialized from the argument
1610 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1611 basic block containing GIMPLE_OMP_PARALLEL tree. */
1614 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1615 tree new_data
, unsigned n_threads
, location_t loc
)
1617 gimple_stmt_iterator gsi
;
1618 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1620 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1621 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1622 edge exit
, nexit
, guard
, end
, e
;
1624 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1625 bb
= loop_preheader_edge (loop
)->src
;
1626 paral_bb
= single_pred (bb
);
1627 gsi
= gsi_last_bb (paral_bb
);
1629 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1630 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1631 = build_int_cst (integer_type_node
, n_threads
);
1632 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1633 gimple_set_location (stmt
, loc
);
1635 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1637 /* Initialize NEW_DATA. */
1640 gsi
= gsi_after_labels (bb
);
1642 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1643 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1644 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1645 SSA_NAME_DEF_STMT (param
) = stmt
;
1647 stmt
= gimple_build_assign (new_data
,
1648 fold_convert (TREE_TYPE (new_data
), param
));
1649 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1650 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1653 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1654 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1655 gsi
= gsi_last_bb (bb
);
1656 stmt
= gimple_build_omp_return (false);
1657 gimple_set_location (stmt
, loc
);
1658 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1660 /* Extract data for GIMPLE_OMP_FOR. */
1661 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1662 cond_stmt
= last_stmt (loop
->header
);
1664 cvar
= gimple_cond_lhs (cond_stmt
);
1665 cvar_base
= SSA_NAME_VAR (cvar
);
1666 phi
= SSA_NAME_DEF_STMT (cvar
);
1667 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1668 initvar
= copy_ssa_name (cvar
, NULL
);
1669 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1671 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1673 gsi
= gsi_last_nondebug_bb (loop
->latch
);
1674 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1675 gsi_remove (&gsi
, true);
1678 for_bb
= split_edge (loop_preheader_edge (loop
));
1679 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1680 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1681 gcc_assert (exit
== single_dom_exit (loop
));
1683 guard
= make_edge (for_bb
, ex_bb
, 0);
1684 single_succ_edge (loop
->latch
)->flags
= 0;
1685 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1686 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1688 source_location locus
;
1690 phi
= gsi_stmt (gsi
);
1691 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1693 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1694 locus
= gimple_phi_arg_location_from_edge (stmt
,
1695 loop_preheader_edge (loop
));
1696 add_phi_arg (phi
, def
, guard
, locus
);
1698 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1699 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1700 add_phi_arg (phi
, def
, end
, locus
);
1702 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1703 PENDING_STMT (e
) = NULL
;
1705 /* Emit GIMPLE_OMP_FOR. */
1706 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1707 type
= TREE_TYPE (cvar
);
1708 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
1709 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1711 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
1712 gimple_set_location (for_stmt
, loc
);
1713 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1714 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1715 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1716 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1717 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1719 build_int_cst (type
, 1)));
1721 gsi
= gsi_last_bb (for_bb
);
1722 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1723 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1725 /* Emit GIMPLE_OMP_CONTINUE. */
1726 gsi
= gsi_last_bb (loop
->latch
);
1727 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1728 gimple_set_location (stmt
, loc
);
1729 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1730 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1732 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1733 gsi
= gsi_last_bb (ex_bb
);
1734 stmt
= gimple_build_omp_return (true);
1735 gimple_set_location (stmt
, loc
);
1736 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1738 /* After the above dom info is hosed. Re-compute it. */
1739 free_dominance_info (CDI_DOMINATORS
);
1740 calculate_dominance_info (CDI_DOMINATORS
);
1745 /* Generates code to execute the iterations of LOOP in N_THREADS
1746 threads in parallel.
1748 NITER describes number of iterations of LOOP.
1749 REDUCTION_LIST describes the reductions existent in the LOOP. */
1752 gen_parallel_loop (struct loop
*loop
, reduction_info_table_type reduction_list
,
1753 unsigned n_threads
, struct tree_niter_desc
*niter
)
1756 tree many_iterations_cond
, type
, nit
;
1757 tree arg_struct
, new_arg_struct
;
1759 basic_block parallel_head
;
1761 struct clsn_data clsn_data
;
1765 unsigned int m_p_thread
=2;
1769 ---------------------------------------------------------------------
1772 IV = phi (INIT, IV + STEP)
1778 ---------------------------------------------------------------------
1780 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1781 we generate the following code:
1783 ---------------------------------------------------------------------
1786 || NITER < MIN_PER_THREAD * N_THREADS)
1790 store all local loop-invariant variables used in body of the loop to DATA.
1791 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1792 load the variables from DATA.
1793 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1796 GIMPLE_OMP_CONTINUE;
1797 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1798 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1804 IV = phi (INIT, IV + STEP)
1815 /* Create two versions of the loop -- in the old one, we know that the
1816 number of iterations is large enough, and we will transform it into the
1817 loop that will be split to loop_fn, the new one will be used for the
1818 remaining iterations. */
1820 /* We should compute a better number-of-iterations value for outer loops.
1823 for (i = 0; i < n; ++i)
1824 for (j = 0; j < m; ++j)
1827 we should compute nit = n * m, not nit = n.
1828 Also may_be_zero handling would need to be adjusted. */
1830 type
= TREE_TYPE (niter
->niter
);
1831 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1834 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1839 m_p_thread
=MIN_PER_THREAD
;
1841 many_iterations_cond
=
1842 fold_build2 (GE_EXPR
, boolean_type_node
,
1843 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
1845 many_iterations_cond
1846 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1847 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1848 many_iterations_cond
);
1849 many_iterations_cond
1850 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1852 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1853 if (!is_gimple_condexpr (many_iterations_cond
))
1855 many_iterations_cond
1856 = force_gimple_operand (many_iterations_cond
, &stmts
,
1859 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1862 initialize_original_copy_tables ();
1864 /* We assume that the loop usually iterates a lot. */
1865 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1866 loop_version (loop
, many_iterations_cond
, NULL
,
1867 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1868 update_ssa (TODO_update_ssa
);
1869 free_original_copy_tables ();
1871 /* Base all the induction variables in LOOP on a single control one. */
1872 canonicalize_loop_ivs (loop
, &nit
, true);
1874 /* Ensure that the exit condition is the first statement in the loop. */
1875 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1877 /* Generate initializations for reductions. */
1878 if (reduction_list
.elements () > 0)
1879 reduction_list
.traverse
<struct loop
*, initialize_reductions
> (loop
);
1881 /* Eliminate the references to local variables from the loop. */
1882 gcc_assert (single_exit (loop
));
1883 entry
= loop_preheader_edge (loop
);
1884 exit
= single_dom_exit (loop
);
1886 eliminate_local_variables (entry
, exit
);
1887 /* In the old loop, move all variables non-local to the loop to a structure
1888 and back, and create separate decls for the variables used in loop. */
1889 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1890 &new_arg_struct
, &clsn_data
);
1892 /* Create the parallel constructs. */
1893 loc
= UNKNOWN_LOCATION
;
1894 cond_stmt
= last_stmt (loop
->header
);
1896 loc
= gimple_location (cond_stmt
);
1897 parallel_head
= create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
1898 new_arg_struct
, n_threads
, loc
);
1899 if (reduction_list
.elements () > 0)
1900 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1904 /* Cancel the loop (it is simpler to do it here rather than to teach the
1905 expander to do it). */
1906 cancel_loop_tree (loop
);
1908 /* Free loop bound estimations that could contain references to
1909 removed statements. */
1910 FOR_EACH_LOOP (li
, loop
, 0)
1911 free_numbers_of_iterations_estimates_loop (loop
);
1913 /* Expand the parallel constructs. We do it directly here instead of running
1914 a separate expand_omp pass, since it is more efficient, and less likely to
1915 cause troubles with further analyses not being able to deal with the
1918 omp_expand_local (parallel_head
);
1921 /* Returns true when LOOP contains vector phi nodes. */
1924 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1927 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1928 gimple_stmt_iterator gsi
;
1931 for (i
= 0; i
< loop
->num_nodes
; i
++)
1932 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1933 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1942 /* Create a reduction_info struct, initialize it with REDUC_STMT
1943 and PHI, insert it to the REDUCTION_LIST. */
1946 build_new_reduction (reduction_info_table_type reduction_list
,
1947 gimple reduc_stmt
, gimple phi
)
1949 reduction_info
**slot
;
1950 struct reduction_info
*new_reduction
;
1952 gcc_assert (reduc_stmt
);
1954 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1957 "Detected reduction. reduction stmt is: \n");
1958 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1959 fprintf (dump_file
, "\n");
1962 new_reduction
= XCNEW (struct reduction_info
);
1964 new_reduction
->reduc_stmt
= reduc_stmt
;
1965 new_reduction
->reduc_phi
= phi
;
1966 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
1967 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1968 slot
= reduction_list
.find_slot (new_reduction
, INSERT
);
1969 *slot
= new_reduction
;
1972 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1975 set_reduc_phi_uids (reduction_info
**slot
, void *data ATTRIBUTE_UNUSED
)
1977 struct reduction_info
*const red
= *slot
;
1978 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1982 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1985 gather_scalar_reductions (loop_p loop
, reduction_info_table_type reduction_list
)
1987 gimple_stmt_iterator gsi
;
1988 loop_vec_info simple_loop_info
;
1990 simple_loop_info
= vect_analyze_loop_form (loop
);
1992 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1994 gimple phi
= gsi_stmt (gsi
);
1996 tree res
= PHI_RESULT (phi
);
1999 if (virtual_operand_p (res
))
2002 if (!simple_iv (loop
, loop
, res
, &iv
, true)
2003 && simple_loop_info
)
2005 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
2008 if (reduc_stmt
&& !double_reduc
)
2009 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
2012 destroy_loop_vec_info (simple_loop_info
, true);
2014 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2015 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2017 reduction_list
.traverse
<void *, set_reduc_phi_uids
> (NULL
);
2020 /* Try to initialize NITER for code generation part. */
2023 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2025 edge exit
= single_dom_exit (loop
);
2029 /* We need to know # of iterations, and there should be no uses of values
2030 defined inside loop outside of it, unless the values are invariants of
2032 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2035 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2042 /* Try to initialize REDUCTION_LIST for code generation part.
2043 REDUCTION_LIST describes the reductions. */
2046 try_create_reduction_list (loop_p loop
,
2047 reduction_info_table_type reduction_list
)
2049 edge exit
= single_dom_exit (loop
);
2050 gimple_stmt_iterator gsi
;
2054 gather_scalar_reductions (loop
, reduction_list
);
2057 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2059 gimple phi
= gsi_stmt (gsi
);
2060 struct reduction_info
*red
;
2061 imm_use_iterator imm_iter
;
2062 use_operand_p use_p
;
2064 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2066 if (!virtual_operand_p (val
))
2068 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2070 fprintf (dump_file
, "phi is ");
2071 print_gimple_stmt (dump_file
, phi
, 0, 0);
2072 fprintf (dump_file
, "arg of phi to exit: value ");
2073 print_generic_expr (dump_file
, val
, 0);
2074 fprintf (dump_file
, " used outside loop\n");
2076 " checking if it a part of reduction pattern: \n");
2078 if (reduction_list
.elements () == 0)
2080 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2082 " FAILED: it is not a part of reduction.\n");
2086 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2088 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2089 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2091 reduc_phi
= USE_STMT (use_p
);
2095 red
= reduction_phi (reduction_list
, reduc_phi
);
2098 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2100 " FAILED: it is not a part of reduction.\n");
2103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2105 fprintf (dump_file
, "reduction phi is ");
2106 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2107 fprintf (dump_file
, "reduction stmt is ");
2108 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2113 /* The iterations of the loop may communicate only through bivs whose
2114 iteration space can be distributed efficiently. */
2115 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2117 gimple phi
= gsi_stmt (gsi
);
2118 tree def
= PHI_RESULT (phi
);
2121 if (!virtual_operand_p (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2123 struct reduction_info
*red
;
2125 red
= reduction_phi (reduction_list
, phi
);
2128 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2130 " FAILED: scalar dependency between iterations\n");
2140 /* Detect parallel loops and generate parallel code using libgomp
2141 primitives. Returns true if some loop was parallelized, false
2145 parallelize_loops (void)
2147 unsigned n_threads
= flag_tree_parallelize_loops
;
2148 bool changed
= false;
2150 struct tree_niter_desc niter_desc
;
2152 reduction_info_table_type reduction_list
;
2153 struct obstack parloop_obstack
;
2154 HOST_WIDE_INT estimated
;
2157 /* Do not parallelize loops in the functions created by parallelization. */
2158 if (parallelized_function_p (cfun
->decl
))
2160 if (cfun
->has_nonlocal_label
)
2163 gcc_obstack_init (&parloop_obstack
);
2164 reduction_list
.create (10);
2165 init_stmt_vec_info_vec ();
2167 FOR_EACH_LOOP (li
, loop
, 0)
2169 reduction_list
.empty ();
2170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2172 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2174 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2176 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2179 /* If we use autopar in graphite pass, we use its marked dependency
2180 checking results. */
2181 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2183 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2184 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2188 if (!single_dom_exit (loop
))
2191 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2192 fprintf (dump_file
, "loop is !single_dom_exit\n");
2197 if (/* And of course, the loop must be parallelizable. */
2198 !can_duplicate_loop_p (loop
)
2199 || loop_has_blocks_with_irreducible_flag (loop
)
2200 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2201 /* FIXME: the check for vector phi nodes could be removed. */
2202 || loop_has_vector_phi_nodes (loop
))
2205 estimated
= estimated_stmt_executions_int (loop
);
2206 if (estimated
== -1)
2207 estimated
= max_stmt_executions_int (loop
);
2208 /* FIXME: Bypass this check as graphite doesn't update the
2209 count and frequency correctly now. */
2210 if (!flag_loop_parallelize_all
2211 && ((estimated
!= -1
2212 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2213 /* Do not bother with loops in cold areas. */
2214 || optimize_loop_nest_for_size_p (loop
)))
2217 if (!try_get_loop_niter (loop
, &niter_desc
))
2220 if (!try_create_reduction_list (loop
, reduction_list
))
2223 if (!flag_loop_parallelize_all
2224 && !loop_parallel_p (loop
, &parloop_obstack
))
2228 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2231 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2233 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2234 loop_loc
= find_loop_location (loop
);
2235 if (loop_loc
!= UNKNOWN_LOC
)
2236 fprintf (dump_file
, "\nloop at %s:%d: ",
2237 LOC_FILE (loop_loc
), LOC_LINE (loop_loc
));
2239 gen_parallel_loop (loop
, reduction_list
,
2240 n_threads
, &niter_desc
);
2243 free_stmt_vec_info_vec ();
2244 reduction_list
.dispose ();
2245 obstack_free (&parloop_obstack
, NULL
);
2247 /* Parallelization will cause new function calls to be inserted through
2248 which local variables will escape. Reset the points-to solution
2251 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2256 /* Parallelization. */
2259 gate_tree_parallelize_loops (void)
2261 return flag_tree_parallelize_loops
> 1;
2265 tree_parallelize_loops (void)
2267 if (number_of_loops (cfun
) <= 1)
2270 if (parallelize_loops ())
2271 return TODO_cleanup_cfg
| TODO_rebuild_alias
;
2277 const pass_data pass_data_parallelize_loops
=
2279 GIMPLE_PASS
, /* type */
2280 "parloops", /* name */
2281 OPTGROUP_LOOP
, /* optinfo_flags */
2282 true, /* has_gate */
2283 true, /* has_execute */
2284 TV_TREE_PARALLELIZE_LOOPS
, /* tv_id */
2285 ( PROP_cfg
| PROP_ssa
), /* properties_required */
2286 0, /* properties_provided */
2287 0, /* properties_destroyed */
2288 0, /* todo_flags_start */
2289 TODO_verify_flow
, /* todo_flags_finish */
2292 class pass_parallelize_loops
: public gimple_opt_pass
2295 pass_parallelize_loops (gcc::context
*ctxt
)
2296 : gimple_opt_pass (pass_data_parallelize_loops
, ctxt
)
2299 /* opt_pass methods: */
2300 bool gate () { return gate_tree_parallelize_loops (); }
2301 unsigned int execute () { return tree_parallelize_loops (); }
2303 }; // class pass_parallelize_loops
2308 make_pass_parallelize_loops (gcc::context
*ctxt
)
2310 return new pass_parallelize_loops (ctxt
);
2314 #include "gt-tree-parloops.h"