1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5 Zdenek Dvorak <dvorakz@suse.cz>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "tree-flow.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-pass.h"
32 #include "langhooks.h"
33 #include "tree-vectorizer.h"
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
Most of the complexity is in bringing the code into shape expected
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common scalar dependence patterns (accumulation, ...)
58 -- handling of non-innermost loops */
62 currently we use vect_force_simple_reduction() to detect reduction patterns.
63 The code transformation will be introduced by an example.
70 for (i = 0; i < N; i++)
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
84 sum_11 = D.1795_8 + sum_29;
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
96 after reduction transformation (only relevant parts):
104 # Storing the initial value given by the user. #
106 .paral_data_store.32.sum.27 = 1;
108 #pragma omp parallel num_threads(4)
110 #pragma omp for schedule(static)
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
116 # sum.27_29 = PHI <sum.27_11, 0>
118 sum.27_11 = D.1827_8 + sum.27_29;
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
126 # Creating the atomic operation is done at
127 create_call_for_reduction_1() #
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
138 The value computed by the threads is loaded from the
142 .paral_data_load.33_52 = &.paral_data_store.32;
143 sum_37 = .paral_data_load.33_52->sum.27;
144 sum_43 = D.1795_41 + sum_37;
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
156 /* Minimal number of iterations of a loop that should be executed in each
158 #define MIN_PER_THREAD 100
160 /* Element of the hashtable, representing a
161 reduction in the current loop. */
162 struct reduction_info
164 gimple reduc_stmt
; /* reduction statement. */
165 gimple reduc_phi
; /* The phi node defining the reduction. */
166 enum tree_code reduction_code
;/* code for the reduction operation. */
167 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
169 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
170 of the reduction variable when existing the loop. */
171 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
172 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
173 tree init
; /* reduction initialization value. */
174 gimple new_phi
; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
180 /* Equality and hash functions for hashtab code. */
183 reduction_info_eq (const void *aa
, const void *bb
)
185 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
186 const struct reduction_info
*b
= (const struct reduction_info
*) bb
;
188 return (a
->reduc_phi
== b
->reduc_phi
);
192 reduction_info_hash (const void *aa
)
194 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
196 return a
->reduc_version
;
199 static struct reduction_info
*
200 reduction_phi (htab_t reduction_list
, gimple phi
)
202 struct reduction_info tmpred
, *red
;
204 if (htab_elements (reduction_list
) == 0 || phi
== NULL
)
207 tmpred
.reduc_phi
= phi
;
208 tmpred
.reduc_version
= gimple_uid (phi
);
209 red
= (struct reduction_info
*) htab_find (reduction_list
, &tmpred
);
214 /* Element of hashtable of names to copy. */
216 struct name_to_copy_elt
218 unsigned version
; /* The version of the name to copy. */
219 tree new_name
; /* The new name used in the copy. */
220 tree field
; /* The field of the structure used to pass the
224 /* Equality and hash functions for hashtab code. */
227 name_to_copy_elt_eq (const void *aa
, const void *bb
)
229 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
230 const struct name_to_copy_elt
*b
= (const struct name_to_copy_elt
*) bb
;
232 return a
->version
== b
->version
;
236 name_to_copy_elt_hash (const void *aa
)
238 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
240 return (hashval_t
) a
->version
;
243 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246 typedef struct lambda_trans_matrix_s
248 lambda_matrix matrix
;
252 } *lambda_trans_matrix
;
253 #define LTM_MATRIX(T) ((T)->matrix)
254 #define LTM_ROWSIZE(T) ((T)->rowsize)
255 #define LTM_COLSIZE(T) ((T)->colsize)
256 #define LTM_DENOMINATOR(T) ((T)->denominator)
258 /* Allocate a new transformation matrix. */
260 static lambda_trans_matrix
261 lambda_trans_matrix_new (int colsize
, int rowsize
,
262 struct obstack
* lambda_obstack
)
264 lambda_trans_matrix ret
;
266 ret
= (lambda_trans_matrix
)
267 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
268 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
269 LTM_ROWSIZE (ret
) = rowsize
;
270 LTM_COLSIZE (ret
) = colsize
;
271 LTM_DENOMINATOR (ret
) = 1;
275 /* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
280 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
281 lambda_vector vec
, lambda_vector dest
)
285 lambda_vector_clear (dest
, m
);
286 for (i
= 0; i
< m
; i
++)
287 for (j
= 0; j
< n
; j
++)
288 dest
[i
] += matrix
[i
][j
] * vec
[j
];
291 /* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
305 lambda_transform_legal_p (lambda_trans_matrix trans
,
307 VEC (ddr_p
, heap
) *dependence_relations
)
310 lambda_vector distres
;
311 struct data_dependence_relation
*ddr
;
313 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
314 && LTM_ROWSIZE (trans
) == nb_loops
);
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p
, dependence_relations
) == 0)
320 ddr
= VEC_index (ddr_p
, dependence_relations
, 0);
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is no worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
329 distres
= lambda_vector_new (nb_loops
);
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (aka. output-dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
350 /* Compute trans.dist_vect */
351 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
353 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
354 DDR_DIST_VECT (ddr
, j
), distres
);
356 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
363 /* Data dependency analysis. Returns true if the iterations of LOOP
are independent of each other (that is, if we can execute them
368 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
370 VEC (loop_p
, heap
) *loop_nest
;
371 VEC (ddr_p
, heap
) *dependence_relations
;
372 VEC (data_reference_p
, heap
) *datarefs
;
373 lambda_trans_matrix trans
;
376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
378 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
380 fprintf (dump_file
, "loop is innermost\n");
382 fprintf (dump_file
, "loop NOT innermost\n");
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
388 dependence_relations
= VEC_alloc (ddr_p
, heap
, 10 * 10);
389 loop_nest
= VEC_alloc (loop_p
, heap
, 3);
390 compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
391 &dependence_relations
);
392 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
393 dump_data_dependence_relations (dump_file
, dependence_relations
);
395 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
396 LTM_MATRIX (trans
)[0][0] = -1;
398 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
401 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
402 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
404 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
406 " FAILED: data dependencies exist across iterations\n");
408 VEC_free (loop_p
, heap
, loop_nest
);
409 free_dependence_relations (dependence_relations
);
410 free_data_refs (datarefs
);
415 /* Return true when LOOP contains basic blocks marked with the
416 BB_IRREDUCIBLE_LOOP flag. */
419 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
422 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
425 for (i
= 0; i
< loop
->num_nodes
; i
++)
426 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
435 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
436 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
437 to their addresses that can be reused. The address of OBJ is known to
438 be invariant in the whole function. Other needed statements are placed
442 take_address_of (tree obj
, tree type
, edge entry
, htab_t decl_address
,
443 gimple_stmt_iterator
*gsi
)
447 struct int_tree_map ielt
, *nielt
;
448 tree
*var_p
, name
, bvar
, addr
;
452 /* Since the address of OBJ is invariant, the trees may be shared.
453 Avoid rewriting unrelated parts of the code. */
454 obj
= unshare_expr (obj
);
456 handled_component_p (*var_p
);
457 var_p
= &TREE_OPERAND (*var_p
, 0))
460 /* Canonicalize the access to base on a MEM_REF. */
462 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
464 /* Assign a canonical SSA name to the address of the base decl used
465 in the address and share it for all accesses and addresses based
467 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
469 dslot
= htab_find_slot_with_hash (decl_address
, &ielt
, uid
, INSERT
);
474 addr
= TREE_OPERAND (*var_p
, 0);
475 bvar
= create_tmp_var (TREE_TYPE (addr
),
476 get_name (TREE_OPERAND
477 (TREE_OPERAND (*var_p
, 0), 0)));
478 add_referenced_var (bvar
);
479 stmt
= gimple_build_assign (bvar
, addr
);
480 name
= make_ssa_name (bvar
, stmt
);
481 gimple_assign_set_lhs (stmt
, name
);
482 gsi_insert_on_edge_immediate (entry
, stmt
);
484 nielt
= XNEW (struct int_tree_map
);
490 name
= ((struct int_tree_map
*) *dslot
)->to
;
492 /* Express the address in terms of the canonical SSA name. */
493 TREE_OPERAND (*var_p
, 0) = name
;
495 return build_fold_addr_expr_with_type (obj
, type
);
497 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
498 &stmts
, true, NULL_TREE
);
499 if (!gimple_seq_empty_p (stmts
))
500 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
502 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
504 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
506 if (!gimple_seq_empty_p (stmts
))
507 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
513 /* Callback for htab_traverse. Create the initialization statement
514 for reduction described in SLOT, and place it at the preheader of
515 the loop described in DATA. */
518 initialize_reductions (void **slot
, void *data
)
521 tree bvar
, type
, arg
;
524 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
525 struct loop
*loop
= (struct loop
*) data
;
527 /* Create initialization in preheader:
528 reduction_variable = initialization value of reduction. */
530 /* In the phi node at the header, replace the argument coming
531 from the preheader with the reduction initialization value. */
533 /* Create a new variable to initialize the reduction. */
534 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
535 bvar
= create_tmp_var (type
, "reduction");
536 add_referenced_var (bvar
);
538 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
539 OMP_CLAUSE_REDUCTION
);
540 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
541 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
543 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
546 /* Replace the argument representing the initialization value
547 with the initialization value for the reduction (neutral
548 element for the particular operation, e.g. 0 for PLUS_EXPR,
549 1 for MULT_EXPR, etc).
550 Keep the old value in a new variable "reduction_initial",
551 that will be taken in consideration after the parallel
552 computing is done. */
554 e
= loop_preheader_edge (loop
);
555 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
556 /* Create new variable to hold the initial value. */
558 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
559 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
560 reduc
->initial_value
= arg
;
566 struct walk_stmt_info info
;
569 gimple_stmt_iterator
*gsi
;
574 /* Eliminates references to local variables in *TP out of the single
575 entry single exit region starting at DTA->ENTRY.
576 DECL_ADDRESS contains addresses of the references that had their
577 address taken already. If the expression is changed, CHANGED is
578 set to true. Callback for walk_tree. */
581 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
583 struct elv_data
*const dta
= (struct elv_data
*) data
;
584 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
590 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
593 type
= TREE_TYPE (t
);
594 addr_type
= build_pointer_type (type
);
595 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
597 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
603 *tp
= build_simple_mem_ref (addr
);
609 if (TREE_CODE (t
) == ADDR_EXPR
)
611 /* ADDR_EXPR may appear in two contexts:
612 -- as a gimple operand, when the address taken is a function invariant
613 -- as gimple rhs, when the resulting address in not a function
615 We do not need to do anything special in the latter case (the base of
616 the memory reference whose address is taken may be replaced in the
617 DECL_P case). The former case is more complicated, as we need to
618 ensure that the new address is still a gimple operand. Thus, it
619 is not sufficient to replace just the base of the memory reference --
620 we need to move the whole computation of the address out of the
622 if (!is_gimple_val (t
))
626 obj
= TREE_OPERAND (t
, 0);
627 var
= get_base_address (obj
);
628 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
631 addr_type
= TREE_TYPE (t
);
632 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
634 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
651 /* Moves the references to local variables in STMT at *GSI out of the single
652 entry single exit region starting at ENTRY. DECL_ADDRESS contains
653 addresses of the references that had their address taken
657 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
661 gimple stmt
= gsi_stmt (*gsi
);
663 memset (&dta
.info
, '\0', sizeof (dta
.info
));
665 dta
.decl_address
= decl_address
;
669 if (gimple_debug_bind_p (stmt
))
672 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
673 eliminate_local_variables_1
, &dta
.info
, NULL
);
676 gimple_debug_bind_reset_value (stmt
);
683 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
690 /* Eliminates the references to local variables from the single entry
691 single exit region between the ENTRY and EXIT edges.
694 1) Taking address of a local variable -- these are moved out of the
695 region (and temporary variable is created to hold the address if
698 2) Dereferencing a local variable -- these are replaced with indirect
702 eliminate_local_variables (edge entry
, edge exit
)
705 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
707 gimple_stmt_iterator gsi
;
708 bool has_debug_stmt
= false;
709 htab_t decl_address
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
711 basic_block entry_bb
= entry
->src
;
712 basic_block exit_bb
= exit
->dest
;
714 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
716 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
717 if (bb
!= entry_bb
&& bb
!= exit_bb
)
718 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
719 if (is_gimple_debug (gsi_stmt (gsi
)))
721 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
722 has_debug_stmt
= true;
725 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
728 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
729 if (bb
!= entry_bb
&& bb
!= exit_bb
)
730 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
731 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
732 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
734 htab_delete (decl_address
);
735 VEC_free (basic_block
, heap
, body
);
738 /* Returns true if expression EXPR is not defined between ENTRY and
739 EXIT, i.e. if all its operands are defined outside of the region. */
742 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
744 basic_block entry_bb
= entry
->src
;
745 basic_block exit_bb
= exit
->dest
;
748 if (is_gimple_min_invariant (expr
))
751 if (TREE_CODE (expr
) == SSA_NAME
)
753 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
755 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
756 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
765 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
766 The copies are stored to NAME_COPIES, if NAME was already duplicated,
767 its duplicate stored in NAME_COPIES is returned.
769 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
770 duplicated, storing the copies in DECL_COPIES. */
773 separate_decls_in_region_name (tree name
,
774 htab_t name_copies
, htab_t decl_copies
,
777 tree copy
, var
, var_copy
;
778 unsigned idx
, uid
, nuid
;
779 struct int_tree_map ielt
, *nielt
;
780 struct name_to_copy_elt elt
, *nelt
;
781 void **slot
, **dslot
;
783 if (TREE_CODE (name
) != SSA_NAME
)
786 idx
= SSA_NAME_VERSION (name
);
788 slot
= htab_find_slot_with_hash (name_copies
, &elt
, idx
,
789 copy_name_p
? INSERT
: NO_INSERT
);
791 return ((struct name_to_copy_elt
*) *slot
)->new_name
;
793 var
= SSA_NAME_VAR (name
);
794 uid
= DECL_UID (var
);
796 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, uid
, INSERT
);
799 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
800 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
801 add_referenced_var (var_copy
);
802 nielt
= XNEW (struct int_tree_map
);
804 nielt
->to
= var_copy
;
807 /* Ensure that when we meet this decl next time, we won't duplicate
809 nuid
= DECL_UID (var_copy
);
811 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, nuid
, INSERT
);
812 gcc_assert (!*dslot
);
813 nielt
= XNEW (struct int_tree_map
);
815 nielt
->to
= var_copy
;
819 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
823 copy
= duplicate_ssa_name (name
, NULL
);
824 nelt
= XNEW (struct name_to_copy_elt
);
826 nelt
->new_name
= copy
;
827 nelt
->field
= NULL_TREE
;
836 SSA_NAME_VAR (copy
) = var_copy
;
840 /* Finds the ssa names used in STMT that are defined outside the
841 region between ENTRY and EXIT and replaces such ssa names with
842 their duplicates. The duplicates are stored to NAME_COPIES. Base
843 decls of all ssa names used in STMT (including those defined in
844 LOOP) are replaced with the new temporary variables; the
845 replacement decls are stored in DECL_COPIES. */
848 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
849 htab_t name_copies
, htab_t decl_copies
)
857 mark_virtual_ops_for_renaming (stmt
);
859 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
861 name
= DEF_FROM_PTR (def
);
862 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
863 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
865 gcc_assert (copy
== name
);
868 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
870 name
= USE_FROM_PTR (use
);
871 if (TREE_CODE (name
) != SSA_NAME
)
874 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
875 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
881 /* Finds the ssa names used in STMT that are defined outside the
882 region between ENTRY and EXIT and replaces such ssa names with
883 their duplicates. The duplicates are stored to NAME_COPIES. Base
884 decls of all ssa names used in STMT (including those defined in
885 LOOP) are replaced with the new temporary variables; the
886 replacement decls are stored in DECL_COPIES. */
889 separate_decls_in_region_debug (gimple stmt
, htab_t name_copies
,
895 struct int_tree_map ielt
;
896 struct name_to_copy_elt elt
;
897 void **slot
, **dslot
;
899 if (gimple_debug_bind_p (stmt
))
900 var
= gimple_debug_bind_get_var (stmt
);
901 else if (gimple_debug_source_bind_p (stmt
))
902 var
= gimple_debug_source_bind_get_var (stmt
);
905 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
)
907 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
908 ielt
.uid
= DECL_UID (var
);
909 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, ielt
.uid
, NO_INSERT
);
912 if (gimple_debug_bind_p (stmt
))
913 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
914 else if (gimple_debug_source_bind_p (stmt
))
915 gimple_debug_source_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
917 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
919 name
= USE_FROM_PTR (use
);
920 if (TREE_CODE (name
) != SSA_NAME
)
923 elt
.version
= SSA_NAME_VERSION (name
);
924 slot
= htab_find_slot_with_hash (name_copies
, &elt
, elt
.version
, NO_INSERT
);
927 gimple_debug_bind_reset_value (stmt
);
932 SET_USE (use
, ((struct name_to_copy_elt
*) *slot
)->new_name
);
938 /* Callback for htab_traverse. Adds a field corresponding to the reduction
939 specified in SLOT. The type is passed in DATA. */
942 add_field_for_reduction (void **slot
, void *data
)
945 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
946 tree
const type
= (tree
) data
;
947 tree var
= SSA_NAME_VAR (gimple_assign_lhs (red
->reduc_stmt
));
948 tree field
= build_decl (gimple_location (red
->reduc_stmt
),
949 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
951 insert_field_into_struct (type
, field
);
958 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
959 described in SLOT. The type is passed in DATA. */
962 add_field_for_name (void **slot
, void *data
)
964 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
965 tree type
= (tree
) data
;
966 tree name
= ssa_name (elt
->version
);
967 tree var
= SSA_NAME_VAR (name
);
968 tree field
= build_decl (DECL_SOURCE_LOCATION (var
),
969 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
971 insert_field_into_struct (type
, field
);
977 /* Callback for htab_traverse. A local result is the intermediate result
979 thread, or the initial value in case no iteration was executed.
980 This function creates a phi node reflecting these values.
981 The phi's result will be stored in NEW_PHI field of the
982 reduction's data structure. */
985 create_phi_for_local_result (void **slot
, void *data
)
987 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
988 const struct loop
*const loop
= (const struct loop
*) data
;
991 basic_block store_bb
;
993 source_location locus
;
995 /* STORE_BB is the block where the phi
996 should be stored. It is the destination of the loop exit.
997 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
998 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1000 /* STORE_BB has two predecessors. One coming from the loop
1001 (the reduction's result is computed at the loop),
1002 and another coming from a block preceding the loop,
1004 are executed (the initial value should be taken). */
1005 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1006 e
= EDGE_PRED (store_bb
, 1);
1008 e
= EDGE_PRED (store_bb
, 0);
1010 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
)),
1012 locus
= gimple_location (reduc
->reduc_stmt
);
1013 new_phi
= create_phi_node (local_res
, store_bb
);
1014 SSA_NAME_DEF_STMT (local_res
) = new_phi
;
1015 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1016 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1017 FALLTHRU_EDGE (loop
->latch
), locus
);
1018 reduc
->new_phi
= new_phi
;
1028 basic_block store_bb
;
1029 basic_block load_bb
;
1032 /* Callback for htab_traverse. Create an atomic instruction for the
1033 reduction described in SLOT.
1034 DATA annotates the place in memory the atomic operation relates to,
1035 and the basic block it needs to be generated in. */
1038 create_call_for_reduction_1 (void **slot
, void *data
)
1040 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
1041 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1042 gimple_stmt_iterator gsi
;
1043 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1048 tree t
, addr
, ref
, x
;
1049 tree tmp_load
, name
;
1052 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1053 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1055 addr
= build_addr (t
, current_function_decl
);
1057 /* Create phi node. */
1058 bb
= clsn_data
->load_bb
;
1060 e
= split_block (bb
, t
);
1063 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
1064 add_referenced_var (tmp_load
);
1065 tmp_load
= make_ssa_name (tmp_load
, NULL
);
1066 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1067 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1068 gsi
= gsi_start_bb (new_bb
);
1069 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1071 e
= split_block (new_bb
, load
);
1073 gsi
= gsi_start_bb (new_bb
);
1075 x
= fold_build2 (reduc
->reduction_code
,
1076 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1077 PHI_RESULT (reduc
->new_phi
));
1079 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1080 GSI_CONTINUE_LINKING
);
1082 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1086 /* Create the atomic operation at the join point of the threads.
1087 REDUCTION_LIST describes the reductions in the LOOP.
1088 LD_ST_DATA describes the shared data structure where
1089 shared data is stored in and loaded from. */
1091 create_call_for_reduction (struct loop
*loop
, htab_t reduction_list
,
1092 struct clsn_data
*ld_st_data
)
1094 htab_traverse (reduction_list
, create_phi_for_local_result
, loop
);
1095 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1096 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1097 htab_traverse (reduction_list
, create_call_for_reduction_1
, ld_st_data
);
1100 /* Callback for htab_traverse. Loads the final reduction value at the
1101 join point of all threads, and inserts it in the right place. */
1104 create_loads_for_reductions (void **slot
, void *data
)
1106 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
1107 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1109 gimple_stmt_iterator gsi
;
1110 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1115 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1116 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1117 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1121 name
= PHI_RESULT (red
->keep_res
);
1122 stmt
= gimple_build_assign (name
, x
);
1123 SSA_NAME_DEF_STMT (name
) = stmt
;
1125 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1127 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1128 !gsi_end_p (gsi
); gsi_next (&gsi
))
1129 if (gsi_stmt (gsi
) == red
->keep_res
)
1131 remove_phi_node (&gsi
, false);
1137 /* Load the reduction result that was stored in LD_ST_DATA.
1138 REDUCTION_LIST describes the list of reductions that the
1139 loads should be generated for. */
1141 create_final_loads_for_reduction (htab_t reduction_list
,
1142 struct clsn_data
*ld_st_data
)
1144 gimple_stmt_iterator gsi
;
1148 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1149 t
= build_fold_addr_expr (ld_st_data
->store
);
1150 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1152 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1153 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
1155 htab_traverse (reduction_list
, create_loads_for_reductions
, ld_st_data
);
1159 /* Callback for htab_traverse. Store the neutral value for the
1160 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1161 1 for MULT_EXPR, etc. into the reduction field.
1162 The reduction is specified in SLOT. The store information is
1166 create_stores_for_reduction (void **slot
, void *data
)
1168 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
1169 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1172 gimple_stmt_iterator gsi
;
1173 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1175 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1176 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1177 stmt
= gimple_build_assign (t
, red
->initial_value
);
1178 mark_virtual_ops_for_renaming (stmt
);
1179 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1184 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1185 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1186 specified in SLOT. */
1189 create_loads_and_stores_for_name (void **slot
, void *data
)
1191 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
1192 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1195 gimple_stmt_iterator gsi
;
1196 tree type
= TREE_TYPE (elt
->new_name
);
1199 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1200 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1201 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1202 mark_virtual_ops_for_renaming (stmt
);
1203 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1205 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1206 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1207 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1208 stmt
= gimple_build_assign (elt
->new_name
, t
);
1209 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
1210 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1215 /* Moves all the variables used in LOOP and defined outside of it (including
1216 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1217 name) to a structure created for this purpose. The code
1225 is transformed this way:
1240 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1241 pointer `new' is intentionally not initialized (the loop will be split to a
1242 separate function later, and `new' will be initialized from its arguments).
1243 LD_ST_DATA holds information about the shared data structure used to pass
1244 information among the threads. It is initialized here, and
1245 gen_parallel_loop will pass it to create_call_for_reduction that
1246 needs this information. REDUCTION_LIST describes the reductions
1250 separate_decls_in_region (edge entry
, edge exit
, htab_t reduction_list
,
1251 tree
*arg_struct
, tree
*new_arg_struct
,
1252 struct clsn_data
*ld_st_data
)
1255 basic_block bb1
= split_edge (entry
);
1256 basic_block bb0
= single_pred (bb1
);
1257 htab_t name_copies
= htab_create (10, name_to_copy_elt_hash
,
1258 name_to_copy_elt_eq
, free
);
1259 htab_t decl_copies
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
1262 tree type
, type_name
, nvar
;
1263 gimple_stmt_iterator gsi
;
1264 struct clsn_data clsn_data
;
1265 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
1267 basic_block entry_bb
= bb1
;
1268 basic_block exit_bb
= exit
->dest
;
1269 bool has_debug_stmt
= false;
1271 entry
= single_succ_edge (entry_bb
);
1272 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1274 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
1276 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1278 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1279 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1280 name_copies
, decl_copies
);
1282 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1284 gimple stmt
= gsi_stmt (gsi
);
1286 if (is_gimple_debug (stmt
))
1287 has_debug_stmt
= true;
1289 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1290 name_copies
, decl_copies
);
1295 /* Now process debug bind stmts. We must not create decls while
1296 processing debug stmts, so we defer their processing so as to
1297 make sure we will have debug info for as many variables as
1298 possible (all of those that were dealt with in the loop above),
1299 and discard those for which we know there's nothing we can
1302 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
1303 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1305 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1307 gimple stmt
= gsi_stmt (gsi
);
1309 if (is_gimple_debug (stmt
))
1311 if (separate_decls_in_region_debug (stmt
, name_copies
,
1314 gsi_remove (&gsi
, true);
1323 VEC_free (basic_block
, heap
, body
);
1325 if (htab_elements (name_copies
) == 0 && htab_elements (reduction_list
) == 0)
1327 /* It may happen that there is nothing to copy (if there are only
1328 loop carried and external variables in the loop). */
1330 *new_arg_struct
= NULL
;
1334 /* Create the type for the structure to store the ssa names to. */
1335 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1336 type_name
= build_decl (UNKNOWN_LOCATION
,
1337 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1339 TYPE_NAME (type
) = type_name
;
1341 htab_traverse (name_copies
, add_field_for_name
, type
);
1342 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1344 /* Create the fields for reductions. */
1345 htab_traverse (reduction_list
, add_field_for_reduction
,
1350 /* Create the loads and stores. */
1351 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1352 add_referenced_var (*arg_struct
);
1353 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1354 add_referenced_var (nvar
);
1355 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1357 ld_st_data
->store
= *arg_struct
;
1358 ld_st_data
->load
= *new_arg_struct
;
1359 ld_st_data
->store_bb
= bb0
;
1360 ld_st_data
->load_bb
= bb1
;
1362 htab_traverse (name_copies
, create_loads_and_stores_for_name
,
1365 /* Load the calculation from memory (after the join of the threads). */
1367 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1369 htab_traverse (reduction_list
, create_stores_for_reduction
,
1371 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1372 clsn_data
.load_bb
= exit
->dest
;
1373 clsn_data
.store
= ld_st_data
->store
;
1374 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1378 htab_delete (decl_copies
);
1379 htab_delete (name_copies
);
1382 /* Bitmap containing uids of functions created by parallelization. We cannot
1383 allocate it from the default obstack, as it must live across compilation
1384 of several functions; we make it gc allocated instead. */
1386 static GTY(()) bitmap parallelized_functions
;
1388 /* Returns true if FN was created by create_loop_fn. */
1391 parallelized_function_p (tree fn
)
1393 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1396 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1399 /* Creates and returns an empty function that will receive the body of
1400 a parallelized loop. */
1403 create_loop_fn (location_t loc
)
1407 tree decl
, type
, name
, t
;
1408 struct function
*act_cfun
= cfun
;
1409 static unsigned loopfn_num
;
1411 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1412 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1413 clean_symbol_name (tname
);
1414 name
= get_identifier (tname
);
1415 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1417 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1418 if (!parallelized_functions
)
1419 parallelized_functions
= BITMAP_GGC_ALLOC ();
1420 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
1422 TREE_STATIC (decl
) = 1;
1423 TREE_USED (decl
) = 1;
1424 DECL_ARTIFICIAL (decl
) = 1;
1425 DECL_IGNORED_P (decl
) = 0;
1426 TREE_PUBLIC (decl
) = 0;
1427 DECL_UNINLINABLE (decl
) = 1;
1428 DECL_EXTERNAL (decl
) = 0;
1429 DECL_CONTEXT (decl
) = NULL_TREE
;
1430 DECL_INITIAL (decl
) = make_node (BLOCK
);
1432 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1433 DECL_ARTIFICIAL (t
) = 1;
1434 DECL_IGNORED_P (t
) = 1;
1435 DECL_RESULT (decl
) = t
;
1437 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1439 DECL_ARTIFICIAL (t
) = 1;
1440 DECL_ARG_TYPE (t
) = ptr_type_node
;
1441 DECL_CONTEXT (t
) = decl
;
1443 DECL_ARGUMENTS (decl
) = t
;
1445 allocate_struct_function (decl
, false);
1447 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1449 set_cfun (act_cfun
);
1454 /* Moves the exit condition of LOOP to the beginning of its header, and
1455 duplicates the part of the last iteration that gets disabled to the
1456 exit of the loop. NIT is the number of iterations of the loop
1457 (used to initialize the variables in the duplicated part).
1459 TODO: the common case is that latch of the loop is empty and immediately
1460 follows the loop exit. In this case, it would be better not to copy the
1461 body of the loop, but only move the entry of the loop directly before the
1462 exit check and increase the number of iterations of the loop by one.
1463 This may need some additional preconditioning in case NIT = ~0.
1464 REDUCTION_LIST describes the reductions in LOOP. */
1467 transform_to_exit_first_loop (struct loop
*loop
, htab_t reduction_list
, tree nit
)
1469 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1472 edge exit
= single_dom_exit (loop
), hpred
;
1473 tree control
, control_name
, res
, t
;
1474 gimple phi
, nphi
, cond_stmt
, stmt
, cond_nit
;
1475 gimple_stmt_iterator gsi
;
1480 split_block_after_labels (loop
->header
);
1481 orig_header
= single_succ (loop
->header
);
1482 hpred
= single_succ_edge (loop
->header
);
1484 cond_stmt
= last_stmt (exit
->src
);
1485 control
= gimple_cond_lhs (cond_stmt
);
1486 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1488 /* Make sure that we have phi nodes on exit for all loop header phis
1489 (create_parallel_loop requires that). */
1490 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1492 phi
= gsi_stmt (gsi
);
1493 res
= PHI_RESULT (phi
);
1494 t
= make_ssa_name (SSA_NAME_VAR (res
), phi
);
1495 SET_PHI_RESULT (phi
, t
);
1496 nphi
= create_phi_node (res
, orig_header
);
1497 SSA_NAME_DEF_STMT (res
) = nphi
;
1498 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1502 gimple_cond_set_lhs (cond_stmt
, t
);
1503 update_stmt (cond_stmt
);
1508 /* Setting the condition towards peeling the last iteration:
1509 If the block consisting of the exit condition has the latch as
1510 successor, then the body of the loop is executed before
1511 the exit condition is tested. In such case, moving the
1512 condition to the entry, causes that the loop will iterate
1513 one less iteration (which is the wanted outcome, since we
1514 peel out the last iteration). If the body is executed after
1515 the condition, moving the condition to the entry requires
1516 decrementing one iteration. */
1517 exit_1
= EDGE_SUCC (exit
->src
, EDGE_SUCC (exit
->src
, 0) == exit
);
1518 if (exit_1
->dest
== loop
->latch
)
1519 new_rhs
= gimple_cond_rhs (cond_stmt
);
1522 new_rhs
= fold_build2 (MINUS_EXPR
, TREE_TYPE (gimple_cond_rhs (cond_stmt
)),
1523 gimple_cond_rhs (cond_stmt
),
1524 build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt
)), 1));
1525 if (TREE_CODE (gimple_cond_rhs (cond_stmt
)) == SSA_NAME
)
1527 basic_block preheader
;
1528 gimple_stmt_iterator gsi1
;
1530 preheader
= loop_preheader_edge(loop
)->src
;
1531 gsi1
= gsi_after_labels (preheader
);
1532 new_rhs
= force_gimple_operand_gsi (&gsi1
, new_rhs
, true,
1533 NULL_TREE
,false,GSI_CONTINUE_LINKING
);
1536 gimple_cond_set_rhs (cond_stmt
, unshare_expr (new_rhs
));
1537 gimple_cond_set_lhs (cond_stmt
, unshare_expr (gimple_cond_lhs (cond_stmt
)));
1539 bbs
= get_loop_body_in_dom_order (loop
);
1541 for (n
= 0; bbs
[n
] != loop
->latch
; n
++)
1543 nbbs
= XNEWVEC (basic_block
, n
);
1544 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1551 /* Other than reductions, the only gimple reg that should be copied
1552 out of the loop is the control variable. */
1554 control_name
= NULL_TREE
;
1555 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1557 phi
= gsi_stmt (gsi
);
1558 res
= PHI_RESULT (phi
);
1559 if (!is_gimple_reg (res
))
1565 /* Check if it is a part of reduction. If it is,
1566 keep the phi at the reduction's keep_res field. The
1567 PHI_RESULT of this phi is the resulting value of the reduction
1568 variable when exiting the loop. */
1570 exit
= single_dom_exit (loop
);
1572 if (htab_elements (reduction_list
) > 0)
1574 struct reduction_info
*red
;
1576 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1577 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1580 red
->keep_res
= phi
;
1585 gcc_assert (control_name
== NULL_TREE
1586 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1588 remove_phi_node (&gsi
, false);
1590 gcc_assert (control_name
!= NULL_TREE
);
1592 /* Initialize the control variable to number of iterations
1593 according to the rhs of the exit condition. */
1594 gsi
= gsi_after_labels (ex_bb
);
1595 cond_nit
= last_stmt (exit
->src
);
1596 nit_1
= gimple_cond_rhs (cond_nit
);
1597 nit_1
= force_gimple_operand_gsi (&gsi
,
1598 fold_convert (TREE_TYPE (control_name
), nit_1
),
1599 false, NULL_TREE
, false, GSI_SAME_STMT
);
1600 stmt
= gimple_build_assign (control_name
, nit_1
);
1601 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1602 SSA_NAME_DEF_STMT (control_name
) = stmt
;
1605 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1606 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1607 NEW_DATA is the variable that should be initialized from the argument
1608 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1609 basic block containing GIMPLE_OMP_PARALLEL tree. */
1612 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1613 tree new_data
, unsigned n_threads
, location_t loc
)
1615 gimple_stmt_iterator gsi
;
1616 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1618 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1619 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1620 edge exit
, nexit
, guard
, end
, e
;
1622 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1623 bb
= loop_preheader_edge (loop
)->src
;
1624 paral_bb
= single_pred (bb
);
1625 gsi
= gsi_last_bb (paral_bb
);
1627 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1628 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1629 = build_int_cst (integer_type_node
, n_threads
);
1630 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1631 gimple_set_location (stmt
, loc
);
1633 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1635 /* Initialize NEW_DATA. */
1638 gsi
= gsi_after_labels (bb
);
1640 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1641 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1642 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1643 SSA_NAME_DEF_STMT (param
) = stmt
;
1645 stmt
= gimple_build_assign (new_data
,
1646 fold_convert (TREE_TYPE (new_data
), param
));
1647 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1648 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1651 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1652 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1653 gsi
= gsi_last_bb (bb
);
1654 stmt
= gimple_build_omp_return (false);
1655 gimple_set_location (stmt
, loc
);
1656 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1658 /* Extract data for GIMPLE_OMP_FOR. */
1659 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1660 cond_stmt
= last_stmt (loop
->header
);
1662 cvar
= gimple_cond_lhs (cond_stmt
);
1663 cvar_base
= SSA_NAME_VAR (cvar
);
1664 phi
= SSA_NAME_DEF_STMT (cvar
);
1665 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1666 initvar
= make_ssa_name (cvar_base
, NULL
);
1667 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1669 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1671 gsi
= gsi_last_nondebug_bb (loop
->latch
);
1672 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1673 gsi_remove (&gsi
, true);
1676 for_bb
= split_edge (loop_preheader_edge (loop
));
1677 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1678 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1679 gcc_assert (exit
== single_dom_exit (loop
));
1681 guard
= make_edge (for_bb
, ex_bb
, 0);
1682 single_succ_edge (loop
->latch
)->flags
= 0;
1683 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1684 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1686 source_location locus
;
1688 phi
= gsi_stmt (gsi
);
1689 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1691 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1692 locus
= gimple_phi_arg_location_from_edge (stmt
,
1693 loop_preheader_edge (loop
));
1694 add_phi_arg (phi
, def
, guard
, locus
);
1696 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1697 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1698 add_phi_arg (phi
, def
, end
, locus
);
1700 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1701 PENDING_STMT (e
) = NULL
;
1703 /* Emit GIMPLE_OMP_FOR. */
1704 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1705 type
= TREE_TYPE (cvar
);
1706 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
1707 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1709 for_stmt
= gimple_build_omp_for (NULL
, t
, 1, NULL
);
1710 gimple_set_location (for_stmt
, loc
);
1711 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1712 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1713 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1714 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1715 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1717 build_int_cst (type
, 1)));
1719 gsi
= gsi_last_bb (for_bb
);
1720 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1721 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1723 /* Emit GIMPLE_OMP_CONTINUE. */
1724 gsi
= gsi_last_bb (loop
->latch
);
1725 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1726 gimple_set_location (stmt
, loc
);
1727 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1728 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1730 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1731 gsi
= gsi_last_bb (ex_bb
);
1732 stmt
= gimple_build_omp_return (true);
1733 gimple_set_location (stmt
, loc
);
1734 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1739 /* Generates code to execute the iterations of LOOP in N_THREADS
1740 threads in parallel.
1742 NITER describes number of iterations of LOOP.
1743 REDUCTION_LIST describes the reductions existent in the LOOP. */
1746 gen_parallel_loop (struct loop
*loop
, htab_t reduction_list
,
1747 unsigned n_threads
, struct tree_niter_desc
*niter
)
1750 tree many_iterations_cond
, type
, nit
;
1751 tree arg_struct
, new_arg_struct
;
1753 basic_block parallel_head
;
1755 struct clsn_data clsn_data
;
1762 ---------------------------------------------------------------------
1765 IV = phi (INIT, IV + STEP)
1771 ---------------------------------------------------------------------
1773 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1774 we generate the following code:
1776 ---------------------------------------------------------------------
1779 || NITER < MIN_PER_THREAD * N_THREADS)
1783 store all local loop-invariant variables used in body of the loop to DATA.
1784 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1785 load the variables from DATA.
1786 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1789 GIMPLE_OMP_CONTINUE;
1790 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1791 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1797 IV = phi (INIT, IV + STEP)
1808 /* Create two versions of the loop -- in the old one, we know that the
1809 number of iterations is large enough, and we will transform it into the
1810 loop that will be split to loop_fn, the new one will be used for the
1811 remaining iterations. */
1813 type
= TREE_TYPE (niter
->niter
);
1814 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1817 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1819 many_iterations_cond
=
1820 fold_build2 (GE_EXPR
, boolean_type_node
,
1821 nit
, build_int_cst (type
, MIN_PER_THREAD
* n_threads
));
1822 many_iterations_cond
1823 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1824 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1825 many_iterations_cond
);
1826 many_iterations_cond
1827 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1829 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1830 if (!is_gimple_condexpr (many_iterations_cond
))
1832 many_iterations_cond
1833 = force_gimple_operand (many_iterations_cond
, &stmts
,
1836 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1839 initialize_original_copy_tables ();
1841 /* We assume that the loop usually iterates a lot. */
1842 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1843 loop_version (loop
, many_iterations_cond
, NULL
,
1844 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1845 update_ssa (TODO_update_ssa
);
1846 free_original_copy_tables ();
1848 /* Base all the induction variables in LOOP on a single control one. */
1849 canonicalize_loop_ivs (loop
, &nit
, true);
1851 /* Ensure that the exit condition is the first statement in the loop. */
1852 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1854 /* Generate initializations for reductions. */
1855 if (htab_elements (reduction_list
) > 0)
1856 htab_traverse (reduction_list
, initialize_reductions
, loop
);
1858 /* Eliminate the references to local variables from the loop. */
1859 gcc_assert (single_exit (loop
));
1860 entry
= loop_preheader_edge (loop
);
1861 exit
= single_dom_exit (loop
);
1863 eliminate_local_variables (entry
, exit
);
1864 /* In the old loop, move all variables non-local to the loop to a structure
1865 and back, and create separate decls for the variables used in loop. */
1866 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1867 &new_arg_struct
, &clsn_data
);
1869 /* Create the parallel constructs. */
1870 loc
= UNKNOWN_LOCATION
;
1871 cond_stmt
= last_stmt (loop
->header
);
1873 loc
= gimple_location (cond_stmt
);
1874 parallel_head
= create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
1875 new_arg_struct
, n_threads
, loc
);
1876 if (htab_elements (reduction_list
) > 0)
1877 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1881 /* Cancel the loop (it is simpler to do it here rather than to teach the
1882 expander to do it). */
1883 cancel_loop_tree (loop
);
1885 /* Free loop bound estimations that could contain references to
1886 removed statements. */
1887 FOR_EACH_LOOP (li
, loop
, 0)
1888 free_numbers_of_iterations_estimates_loop (loop
);
1890 /* Expand the parallel constructs. We do it directly here instead of running
1891 a separate expand_omp pass, since it is more efficient, and less likely to
1892 cause troubles with further analyses not being able to deal with the
1895 omp_expand_local (parallel_head
);
1898 /* Returns true when LOOP contains vector phi nodes. */
1901 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1904 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1905 gimple_stmt_iterator gsi
;
1908 for (i
= 0; i
< loop
->num_nodes
; i
++)
1909 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1910 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1919 /* Create a reduction_info struct, initialize it with REDUC_STMT
1920 and PHI, insert it to the REDUCTION_LIST. */
1923 build_new_reduction (htab_t reduction_list
, gimple reduc_stmt
, gimple phi
)
1926 struct reduction_info
*new_reduction
;
1928 gcc_assert (reduc_stmt
);
1930 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1933 "Detected reduction. reduction stmt is: \n");
1934 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1935 fprintf (dump_file
, "\n");
1938 new_reduction
= XCNEW (struct reduction_info
);
1940 new_reduction
->reduc_stmt
= reduc_stmt
;
1941 new_reduction
->reduc_phi
= phi
;
1942 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
1943 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1944 slot
= htab_find_slot (reduction_list
, new_reduction
, INSERT
);
1945 *slot
= new_reduction
;
1948 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1951 set_reduc_phi_uids (void **slot
, void *data ATTRIBUTE_UNUSED
)
1953 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
1954 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1958 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1961 gather_scalar_reductions (loop_p loop
, htab_t reduction_list
)
1963 gimple_stmt_iterator gsi
;
1964 loop_vec_info simple_loop_info
;
1967 simple_loop_info
= vect_analyze_loop_form (loop
);
1969 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1971 gimple phi
= gsi_stmt (gsi
);
1973 tree res
= PHI_RESULT (phi
);
1976 if (!is_gimple_reg (res
))
1979 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1980 && simple_loop_info
)
1982 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
1985 if (reduc_stmt
&& !double_reduc
)
1986 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
1989 destroy_loop_vec_info (simple_loop_info
, true);
1991 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1992 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1994 htab_traverse (reduction_list
, set_reduc_phi_uids
, NULL
);
1997 /* Try to initialize NITER for code generation part. */
2000 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2002 edge exit
= single_dom_exit (loop
);
2006 /* We need to know # of iterations, and there should be no uses of values
2007 defined inside loop outside of it, unless the values are invariants of
2009 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2011 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2012 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2019 /* Try to initialize REDUCTION_LIST for code generation part.
2020 REDUCTION_LIST describes the reductions. */
2023 try_create_reduction_list (loop_p loop
, htab_t reduction_list
)
2025 edge exit
= single_dom_exit (loop
);
2026 gimple_stmt_iterator gsi
;
2030 gather_scalar_reductions (loop
, reduction_list
);
2033 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2035 gimple phi
= gsi_stmt (gsi
);
2036 struct reduction_info
*red
;
2037 imm_use_iterator imm_iter
;
2038 use_operand_p use_p
;
2040 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2042 if (is_gimple_reg (val
))
2044 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2046 fprintf (dump_file
, "phi is ");
2047 print_gimple_stmt (dump_file
, phi
, 0, 0);
2048 fprintf (dump_file
, "arg of phi to exit: value ");
2049 print_generic_expr (dump_file
, val
, 0);
2050 fprintf (dump_file
, " used outside loop\n");
2052 " checking if it a part of reduction pattern: \n");
2054 if (htab_elements (reduction_list
) == 0)
2056 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2058 " FAILED: it is not a part of reduction.\n");
2062 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2064 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2065 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2067 reduc_phi
= USE_STMT (use_p
);
2071 red
= reduction_phi (reduction_list
, reduc_phi
);
2074 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2076 " FAILED: it is not a part of reduction.\n");
2079 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2081 fprintf (dump_file
, "reduction phi is ");
2082 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2083 fprintf (dump_file
, "reduction stmt is ");
2084 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2089 /* The iterations of the loop may communicate only through bivs whose
2090 iteration space can be distributed efficiently. */
2091 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2093 gimple phi
= gsi_stmt (gsi
);
2094 tree def
= PHI_RESULT (phi
);
2097 if (is_gimple_reg (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2099 struct reduction_info
*red
;
2101 red
= reduction_phi (reduction_list
, phi
);
2104 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2106 " FAILED: scalar dependency between iterations\n");
2116 /* Detect parallel loops and generate parallel code using libgomp
2117 primitives. Returns true if some loop was parallelized, false
2121 parallelize_loops (void)
2123 unsigned n_threads
= flag_tree_parallelize_loops
;
2124 bool changed
= false;
2126 struct tree_niter_desc niter_desc
;
2128 htab_t reduction_list
;
2129 struct obstack parloop_obstack
;
2130 HOST_WIDE_INT estimated
;
2133 /* Do not parallelize loops in the functions created by parallelization. */
2134 if (parallelized_function_p (cfun
->decl
))
2136 if (cfun
->has_nonlocal_label
)
2139 gcc_obstack_init (&parloop_obstack
);
2140 reduction_list
= htab_create (10, reduction_info_hash
,
2141 reduction_info_eq
, free
);
2142 init_stmt_vec_info_vec ();
2144 FOR_EACH_LOOP (li
, loop
, 0)
2146 htab_empty (reduction_list
);
2147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2149 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2151 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2153 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2156 /* If we use autopar in graphite pass, we use its marked dependency
2157 checking results. */
2158 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2161 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2165 if (!single_dom_exit (loop
))
2168 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2169 fprintf (dump_file
, "loop is !single_dom_exit\n");
2174 if (/* And of course, the loop must be parallelizable. */
2175 !can_duplicate_loop_p (loop
)
2176 || loop_has_blocks_with_irreducible_flag (loop
)
2177 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2178 /* FIXME: the check for vector phi nodes could be removed. */
2179 || loop_has_vector_phi_nodes (loop
))
2181 estimated
= max_stmt_executions_int (loop
, false);
2182 /* FIXME: Bypass this check as graphite doesn't update the
2183 count and frequency correctly now. */
2184 if (!flag_loop_parallelize_all
2186 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2187 /* Do not bother with loops in cold areas. */
2188 || optimize_loop_nest_for_size_p (loop
)))
2191 if (!try_get_loop_niter (loop
, &niter_desc
))
2194 if (!try_create_reduction_list (loop
, reduction_list
))
2197 if (!flag_loop_parallelize_all
2198 && !loop_parallel_p (loop
, &parloop_obstack
))
2202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2205 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2207 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2208 loop_loc
= find_loop_location (loop
);
2209 if (loop_loc
!= UNKNOWN_LOC
)
2210 fprintf (dump_file
, "\nloop at %s:%d: ",
2211 LOC_FILE (loop_loc
), LOC_LINE (loop_loc
));
2213 gen_parallel_loop (loop
, reduction_list
,
2214 n_threads
, &niter_desc
);
2215 verify_flow_info ();
2216 verify_dominators (CDI_DOMINATORS
);
2217 verify_loop_structure ();
2218 verify_loop_closed_ssa (true);
2221 free_stmt_vec_info_vec ();
2222 htab_delete (reduction_list
);
2223 obstack_free (&parloop_obstack
, NULL
);
2225 /* Parallelization will cause new function calls to be inserted through
2226 which local variables will escape. Reset the points-to solution
2229 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2234 #include "gt-tree-parloops.h"