1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "gimple-iterator.h"
29 #include "gimplify-me.h"
30 #include "gimple-walk.h"
31 #include "gimple-ssa.h"
33 #include "tree-phinodes.h"
34 #include "ssa-iterators.h"
35 #include "tree-ssanames.h"
36 #include "tree-ssa-loop-ivopts.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-ssa-loop-niter.h"
39 #include "tree-ssa-loop.h"
40 #include "tree-into-ssa.h"
42 #include "tree-data-ref.h"
43 #include "tree-scalar-evolution.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-pass.h"
46 #include "langhooks.h"
47 #include "tree-vectorizer.h"
48 #include "tree-hasher.h"
49 #include "tree-parloops.h"
51 #include "tree-nested.h"
53 /* This pass tries to distribute iterations of loops into several threads.
54 The implementation is straightforward -- for each loop we test whether its
55 iterations are independent, and if it is the case (and some additional
56 conditions regarding profitability and correctness are satisfied), we
57 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
60 The most of the complexity is in bringing the code into shape expected
62 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
63 variable and that the exit test is at the start of the loop body
64 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
65 variables by accesses through pointers, and breaking up ssa chains
66 by storing the values incoming to the parallelized loop to a structure
67 passed to the new function as an argument (something similar is done
68 in omp gimplification, unfortunately only a small part of the code
72 -- if there are several parallelizable loops in a function, it may be
73 possible to generate the threads just once (using synchronization to
74 ensure that cross-loop dependences are obeyed).
75 -- handling of common reduction patterns for outer loops.
77 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
80 currently we use vect_force_simple_reduction() to detect reduction patterns.
81 The code transformation will be introduced by an example.
88 for (i = 0; i < N; i++)
98 # sum_29 = PHI <sum_11(5), 1(3)>
99 # i_28 = PHI <i_12(5), 0(3)>
102 sum_11 = D.1795_8 + sum_29;
110 # sum_21 = PHI <sum_11(4)>
111 printf (&"%d"[0], sum_21);
114 after reduction transformation (only relevant parts):
122 # Storing the initial value given by the user. #
124 .paral_data_store.32.sum.27 = 1;
126 #pragma omp parallel num_threads(4)
128 #pragma omp for schedule(static)
130 # The neutral element corresponding to the particular
131 reduction's operation, e.g. 0 for PLUS_EXPR,
132 1 for MULT_EXPR, etc. replaces the user's initial value. #
134 # sum.27_29 = PHI <sum.27_11, 0>
136 sum.27_11 = D.1827_8 + sum.27_29;
140 # Adding this reduction phi is done at create_phi_for_local_result() #
141 # sum.27_56 = PHI <sum.27_11, 0>
144 # Creating the atomic operation is done at
145 create_call_for_reduction_1() #
147 #pragma omp atomic_load
148 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
149 D.1840_60 = sum.27_56 + D.1839_59;
150 #pragma omp atomic_store (D.1840_60);
154 # collecting the result after the join of the threads is done at
155 create_loads_for_reductions().
156 The value computed by the threads is loaded from the
160 .paral_data_load.33_52 = &.paral_data_store.32;
161 sum_37 = .paral_data_load.33_52->sum.27;
162 sum_43 = D.1795_41 + sum_37;
165 # sum_21 = PHI <sum_43, sum_26>
166 printf (&"%d"[0], sum_21);
174 /* Minimal number of iterations of a loop that should be executed in each
176 #define MIN_PER_THREAD 100
178 /* Element of the hashtable, representing a
179 reduction in the current loop. */
180 struct reduction_info
182 gimple reduc_stmt
; /* reduction statement. */
183 gimple reduc_phi
; /* The phi node defining the reduction. */
184 enum tree_code reduction_code
;/* code for the reduction operation. */
185 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
187 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
188 of the reduction variable when existing the loop. */
189 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
190 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
191 tree init
; /* reduction initialization value. */
192 gimple new_phi
; /* (helper field) Newly created phi node whose result
193 will be passed to the atomic operation. Represents
194 the local result each thread computed for the reduction
198 /* Reduction info hashtable helpers. */
200 struct reduction_hasher
: typed_free_remove
<reduction_info
>
202 typedef reduction_info value_type
;
203 typedef reduction_info compare_type
;
204 static inline hashval_t
hash (const value_type
*);
205 static inline bool equal (const value_type
*, const compare_type
*);
208 /* Equality and hash functions for hashtab code. */
211 reduction_hasher::equal (const value_type
*a
, const compare_type
*b
)
213 return (a
->reduc_phi
== b
->reduc_phi
);
217 reduction_hasher::hash (const value_type
*a
)
219 return a
->reduc_version
;
222 typedef hash_table
<reduction_hasher
> reduction_info_table_type
;
225 static struct reduction_info
*
226 reduction_phi (reduction_info_table_type reduction_list
, gimple phi
)
228 struct reduction_info tmpred
, *red
;
230 if (reduction_list
.elements () == 0 || phi
== NULL
)
233 tmpred
.reduc_phi
= phi
;
234 tmpred
.reduc_version
= gimple_uid (phi
);
235 red
= reduction_list
.find (&tmpred
);
240 /* Element of hashtable of names to copy. */
242 struct name_to_copy_elt
244 unsigned version
; /* The version of the name to copy. */
245 tree new_name
; /* The new name used in the copy. */
246 tree field
; /* The field of the structure used to pass the
250 /* Name copies hashtable helpers. */
252 struct name_to_copy_hasher
: typed_free_remove
<name_to_copy_elt
>
254 typedef name_to_copy_elt value_type
;
255 typedef name_to_copy_elt compare_type
;
256 static inline hashval_t
hash (const value_type
*);
257 static inline bool equal (const value_type
*, const compare_type
*);
260 /* Equality and hash functions for hashtab code. */
263 name_to_copy_hasher::equal (const value_type
*a
, const compare_type
*b
)
265 return a
->version
== b
->version
;
269 name_to_copy_hasher::hash (const value_type
*a
)
271 return (hashval_t
) a
->version
;
274 typedef hash_table
<name_to_copy_hasher
> name_to_copy_table_type
;
276 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
277 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
278 represents the denominator for every element in the matrix. */
279 typedef struct lambda_trans_matrix_s
281 lambda_matrix matrix
;
285 } *lambda_trans_matrix
;
286 #define LTM_MATRIX(T) ((T)->matrix)
287 #define LTM_ROWSIZE(T) ((T)->rowsize)
288 #define LTM_COLSIZE(T) ((T)->colsize)
289 #define LTM_DENOMINATOR(T) ((T)->denominator)
291 /* Allocate a new transformation matrix. */
293 static lambda_trans_matrix
294 lambda_trans_matrix_new (int colsize
, int rowsize
,
295 struct obstack
* lambda_obstack
)
297 lambda_trans_matrix ret
;
299 ret
= (lambda_trans_matrix
)
300 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
301 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
302 LTM_ROWSIZE (ret
) = rowsize
;
303 LTM_COLSIZE (ret
) = colsize
;
304 LTM_DENOMINATOR (ret
) = 1;
308 /* Multiply a vector VEC by a matrix MAT.
309 MAT is an M*N matrix, and VEC is a vector with length N. The result
310 is stored in DEST which must be a vector of length M. */
313 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
314 lambda_vector vec
, lambda_vector dest
)
318 lambda_vector_clear (dest
, m
);
319 for (i
= 0; i
< m
; i
++)
320 for (j
= 0; j
< n
; j
++)
321 dest
[i
] += matrix
[i
][j
] * vec
[j
];
324 /* Return true if TRANS is a legal transformation matrix that respects
325 the dependence vectors in DISTS and DIRS. The conservative answer
328 "Wolfe proves that a unimodular transformation represented by the
329 matrix T is legal when applied to a loop nest with a set of
330 lexicographically non-negative distance vectors RDG if and only if
331 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
332 i.e.: if and only if it transforms the lexicographically positive
333 distance vectors to lexicographically positive vectors. Note that
334 a unimodular matrix must transform the zero vector (and only it) to
335 the zero vector." S.Muchnick. */
338 lambda_transform_legal_p (lambda_trans_matrix trans
,
340 vec
<ddr_p
> dependence_relations
)
343 lambda_vector distres
;
344 struct data_dependence_relation
*ddr
;
346 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
347 && LTM_ROWSIZE (trans
) == nb_loops
);
349 /* When there are no dependences, the transformation is correct. */
350 if (dependence_relations
.length () == 0)
353 ddr
= dependence_relations
[0];
357 /* When there is an unknown relation in the dependence_relations, we
358 know that it is no worth looking at this loop nest: give up. */
359 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
362 distres
= lambda_vector_new (nb_loops
);
364 /* For each distance vector in the dependence graph. */
365 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
367 /* Don't care about relations for which we know that there is no
368 dependence, nor about read-read (aka. output-dependences):
369 these data accesses can happen in any order. */
370 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
371 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
374 /* Conservatively answer: "this transformation is not valid". */
375 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
378 /* If the dependence could not be captured by a distance vector,
379 conservatively answer that the transform is not valid. */
380 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
383 /* Compute trans.dist_vect */
384 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
386 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
387 DDR_DIST_VECT (ddr
, j
), distres
);
389 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
396 /* Data dependency analysis. Returns true if the iterations of LOOP
397 are independent on each other (that is, if we can execute them
401 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
403 vec
<ddr_p
> dependence_relations
;
404 vec
<data_reference_p
> datarefs
;
405 lambda_trans_matrix trans
;
408 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
410 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
412 fprintf (dump_file
, "loop is innermost\n");
414 fprintf (dump_file
, "loop NOT innermost\n");
417 /* Check for problems with dependences. If the loop can be reversed,
418 the iterations are independent. */
419 stack_vec
<loop_p
, 3> loop_nest
;
420 datarefs
.create (10);
421 dependence_relations
.create (100);
422 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
423 &dependence_relations
))
425 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
426 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
430 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
431 dump_data_dependence_relations (dump_file
, dependence_relations
);
433 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
434 LTM_MATRIX (trans
)[0][0] = -1;
436 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
439 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
440 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
442 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
444 " FAILED: data dependencies exist across iterations\n");
447 free_dependence_relations (dependence_relations
);
448 free_data_refs (datarefs
);
453 /* Return true when LOOP contains basic blocks marked with the
454 BB_IRREDUCIBLE_LOOP flag. */
457 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
460 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
463 for (i
= 0; i
< loop
->num_nodes
; i
++)
464 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
473 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
474 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
475 to their addresses that can be reused. The address of OBJ is known to
476 be invariant in the whole function. Other needed statements are placed
480 take_address_of (tree obj
, tree type
, edge entry
,
481 int_tree_htab_type decl_address
, gimple_stmt_iterator
*gsi
)
484 int_tree_map
**dslot
;
485 struct int_tree_map ielt
, *nielt
;
486 tree
*var_p
, name
, addr
;
490 /* Since the address of OBJ is invariant, the trees may be shared.
491 Avoid rewriting unrelated parts of the code. */
492 obj
= unshare_expr (obj
);
494 handled_component_p (*var_p
);
495 var_p
= &TREE_OPERAND (*var_p
, 0))
498 /* Canonicalize the access to base on a MEM_REF. */
500 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
502 /* Assign a canonical SSA name to the address of the base decl used
503 in the address and share it for all accesses and addresses based
505 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
507 dslot
= decl_address
.find_slot_with_hash (&ielt
, uid
, INSERT
);
512 addr
= TREE_OPERAND (*var_p
, 0);
514 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
516 name
= make_temp_ssa_name (TREE_TYPE (addr
), NULL
, obj_name
);
518 name
= make_ssa_name (TREE_TYPE (addr
), NULL
);
519 stmt
= gimple_build_assign (name
, addr
);
520 gsi_insert_on_edge_immediate (entry
, stmt
);
522 nielt
= XNEW (struct int_tree_map
);
530 /* Express the address in terms of the canonical SSA name. */
531 TREE_OPERAND (*var_p
, 0) = name
;
533 return build_fold_addr_expr_with_type (obj
, type
);
535 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
536 &stmts
, true, NULL_TREE
);
537 if (!gimple_seq_empty_p (stmts
))
538 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
540 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
542 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
544 if (!gimple_seq_empty_p (stmts
))
545 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
551 /* Callback for htab_traverse. Create the initialization statement
552 for reduction described in SLOT, and place it at the preheader of
553 the loop described in DATA. */
556 initialize_reductions (reduction_info
**slot
, struct loop
*loop
)
559 tree bvar
, type
, arg
;
562 struct reduction_info
*const reduc
= *slot
;
564 /* Create initialization in preheader:
565 reduction_variable = initialization value of reduction. */
567 /* In the phi node at the header, replace the argument coming
568 from the preheader with the reduction initialization value. */
570 /* Create a new variable to initialize the reduction. */
571 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
572 bvar
= create_tmp_var (type
, "reduction");
574 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
575 OMP_CLAUSE_REDUCTION
);
576 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
577 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
579 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
582 /* Replace the argument representing the initialization value
583 with the initialization value for the reduction (neutral
584 element for the particular operation, e.g. 0 for PLUS_EXPR,
585 1 for MULT_EXPR, etc).
586 Keep the old value in a new variable "reduction_initial",
587 that will be taken in consideration after the parallel
588 computing is done. */
590 e
= loop_preheader_edge (loop
);
591 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
592 /* Create new variable to hold the initial value. */
594 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
595 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
596 reduc
->initial_value
= arg
;
602 struct walk_stmt_info info
;
604 int_tree_htab_type decl_address
;
605 gimple_stmt_iterator
*gsi
;
610 /* Eliminates references to local variables in *TP out of the single
611 entry single exit region starting at DTA->ENTRY.
612 DECL_ADDRESS contains addresses of the references that had their
613 address taken already. If the expression is changed, CHANGED is
614 set to true. Callback for walk_tree. */
617 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
619 struct elv_data
*const dta
= (struct elv_data
*) data
;
620 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
626 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
629 type
= TREE_TYPE (t
);
630 addr_type
= build_pointer_type (type
);
631 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
633 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
639 *tp
= build_simple_mem_ref (addr
);
645 if (TREE_CODE (t
) == ADDR_EXPR
)
647 /* ADDR_EXPR may appear in two contexts:
648 -- as a gimple operand, when the address taken is a function invariant
649 -- as gimple rhs, when the resulting address in not a function
651 We do not need to do anything special in the latter case (the base of
652 the memory reference whose address is taken may be replaced in the
653 DECL_P case). The former case is more complicated, as we need to
654 ensure that the new address is still a gimple operand. Thus, it
655 is not sufficient to replace just the base of the memory reference --
656 we need to move the whole computation of the address out of the
658 if (!is_gimple_val (t
))
662 obj
= TREE_OPERAND (t
, 0);
663 var
= get_base_address (obj
);
664 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
667 addr_type
= TREE_TYPE (t
);
668 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
670 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
687 /* Moves the references to local variables in STMT at *GSI out of the single
688 entry single exit region starting at ENTRY. DECL_ADDRESS contains
689 addresses of the references that had their address taken
693 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
694 int_tree_htab_type decl_address
)
697 gimple stmt
= gsi_stmt (*gsi
);
699 memset (&dta
.info
, '\0', sizeof (dta
.info
));
701 dta
.decl_address
= decl_address
;
705 if (gimple_debug_bind_p (stmt
))
708 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
709 eliminate_local_variables_1
, &dta
.info
, NULL
);
712 gimple_debug_bind_reset_value (stmt
);
716 else if (gimple_clobber_p (stmt
))
718 stmt
= gimple_build_nop ();
719 gsi_replace (gsi
, stmt
, false);
725 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
732 /* Eliminates the references to local variables from the single entry
733 single exit region between the ENTRY and EXIT edges.
736 1) Taking address of a local variable -- these are moved out of the
737 region (and temporary variable is created to hold the address if
740 2) Dereferencing a local variable -- these are replaced with indirect
744 eliminate_local_variables (edge entry
, edge exit
)
747 stack_vec
<basic_block
, 3> body
;
749 gimple_stmt_iterator gsi
;
750 bool has_debug_stmt
= false;
751 int_tree_htab_type decl_address
;
752 decl_address
.create (10);
753 basic_block entry_bb
= entry
->src
;
754 basic_block exit_bb
= exit
->dest
;
756 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
758 FOR_EACH_VEC_ELT (body
, i
, bb
)
759 if (bb
!= entry_bb
&& bb
!= exit_bb
)
760 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
761 if (is_gimple_debug (gsi_stmt (gsi
)))
763 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
764 has_debug_stmt
= true;
767 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
770 FOR_EACH_VEC_ELT (body
, i
, bb
)
771 if (bb
!= entry_bb
&& bb
!= exit_bb
)
772 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
773 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
774 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
776 decl_address
.dispose ();
779 /* Returns true if expression EXPR is not defined between ENTRY and
780 EXIT, i.e. if all its operands are defined outside of the region. */
783 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
785 basic_block entry_bb
= entry
->src
;
786 basic_block exit_bb
= exit
->dest
;
789 if (is_gimple_min_invariant (expr
))
792 if (TREE_CODE (expr
) == SSA_NAME
)
794 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
796 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
797 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
806 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
807 The copies are stored to NAME_COPIES, if NAME was already duplicated,
808 its duplicate stored in NAME_COPIES is returned.
810 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
811 duplicated, storing the copies in DECL_COPIES. */
814 separate_decls_in_region_name (tree name
, name_to_copy_table_type name_copies
,
815 int_tree_htab_type decl_copies
, bool copy_name_p
)
817 tree copy
, var
, var_copy
;
818 unsigned idx
, uid
, nuid
;
819 struct int_tree_map ielt
, *nielt
;
820 struct name_to_copy_elt elt
, *nelt
;
821 name_to_copy_elt
**slot
;
822 int_tree_map
**dslot
;
824 if (TREE_CODE (name
) != SSA_NAME
)
827 idx
= SSA_NAME_VERSION (name
);
829 slot
= name_copies
.find_slot_with_hash (&elt
, idx
,
830 copy_name_p
? INSERT
: NO_INSERT
);
832 return (*slot
)->new_name
;
836 copy
= duplicate_ssa_name (name
, NULL
);
837 nelt
= XNEW (struct name_to_copy_elt
);
839 nelt
->new_name
= copy
;
840 nelt
->field
= NULL_TREE
;
849 var
= SSA_NAME_VAR (name
);
853 uid
= DECL_UID (var
);
855 dslot
= decl_copies
.find_slot_with_hash (&ielt
, uid
, INSERT
);
858 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
859 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
860 nielt
= XNEW (struct int_tree_map
);
862 nielt
->to
= var_copy
;
865 /* Ensure that when we meet this decl next time, we won't duplicate
867 nuid
= DECL_UID (var_copy
);
869 dslot
= decl_copies
.find_slot_with_hash (&ielt
, nuid
, INSERT
);
870 gcc_assert (!*dslot
);
871 nielt
= XNEW (struct int_tree_map
);
873 nielt
->to
= var_copy
;
877 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
879 replace_ssa_name_symbol (copy
, var_copy
);
883 /* Finds the ssa names used in STMT that are defined outside the
884 region between ENTRY and EXIT and replaces such ssa names with
885 their duplicates. The duplicates are stored to NAME_COPIES. Base
886 decls of all ssa names used in STMT (including those defined in
887 LOOP) are replaced with the new temporary variables; the
888 replacement decls are stored in DECL_COPIES. */
891 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
892 name_to_copy_table_type name_copies
,
893 int_tree_htab_type decl_copies
)
901 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
903 name
= DEF_FROM_PTR (def
);
904 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
905 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
907 gcc_assert (copy
== name
);
910 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
912 name
= USE_FROM_PTR (use
);
913 if (TREE_CODE (name
) != SSA_NAME
)
916 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
917 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
923 /* Finds the ssa names used in STMT that are defined outside the
924 region between ENTRY and EXIT and replaces such ssa names with
925 their duplicates. The duplicates are stored to NAME_COPIES. Base
926 decls of all ssa names used in STMT (including those defined in
927 LOOP) are replaced with the new temporary variables; the
928 replacement decls are stored in DECL_COPIES. */
931 separate_decls_in_region_debug (gimple stmt
,
932 name_to_copy_table_type name_copies
,
933 int_tree_htab_type decl_copies
)
938 struct int_tree_map ielt
;
939 struct name_to_copy_elt elt
;
940 name_to_copy_elt
**slot
;
941 int_tree_map
**dslot
;
943 if (gimple_debug_bind_p (stmt
))
944 var
= gimple_debug_bind_get_var (stmt
);
945 else if (gimple_debug_source_bind_p (stmt
))
946 var
= gimple_debug_source_bind_get_var (stmt
);
949 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
951 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
952 ielt
.uid
= DECL_UID (var
);
953 dslot
= decl_copies
.find_slot_with_hash (&ielt
, ielt
.uid
, NO_INSERT
);
956 if (gimple_debug_bind_p (stmt
))
957 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
958 else if (gimple_debug_source_bind_p (stmt
))
959 gimple_debug_source_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
961 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
963 name
= USE_FROM_PTR (use
);
964 if (TREE_CODE (name
) != SSA_NAME
)
967 elt
.version
= SSA_NAME_VERSION (name
);
968 slot
= name_copies
.find_slot_with_hash (&elt
, elt
.version
, NO_INSERT
);
971 gimple_debug_bind_reset_value (stmt
);
976 SET_USE (use
, (*slot
)->new_name
);
982 /* Callback for htab_traverse. Adds a field corresponding to the reduction
983 specified in SLOT. The type is passed in DATA. */
986 add_field_for_reduction (reduction_info
**slot
, tree type
)
989 struct reduction_info
*const red
= *slot
;
990 tree var
= gimple_assign_lhs (red
->reduc_stmt
);
991 tree field
= build_decl (gimple_location (red
->reduc_stmt
), FIELD_DECL
,
992 SSA_NAME_IDENTIFIER (var
), TREE_TYPE (var
));
994 insert_field_into_struct (type
, field
);
1001 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1002 described in SLOT. The type is passed in DATA. */
1005 add_field_for_name (name_to_copy_elt
**slot
, tree type
)
1007 struct name_to_copy_elt
*const elt
= *slot
;
1008 tree name
= ssa_name (elt
->version
);
1009 tree field
= build_decl (UNKNOWN_LOCATION
,
1010 FIELD_DECL
, SSA_NAME_IDENTIFIER (name
),
1013 insert_field_into_struct (type
, field
);
1019 /* Callback for htab_traverse. A local result is the intermediate result
1020 computed by a single
1021 thread, or the initial value in case no iteration was executed.
1022 This function creates a phi node reflecting these values.
1023 The phi's result will be stored in NEW_PHI field of the
1024 reduction's data structure. */
1027 create_phi_for_local_result (reduction_info
**slot
, struct loop
*loop
)
1029 struct reduction_info
*const reduc
= *slot
;
1032 basic_block store_bb
;
1034 source_location locus
;
1036 /* STORE_BB is the block where the phi
1037 should be stored. It is the destination of the loop exit.
1038 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1039 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1041 /* STORE_BB has two predecessors. One coming from the loop
1042 (the reduction's result is computed at the loop),
1043 and another coming from a block preceding the loop,
1045 are executed (the initial value should be taken). */
1046 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1047 e
= EDGE_PRED (store_bb
, 1);
1049 e
= EDGE_PRED (store_bb
, 0);
1050 local_res
= copy_ssa_name (gimple_assign_lhs (reduc
->reduc_stmt
), NULL
);
1051 locus
= gimple_location (reduc
->reduc_stmt
);
1052 new_phi
= create_phi_node (local_res
, store_bb
);
1053 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1054 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1055 FALLTHRU_EDGE (loop
->latch
), locus
);
1056 reduc
->new_phi
= new_phi
;
1066 basic_block store_bb
;
1067 basic_block load_bb
;
1070 /* Callback for htab_traverse. Create an atomic instruction for the
1071 reduction described in SLOT.
1072 DATA annotates the place in memory the atomic operation relates to,
1073 and the basic block it needs to be generated in. */
1076 create_call_for_reduction_1 (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1078 struct reduction_info
*const reduc
= *slot
;
1079 gimple_stmt_iterator gsi
;
1080 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1085 tree t
, addr
, ref
, x
;
1086 tree tmp_load
, name
;
1089 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1090 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1092 addr
= build_addr (t
, current_function_decl
);
1094 /* Create phi node. */
1095 bb
= clsn_data
->load_bb
;
1097 e
= split_block (bb
, t
);
1100 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
1101 tmp_load
= make_ssa_name (tmp_load
, NULL
);
1102 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1103 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1104 gsi
= gsi_start_bb (new_bb
);
1105 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1107 e
= split_block (new_bb
, load
);
1109 gsi
= gsi_start_bb (new_bb
);
1111 x
= fold_build2 (reduc
->reduction_code
,
1112 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1113 PHI_RESULT (reduc
->new_phi
));
1115 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1116 GSI_CONTINUE_LINKING
);
1118 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1122 /* Create the atomic operation at the join point of the threads.
1123 REDUCTION_LIST describes the reductions in the LOOP.
1124 LD_ST_DATA describes the shared data structure where
1125 shared data is stored in and loaded from. */
1127 create_call_for_reduction (struct loop
*loop
,
1128 reduction_info_table_type reduction_list
,
1129 struct clsn_data
*ld_st_data
)
1131 reduction_list
.traverse
<struct loop
*, create_phi_for_local_result
> (loop
);
1132 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1133 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1135 .traverse
<struct clsn_data
*, create_call_for_reduction_1
> (ld_st_data
);
1138 /* Callback for htab_traverse. Loads the final reduction value at the
1139 join point of all threads, and inserts it in the right place. */
1142 create_loads_for_reductions (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1144 struct reduction_info
*const red
= *slot
;
1146 gimple_stmt_iterator gsi
;
1147 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1152 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1153 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1154 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1158 name
= PHI_RESULT (red
->keep_res
);
1159 stmt
= gimple_build_assign (name
, x
);
1161 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1163 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1164 !gsi_end_p (gsi
); gsi_next (&gsi
))
1165 if (gsi_stmt (gsi
) == red
->keep_res
)
1167 remove_phi_node (&gsi
, false);
1173 /* Load the reduction result that was stored in LD_ST_DATA.
1174 REDUCTION_LIST describes the list of reductions that the
1175 loads should be generated for. */
1177 create_final_loads_for_reduction (reduction_info_table_type reduction_list
,
1178 struct clsn_data
*ld_st_data
)
1180 gimple_stmt_iterator gsi
;
1184 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1185 t
= build_fold_addr_expr (ld_st_data
->store
);
1186 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1188 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1191 .traverse
<struct clsn_data
*, create_loads_for_reductions
> (ld_st_data
);
1195 /* Callback for htab_traverse. Store the neutral value for the
1196 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1197 1 for MULT_EXPR, etc. into the reduction field.
1198 The reduction is specified in SLOT. The store information is
1202 create_stores_for_reduction (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1204 struct reduction_info
*const red
= *slot
;
1207 gimple_stmt_iterator gsi
;
1208 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1210 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1211 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1212 stmt
= gimple_build_assign (t
, red
->initial_value
);
1213 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1218 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1219 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1220 specified in SLOT. */
1223 create_loads_and_stores_for_name (name_to_copy_elt
**slot
,
1224 struct clsn_data
*clsn_data
)
1226 struct name_to_copy_elt
*const elt
= *slot
;
1229 gimple_stmt_iterator gsi
;
1230 tree type
= TREE_TYPE (elt
->new_name
);
1233 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1234 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1235 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1236 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1238 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1239 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1240 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1241 stmt
= gimple_build_assign (elt
->new_name
, t
);
1242 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1247 /* Moves all the variables used in LOOP and defined outside of it (including
1248 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1249 name) to a structure created for this purpose. The code
1257 is transformed this way:
1272 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1273 pointer `new' is intentionally not initialized (the loop will be split to a
1274 separate function later, and `new' will be initialized from its arguments).
1275 LD_ST_DATA holds information about the shared data structure used to pass
1276 information among the threads. It is initialized here, and
1277 gen_parallel_loop will pass it to create_call_for_reduction that
1278 needs this information. REDUCTION_LIST describes the reductions
1282 separate_decls_in_region (edge entry
, edge exit
,
1283 reduction_info_table_type reduction_list
,
1284 tree
*arg_struct
, tree
*new_arg_struct
,
1285 struct clsn_data
*ld_st_data
)
1288 basic_block bb1
= split_edge (entry
);
1289 basic_block bb0
= single_pred (bb1
);
1290 name_to_copy_table_type name_copies
;
1291 name_copies
.create (10);
1292 int_tree_htab_type decl_copies
;
1293 decl_copies
.create (10);
1295 tree type
, type_name
, nvar
;
1296 gimple_stmt_iterator gsi
;
1297 struct clsn_data clsn_data
;
1298 stack_vec
<basic_block
, 3> body
;
1300 basic_block entry_bb
= bb1
;
1301 basic_block exit_bb
= exit
->dest
;
1302 bool has_debug_stmt
= false;
1304 entry
= single_succ_edge (entry_bb
);
1305 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1307 FOR_EACH_VEC_ELT (body
, i
, bb
)
1309 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1311 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1312 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1313 name_copies
, decl_copies
);
1315 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1317 gimple stmt
= gsi_stmt (gsi
);
1319 if (is_gimple_debug (stmt
))
1320 has_debug_stmt
= true;
1322 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1323 name_copies
, decl_copies
);
1328 /* Now process debug bind stmts. We must not create decls while
1329 processing debug stmts, so we defer their processing so as to
1330 make sure we will have debug info for as many variables as
1331 possible (all of those that were dealt with in the loop above),
1332 and discard those for which we know there's nothing we can
1335 FOR_EACH_VEC_ELT (body
, i
, bb
)
1336 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1338 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1340 gimple stmt
= gsi_stmt (gsi
);
1342 if (is_gimple_debug (stmt
))
1344 if (separate_decls_in_region_debug (stmt
, name_copies
,
1347 gsi_remove (&gsi
, true);
1356 if (name_copies
.elements () == 0 && reduction_list
.elements () == 0)
1358 /* It may happen that there is nothing to copy (if there are only
1359 loop carried and external variables in the loop). */
1361 *new_arg_struct
= NULL
;
1365 /* Create the type for the structure to store the ssa names to. */
1366 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1367 type_name
= build_decl (UNKNOWN_LOCATION
,
1368 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1370 TYPE_NAME (type
) = type_name
;
1372 name_copies
.traverse
<tree
, add_field_for_name
> (type
);
1373 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1375 /* Create the fields for reductions. */
1376 reduction_list
.traverse
<tree
, add_field_for_reduction
> (type
);
1380 /* Create the loads and stores. */
1381 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1382 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1383 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1385 ld_st_data
->store
= *arg_struct
;
1386 ld_st_data
->load
= *new_arg_struct
;
1387 ld_st_data
->store_bb
= bb0
;
1388 ld_st_data
->load_bb
= bb1
;
1391 .traverse
<struct clsn_data
*, create_loads_and_stores_for_name
>
1394 /* Load the calculation from memory (after the join of the threads). */
1396 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1399 .traverse
<struct clsn_data
*, create_stores_for_reduction
>
1401 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1402 clsn_data
.load_bb
= exit
->dest
;
1403 clsn_data
.store
= ld_st_data
->store
;
1404 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1408 decl_copies
.dispose ();
1409 name_copies
.dispose ();
1412 /* Bitmap containing uids of functions created by parallelization. We cannot
1413 allocate it from the default obstack, as it must live across compilation
1414 of several functions; we make it gc allocated instead. */
1416 static GTY(()) bitmap parallelized_functions
;
1418 /* Returns true if FN was created by create_loop_fn. */
1421 parallelized_function_p (tree fn
)
1423 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1426 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1429 /* Creates and returns an empty function that will receive the body of
1430 a parallelized loop. */
1433 create_loop_fn (location_t loc
)
1437 tree decl
, type
, name
, t
;
1438 struct function
*act_cfun
= cfun
;
1439 static unsigned loopfn_num
;
1441 loc
= LOCATION_LOCUS (loc
);
1442 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1443 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1444 clean_symbol_name (tname
);
1445 name
= get_identifier (tname
);
1446 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1448 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1449 if (!parallelized_functions
)
1450 parallelized_functions
= BITMAP_GGC_ALLOC ();
1451 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
1453 TREE_STATIC (decl
) = 1;
1454 TREE_USED (decl
) = 1;
1455 DECL_ARTIFICIAL (decl
) = 1;
1456 DECL_IGNORED_P (decl
) = 0;
1457 TREE_PUBLIC (decl
) = 0;
1458 DECL_UNINLINABLE (decl
) = 1;
1459 DECL_EXTERNAL (decl
) = 0;
1460 DECL_CONTEXT (decl
) = NULL_TREE
;
1461 DECL_INITIAL (decl
) = make_node (BLOCK
);
1463 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1464 DECL_ARTIFICIAL (t
) = 1;
1465 DECL_IGNORED_P (t
) = 1;
1466 DECL_RESULT (decl
) = t
;
1468 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1470 DECL_ARTIFICIAL (t
) = 1;
1471 DECL_ARG_TYPE (t
) = ptr_type_node
;
1472 DECL_CONTEXT (t
) = decl
;
1474 DECL_ARGUMENTS (decl
) = t
;
1476 allocate_struct_function (decl
, false);
1478 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1480 set_cfun (act_cfun
);
1485 /* Moves the exit condition of LOOP to the beginning of its header, and
1486 duplicates the part of the last iteration that gets disabled to the
1487 exit of the loop. NIT is the number of iterations of the loop
1488 (used to initialize the variables in the duplicated part).
1490 TODO: the common case is that latch of the loop is empty and immediately
1491 follows the loop exit. In this case, it would be better not to copy the
1492 body of the loop, but only move the entry of the loop directly before the
1493 exit check and increase the number of iterations of the loop by one.
1494 This may need some additional preconditioning in case NIT = ~0.
1495 REDUCTION_LIST describes the reductions in LOOP. */
1498 transform_to_exit_first_loop (struct loop
*loop
,
1499 reduction_info_table_type reduction_list
,
1502 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1505 edge exit
= single_dom_exit (loop
), hpred
;
1506 tree control
, control_name
, res
, t
;
1507 gimple phi
, nphi
, cond_stmt
, stmt
, cond_nit
;
1508 gimple_stmt_iterator gsi
;
1511 split_block_after_labels (loop
->header
);
1512 orig_header
= single_succ (loop
->header
);
1513 hpred
= single_succ_edge (loop
->header
);
1515 cond_stmt
= last_stmt (exit
->src
);
1516 control
= gimple_cond_lhs (cond_stmt
);
1517 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1519 /* Make sure that we have phi nodes on exit for all loop header phis
1520 (create_parallel_loop requires that). */
1521 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1523 phi
= gsi_stmt (gsi
);
1524 res
= PHI_RESULT (phi
);
1525 t
= copy_ssa_name (res
, phi
);
1526 SET_PHI_RESULT (phi
, t
);
1527 nphi
= create_phi_node (res
, orig_header
);
1528 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1532 gimple_cond_set_lhs (cond_stmt
, t
);
1533 update_stmt (cond_stmt
);
1538 bbs
= get_loop_body_in_dom_order (loop
);
1540 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1542 nbbs
= XNEWVEC (basic_block
, n
);
1543 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1550 /* Other than reductions, the only gimple reg that should be copied
1551 out of the loop is the control variable. */
1552 exit
= single_dom_exit (loop
);
1553 control_name
= NULL_TREE
;
1554 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1556 phi
= gsi_stmt (gsi
);
1557 res
= PHI_RESULT (phi
);
1558 if (virtual_operand_p (res
))
1564 /* Check if it is a part of reduction. If it is,
1565 keep the phi at the reduction's keep_res field. The
1566 PHI_RESULT of this phi is the resulting value of the reduction
1567 variable when exiting the loop. */
1569 if (reduction_list
.elements () > 0)
1571 struct reduction_info
*red
;
1573 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1574 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1577 red
->keep_res
= phi
;
1582 gcc_assert (control_name
== NULL_TREE
1583 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1585 remove_phi_node (&gsi
, false);
1587 gcc_assert (control_name
!= NULL_TREE
);
1589 /* Initialize the control variable to number of iterations
1590 according to the rhs of the exit condition. */
1591 gsi
= gsi_after_labels (ex_bb
);
1592 cond_nit
= last_stmt (exit
->src
);
1593 nit_1
= gimple_cond_rhs (cond_nit
);
1594 nit_1
= force_gimple_operand_gsi (&gsi
,
1595 fold_convert (TREE_TYPE (control_name
), nit_1
),
1596 false, NULL_TREE
, false, GSI_SAME_STMT
);
1597 stmt
= gimple_build_assign (control_name
, nit_1
);
1598 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1601 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1602 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1603 NEW_DATA is the variable that should be initialized from the argument
1604 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1605 basic block containing GIMPLE_OMP_PARALLEL tree. */
1608 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1609 tree new_data
, unsigned n_threads
, location_t loc
)
1611 gimple_stmt_iterator gsi
;
1612 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1614 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1615 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1616 edge exit
, nexit
, guard
, end
, e
;
1618 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1619 bb
= loop_preheader_edge (loop
)->src
;
1620 paral_bb
= single_pred (bb
);
1621 gsi
= gsi_last_bb (paral_bb
);
1623 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1624 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1625 = build_int_cst (integer_type_node
, n_threads
);
1626 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1627 gimple_set_location (stmt
, loc
);
1629 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1631 /* Initialize NEW_DATA. */
1634 gsi
= gsi_after_labels (bb
);
1636 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1637 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1638 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1640 stmt
= gimple_build_assign (new_data
,
1641 fold_convert (TREE_TYPE (new_data
), param
));
1642 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1645 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1646 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1647 gsi
= gsi_last_bb (bb
);
1648 stmt
= gimple_build_omp_return (false);
1649 gimple_set_location (stmt
, loc
);
1650 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1652 /* Extract data for GIMPLE_OMP_FOR. */
1653 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1654 cond_stmt
= last_stmt (loop
->header
);
1656 cvar
= gimple_cond_lhs (cond_stmt
);
1657 cvar_base
= SSA_NAME_VAR (cvar
);
1658 phi
= SSA_NAME_DEF_STMT (cvar
);
1659 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1660 initvar
= copy_ssa_name (cvar
, NULL
);
1661 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1663 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1665 gsi
= gsi_last_nondebug_bb (loop
->latch
);
1666 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1667 gsi_remove (&gsi
, true);
1670 for_bb
= split_edge (loop_preheader_edge (loop
));
1671 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1672 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1673 gcc_assert (exit
== single_dom_exit (loop
));
1675 guard
= make_edge (for_bb
, ex_bb
, 0);
1676 single_succ_edge (loop
->latch
)->flags
= 0;
1677 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1678 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1680 source_location locus
;
1682 phi
= gsi_stmt (gsi
);
1683 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1685 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1686 locus
= gimple_phi_arg_location_from_edge (stmt
,
1687 loop_preheader_edge (loop
));
1688 add_phi_arg (phi
, def
, guard
, locus
);
1690 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1691 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1692 add_phi_arg (phi
, def
, end
, locus
);
1694 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1695 PENDING_STMT (e
) = NULL
;
1697 /* Emit GIMPLE_OMP_FOR. */
1698 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1699 type
= TREE_TYPE (cvar
);
1700 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
1701 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1703 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
1704 gimple_set_location (for_stmt
, loc
);
1705 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1706 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1707 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1708 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1709 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1711 build_int_cst (type
, 1)));
1713 gsi
= gsi_last_bb (for_bb
);
1714 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1715 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1717 /* Emit GIMPLE_OMP_CONTINUE. */
1718 gsi
= gsi_last_bb (loop
->latch
);
1719 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1720 gimple_set_location (stmt
, loc
);
1721 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1722 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1724 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1725 gsi
= gsi_last_bb (ex_bb
);
1726 stmt
= gimple_build_omp_return (true);
1727 gimple_set_location (stmt
, loc
);
1728 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1730 /* After the above dom info is hosed. Re-compute it. */
1731 free_dominance_info (CDI_DOMINATORS
);
1732 calculate_dominance_info (CDI_DOMINATORS
);
1737 /* Generates code to execute the iterations of LOOP in N_THREADS
1738 threads in parallel.
1740 NITER describes number of iterations of LOOP.
1741 REDUCTION_LIST describes the reductions existent in the LOOP. */
1744 gen_parallel_loop (struct loop
*loop
, reduction_info_table_type reduction_list
,
1745 unsigned n_threads
, struct tree_niter_desc
*niter
)
1748 tree many_iterations_cond
, type
, nit
;
1749 tree arg_struct
, new_arg_struct
;
1751 basic_block parallel_head
;
1753 struct clsn_data clsn_data
;
1757 unsigned int m_p_thread
=2;
1761 ---------------------------------------------------------------------
1764 IV = phi (INIT, IV + STEP)
1770 ---------------------------------------------------------------------
1772 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1773 we generate the following code:
1775 ---------------------------------------------------------------------
1778 || NITER < MIN_PER_THREAD * N_THREADS)
1782 store all local loop-invariant variables used in body of the loop to DATA.
1783 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1784 load the variables from DATA.
1785 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1788 GIMPLE_OMP_CONTINUE;
1789 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1790 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1796 IV = phi (INIT, IV + STEP)
1807 /* Create two versions of the loop -- in the old one, we know that the
1808 number of iterations is large enough, and we will transform it into the
1809 loop that will be split to loop_fn, the new one will be used for the
1810 remaining iterations. */
1812 /* We should compute a better number-of-iterations value for outer loops.
1815 for (i = 0; i < n; ++i)
1816 for (j = 0; j < m; ++j)
1819 we should compute nit = n * m, not nit = n.
1820 Also may_be_zero handling would need to be adjusted. */
1822 type
= TREE_TYPE (niter
->niter
);
1823 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1826 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1831 m_p_thread
=MIN_PER_THREAD
;
1833 many_iterations_cond
=
1834 fold_build2 (GE_EXPR
, boolean_type_node
,
1835 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
1837 many_iterations_cond
1838 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1839 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1840 many_iterations_cond
);
1841 many_iterations_cond
1842 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1844 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1845 if (!is_gimple_condexpr (many_iterations_cond
))
1847 many_iterations_cond
1848 = force_gimple_operand (many_iterations_cond
, &stmts
,
1851 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1854 initialize_original_copy_tables ();
1856 /* We assume that the loop usually iterates a lot. */
1857 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1858 loop_version (loop
, many_iterations_cond
, NULL
,
1859 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1860 update_ssa (TODO_update_ssa
);
1861 free_original_copy_tables ();
1863 /* Base all the induction variables in LOOP on a single control one. */
1864 canonicalize_loop_ivs (loop
, &nit
, true);
1866 /* Ensure that the exit condition is the first statement in the loop. */
1867 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1869 /* Generate initializations for reductions. */
1870 if (reduction_list
.elements () > 0)
1871 reduction_list
.traverse
<struct loop
*, initialize_reductions
> (loop
);
1873 /* Eliminate the references to local variables from the loop. */
1874 gcc_assert (single_exit (loop
));
1875 entry
= loop_preheader_edge (loop
);
1876 exit
= single_dom_exit (loop
);
1878 eliminate_local_variables (entry
, exit
);
1879 /* In the old loop, move all variables non-local to the loop to a structure
1880 and back, and create separate decls for the variables used in loop. */
1881 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1882 &new_arg_struct
, &clsn_data
);
1884 /* Create the parallel constructs. */
1885 loc
= UNKNOWN_LOCATION
;
1886 cond_stmt
= last_stmt (loop
->header
);
1888 loc
= gimple_location (cond_stmt
);
1889 parallel_head
= create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
1890 new_arg_struct
, n_threads
, loc
);
1891 if (reduction_list
.elements () > 0)
1892 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1896 /* Cancel the loop (it is simpler to do it here rather than to teach the
1897 expander to do it). */
1898 cancel_loop_tree (loop
);
1900 /* Free loop bound estimations that could contain references to
1901 removed statements. */
1902 FOR_EACH_LOOP (li
, loop
, 0)
1903 free_numbers_of_iterations_estimates_loop (loop
);
1905 /* Expand the parallel constructs. We do it directly here instead of running
1906 a separate expand_omp pass, since it is more efficient, and less likely to
1907 cause troubles with further analyses not being able to deal with the
1910 omp_expand_local (parallel_head
);
1913 /* Returns true when LOOP contains vector phi nodes. */
1916 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1919 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1920 gimple_stmt_iterator gsi
;
1923 for (i
= 0; i
< loop
->num_nodes
; i
++)
1924 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1925 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1934 /* Create a reduction_info struct, initialize it with REDUC_STMT
1935 and PHI, insert it to the REDUCTION_LIST. */
1938 build_new_reduction (reduction_info_table_type reduction_list
,
1939 gimple reduc_stmt
, gimple phi
)
1941 reduction_info
**slot
;
1942 struct reduction_info
*new_reduction
;
1944 gcc_assert (reduc_stmt
);
1946 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1949 "Detected reduction. reduction stmt is: \n");
1950 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1951 fprintf (dump_file
, "\n");
1954 new_reduction
= XCNEW (struct reduction_info
);
1956 new_reduction
->reduc_stmt
= reduc_stmt
;
1957 new_reduction
->reduc_phi
= phi
;
1958 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
1959 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1960 slot
= reduction_list
.find_slot (new_reduction
, INSERT
);
1961 *slot
= new_reduction
;
1964 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1967 set_reduc_phi_uids (reduction_info
**slot
, void *data ATTRIBUTE_UNUSED
)
1969 struct reduction_info
*const red
= *slot
;
1970 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1974 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1977 gather_scalar_reductions (loop_p loop
, reduction_info_table_type reduction_list
)
1979 gimple_stmt_iterator gsi
;
1980 loop_vec_info simple_loop_info
;
1982 simple_loop_info
= vect_analyze_loop_form (loop
);
1984 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1986 gimple phi
= gsi_stmt (gsi
);
1988 tree res
= PHI_RESULT (phi
);
1991 if (virtual_operand_p (res
))
1994 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1995 && simple_loop_info
)
1997 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
2000 if (reduc_stmt
&& !double_reduc
)
2001 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
2004 destroy_loop_vec_info (simple_loop_info
, true);
2006 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2007 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2009 reduction_list
.traverse
<void *, set_reduc_phi_uids
> (NULL
);
2012 /* Try to initialize NITER for code generation part. */
2015 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2017 edge exit
= single_dom_exit (loop
);
2021 /* We need to know # of iterations, and there should be no uses of values
2022 defined inside loop outside of it, unless the values are invariants of
2024 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2026 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2027 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2034 /* Try to initialize REDUCTION_LIST for code generation part.
2035 REDUCTION_LIST describes the reductions. */
2038 try_create_reduction_list (loop_p loop
,
2039 reduction_info_table_type reduction_list
)
2041 edge exit
= single_dom_exit (loop
);
2042 gimple_stmt_iterator gsi
;
2046 gather_scalar_reductions (loop
, reduction_list
);
2049 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2051 gimple phi
= gsi_stmt (gsi
);
2052 struct reduction_info
*red
;
2053 imm_use_iterator imm_iter
;
2054 use_operand_p use_p
;
2056 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2058 if (!virtual_operand_p (val
))
2060 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2062 fprintf (dump_file
, "phi is ");
2063 print_gimple_stmt (dump_file
, phi
, 0, 0);
2064 fprintf (dump_file
, "arg of phi to exit: value ");
2065 print_generic_expr (dump_file
, val
, 0);
2066 fprintf (dump_file
, " used outside loop\n");
2068 " checking if it a part of reduction pattern: \n");
2070 if (reduction_list
.elements () == 0)
2072 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2074 " FAILED: it is not a part of reduction.\n");
2078 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2080 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2081 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2083 reduc_phi
= USE_STMT (use_p
);
2087 red
= reduction_phi (reduction_list
, reduc_phi
);
2090 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2092 " FAILED: it is not a part of reduction.\n");
2095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2097 fprintf (dump_file
, "reduction phi is ");
2098 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2099 fprintf (dump_file
, "reduction stmt is ");
2100 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2105 /* The iterations of the loop may communicate only through bivs whose
2106 iteration space can be distributed efficiently. */
2107 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2109 gimple phi
= gsi_stmt (gsi
);
2110 tree def
= PHI_RESULT (phi
);
2113 if (!virtual_operand_p (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2115 struct reduction_info
*red
;
2117 red
= reduction_phi (reduction_list
, phi
);
2120 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2122 " FAILED: scalar dependency between iterations\n");
2132 /* Detect parallel loops and generate parallel code using libgomp
2133 primitives. Returns true if some loop was parallelized, false
2137 parallelize_loops (void)
2139 unsigned n_threads
= flag_tree_parallelize_loops
;
2140 bool changed
= false;
2142 struct tree_niter_desc niter_desc
;
2144 reduction_info_table_type reduction_list
;
2145 struct obstack parloop_obstack
;
2146 HOST_WIDE_INT estimated
;
2149 /* Do not parallelize loops in the functions created by parallelization. */
2150 if (parallelized_function_p (cfun
->decl
))
2152 if (cfun
->has_nonlocal_label
)
2155 gcc_obstack_init (&parloop_obstack
);
2156 reduction_list
.create (10);
2157 init_stmt_vec_info_vec ();
2159 FOR_EACH_LOOP (li
, loop
, 0)
2161 reduction_list
.empty ();
2162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2164 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2166 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2168 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2171 /* If we use autopar in graphite pass, we use its marked dependency
2172 checking results. */
2173 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2175 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2176 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2180 if (!single_dom_exit (loop
))
2183 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2184 fprintf (dump_file
, "loop is !single_dom_exit\n");
2189 if (/* And of course, the loop must be parallelizable. */
2190 !can_duplicate_loop_p (loop
)
2191 || loop_has_blocks_with_irreducible_flag (loop
)
2192 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2193 /* FIXME: the check for vector phi nodes could be removed. */
2194 || loop_has_vector_phi_nodes (loop
))
2197 estimated
= estimated_stmt_executions_int (loop
);
2198 if (estimated
== -1)
2199 estimated
= max_stmt_executions_int (loop
);
2200 /* FIXME: Bypass this check as graphite doesn't update the
2201 count and frequency correctly now. */
2202 if (!flag_loop_parallelize_all
2203 && ((estimated
!= -1
2204 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2205 /* Do not bother with loops in cold areas. */
2206 || optimize_loop_nest_for_size_p (loop
)))
2209 if (!try_get_loop_niter (loop
, &niter_desc
))
2212 if (!try_create_reduction_list (loop
, reduction_list
))
2215 if (!flag_loop_parallelize_all
2216 && !loop_parallel_p (loop
, &parloop_obstack
))
2220 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2223 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2225 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2226 loop_loc
= find_loop_location (loop
);
2227 if (loop_loc
!= UNKNOWN_LOC
)
2228 fprintf (dump_file
, "\nloop at %s:%d: ",
2229 LOC_FILE (loop_loc
), LOC_LINE (loop_loc
));
2231 gen_parallel_loop (loop
, reduction_list
,
2232 n_threads
, &niter_desc
);
2235 free_stmt_vec_info_vec ();
2236 reduction_list
.dispose ();
2237 obstack_free (&parloop_obstack
, NULL
);
2239 /* Parallelization will cause new function calls to be inserted through
2240 which local variables will escape. Reset the points-to solution
2243 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2248 /* Parallelization. */
2251 gate_tree_parallelize_loops (void)
2253 return flag_tree_parallelize_loops
> 1;
2257 tree_parallelize_loops (void)
2259 if (number_of_loops (cfun
) <= 1)
2262 if (parallelize_loops ())
2263 return TODO_cleanup_cfg
| TODO_rebuild_alias
;
2269 const pass_data pass_data_parallelize_loops
=
2271 GIMPLE_PASS
, /* type */
2272 "parloops", /* name */
2273 OPTGROUP_LOOP
, /* optinfo_flags */
2274 true, /* has_gate */
2275 true, /* has_execute */
2276 TV_TREE_PARALLELIZE_LOOPS
, /* tv_id */
2277 ( PROP_cfg
| PROP_ssa
), /* properties_required */
2278 0, /* properties_provided */
2279 0, /* properties_destroyed */
2280 0, /* todo_flags_start */
2281 TODO_verify_flow
, /* todo_flags_finish */
2284 class pass_parallelize_loops
: public gimple_opt_pass
2287 pass_parallelize_loops (gcc::context
*ctxt
)
2288 : gimple_opt_pass (pass_data_parallelize_loops
, ctxt
)
2291 /* opt_pass methods: */
2292 bool gate () { return gate_tree_parallelize_loops (); }
2293 unsigned int execute () { return tree_parallelize_loops (); }
2295 }; // class pass_parallelize_loops
2300 make_pass_parallelize_loops (gcc::context
*ctxt
)
2302 return new pass_parallelize_loops (ctxt
);
2306 #include "gt-tree-parloops.h"