1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "fold-const.h"
33 #include "hard-reg-set.h"
36 #include "dominance.h"
38 #include "basic-block.h"
39 #include "tree-ssa-alias.h"
40 #include "internal-fn.h"
41 #include "gimple-expr.h"
45 #include "gimple-iterator.h"
46 #include "gimplify-me.h"
47 #include "gimple-walk.h"
48 #include "stor-layout.h"
49 #include "tree-nested.h"
50 #include "gimple-ssa.h"
52 #include "tree-phinodes.h"
53 #include "ssa-iterators.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-ssa-loop-ivopts.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "tree-ssa-loop-niter.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
62 #include "tree-data-ref.h"
63 #include "tree-scalar-evolution.h"
64 #include "gimple-pretty-print.h"
65 #include "tree-pass.h"
66 #include "langhooks.h"
67 #include "tree-vectorizer.h"
68 #include "tree-hasher.h"
69 #include "tree-parloops.h"
71 #include "tree-nested.h"
72 #include "plugin-api.h"
77 /* This pass tries to distribute iterations of loops into several threads.
78 The implementation is straightforward -- for each loop we test whether its
79 iterations are independent, and if it is the case (and some additional
80 conditions regarding profitability and correctness are satisfied), we
81 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
84 The most of the complexity is in bringing the code into shape expected
86 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
87 variable and that the exit test is at the start of the loop body
88 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
89 variables by accesses through pointers, and breaking up ssa chains
90 by storing the values incoming to the parallelized loop to a structure
91 passed to the new function as an argument (something similar is done
92 in omp gimplification, unfortunately only a small part of the code
96 -- if there are several parallelizable loops in a function, it may be
97 possible to generate the threads just once (using synchronization to
98 ensure that cross-loop dependences are obeyed).
99 -- handling of common reduction patterns for outer loops.
101 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
104 currently we use vect_force_simple_reduction() to detect reduction patterns.
105 The code transformation will be introduced by an example.
112 for (i = 0; i < N; i++)
122 # sum_29 = PHI <sum_11(5), 1(3)>
123 # i_28 = PHI <i_12(5), 0(3)>
126 sum_11 = D.1795_8 + sum_29;
134 # sum_21 = PHI <sum_11(4)>
135 printf (&"%d"[0], sum_21);
138 after reduction transformation (only relevant parts):
146 # Storing the initial value given by the user. #
148 .paral_data_store.32.sum.27 = 1;
150 #pragma omp parallel num_threads(4)
152 #pragma omp for schedule(static)
154 # The neutral element corresponding to the particular
155 reduction's operation, e.g. 0 for PLUS_EXPR,
156 1 for MULT_EXPR, etc. replaces the user's initial value. #
158 # sum.27_29 = PHI <sum.27_11, 0>
160 sum.27_11 = D.1827_8 + sum.27_29;
164 # Adding this reduction phi is done at create_phi_for_local_result() #
165 # sum.27_56 = PHI <sum.27_11, 0>
168 # Creating the atomic operation is done at
169 create_call_for_reduction_1() #
171 #pragma omp atomic_load
172 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
173 D.1840_60 = sum.27_56 + D.1839_59;
174 #pragma omp atomic_store (D.1840_60);
178 # collecting the result after the join of the threads is done at
179 create_loads_for_reductions().
180 The value computed by the threads is loaded from the
184 .paral_data_load.33_52 = &.paral_data_store.32;
185 sum_37 = .paral_data_load.33_52->sum.27;
186 sum_43 = D.1795_41 + sum_37;
189 # sum_21 = PHI <sum_43, sum_26>
190 printf (&"%d"[0], sum_21);
198 /* Minimal number of iterations of a loop that should be executed in each
200 #define MIN_PER_THREAD 100
202 /* Element of the hashtable, representing a
203 reduction in the current loop. */
204 struct reduction_info
206 gimple reduc_stmt
; /* reduction statement. */
207 gimple reduc_phi
; /* The phi node defining the reduction. */
208 enum tree_code reduction_code
;/* code for the reduction operation. */
209 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
211 gphi
*keep_res
; /* The PHI_RESULT of this phi is the resulting value
212 of the reduction variable when existing the loop. */
213 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
214 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
215 tree init
; /* reduction initialization value. */
216 gphi
*new_phi
; /* (helper field) Newly created phi node whose result
217 will be passed to the atomic operation. Represents
218 the local result each thread computed for the reduction
222 /* Reduction info hashtable helpers. */
224 struct reduction_hasher
: typed_free_remove
<reduction_info
>
226 typedef reduction_info
*value_type
;
227 typedef reduction_info
*compare_type
;
228 static inline hashval_t
hash (const reduction_info
*);
229 static inline bool equal (const reduction_info
*, const reduction_info
*);
232 /* Equality and hash functions for hashtab code. */
235 reduction_hasher::equal (const reduction_info
*a
, const reduction_info
*b
)
237 return (a
->reduc_phi
== b
->reduc_phi
);
241 reduction_hasher::hash (const reduction_info
*a
)
243 return a
->reduc_version
;
246 typedef hash_table
<reduction_hasher
> reduction_info_table_type
;
249 static struct reduction_info
*
250 reduction_phi (reduction_info_table_type
*reduction_list
, gimple phi
)
252 struct reduction_info tmpred
, *red
;
254 if (reduction_list
->elements () == 0 || phi
== NULL
)
257 tmpred
.reduc_phi
= phi
;
258 tmpred
.reduc_version
= gimple_uid (phi
);
259 red
= reduction_list
->find (&tmpred
);
264 /* Element of hashtable of names to copy. */
266 struct name_to_copy_elt
268 unsigned version
; /* The version of the name to copy. */
269 tree new_name
; /* The new name used in the copy. */
270 tree field
; /* The field of the structure used to pass the
274 /* Name copies hashtable helpers. */
276 struct name_to_copy_hasher
: typed_free_remove
<name_to_copy_elt
>
278 typedef name_to_copy_elt
*value_type
;
279 typedef name_to_copy_elt
*compare_type
;
280 static inline hashval_t
hash (const name_to_copy_elt
*);
281 static inline bool equal (const name_to_copy_elt
*, const name_to_copy_elt
*);
284 /* Equality and hash functions for hashtab code. */
287 name_to_copy_hasher::equal (const name_to_copy_elt
*a
, const name_to_copy_elt
*b
)
289 return a
->version
== b
->version
;
293 name_to_copy_hasher::hash (const name_to_copy_elt
*a
)
295 return (hashval_t
) a
->version
;
298 typedef hash_table
<name_to_copy_hasher
> name_to_copy_table_type
;
300 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
301 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
302 represents the denominator for every element in the matrix. */
303 typedef struct lambda_trans_matrix_s
305 lambda_matrix matrix
;
309 } *lambda_trans_matrix
;
310 #define LTM_MATRIX(T) ((T)->matrix)
311 #define LTM_ROWSIZE(T) ((T)->rowsize)
312 #define LTM_COLSIZE(T) ((T)->colsize)
313 #define LTM_DENOMINATOR(T) ((T)->denominator)
315 /* Allocate a new transformation matrix. */
317 static lambda_trans_matrix
318 lambda_trans_matrix_new (int colsize
, int rowsize
,
319 struct obstack
* lambda_obstack
)
321 lambda_trans_matrix ret
;
323 ret
= (lambda_trans_matrix
)
324 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
325 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
326 LTM_ROWSIZE (ret
) = rowsize
;
327 LTM_COLSIZE (ret
) = colsize
;
328 LTM_DENOMINATOR (ret
) = 1;
332 /* Multiply a vector VEC by a matrix MAT.
333 MAT is an M*N matrix, and VEC is a vector with length N. The result
334 is stored in DEST which must be a vector of length M. */
337 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
338 lambda_vector vec
, lambda_vector dest
)
342 lambda_vector_clear (dest
, m
);
343 for (i
= 0; i
< m
; i
++)
344 for (j
= 0; j
< n
; j
++)
345 dest
[i
] += matrix
[i
][j
] * vec
[j
];
348 /* Return true if TRANS is a legal transformation matrix that respects
349 the dependence vectors in DISTS and DIRS. The conservative answer
352 "Wolfe proves that a unimodular transformation represented by the
353 matrix T is legal when applied to a loop nest with a set of
354 lexicographically non-negative distance vectors RDG if and only if
355 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
356 i.e.: if and only if it transforms the lexicographically positive
357 distance vectors to lexicographically positive vectors. Note that
358 a unimodular matrix must transform the zero vector (and only it) to
359 the zero vector." S.Muchnick. */
362 lambda_transform_legal_p (lambda_trans_matrix trans
,
364 vec
<ddr_p
> dependence_relations
)
367 lambda_vector distres
;
368 struct data_dependence_relation
*ddr
;
370 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
371 && LTM_ROWSIZE (trans
) == nb_loops
);
373 /* When there are no dependences, the transformation is correct. */
374 if (dependence_relations
.length () == 0)
377 ddr
= dependence_relations
[0];
381 /* When there is an unknown relation in the dependence_relations, we
382 know that it is no worth looking at this loop nest: give up. */
383 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
386 distres
= lambda_vector_new (nb_loops
);
388 /* For each distance vector in the dependence graph. */
389 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
391 /* Don't care about relations for which we know that there is no
392 dependence, nor about read-read (aka. output-dependences):
393 these data accesses can happen in any order. */
394 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
395 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
398 /* Conservatively answer: "this transformation is not valid". */
399 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
402 /* If the dependence could not be captured by a distance vector,
403 conservatively answer that the transform is not valid. */
404 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
407 /* Compute trans.dist_vect */
408 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
410 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
411 DDR_DIST_VECT (ddr
, j
), distres
);
413 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
420 /* Data dependency analysis. Returns true if the iterations of LOOP
421 are independent on each other (that is, if we can execute them
425 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
427 vec
<ddr_p
> dependence_relations
;
428 vec
<data_reference_p
> datarefs
;
429 lambda_trans_matrix trans
;
432 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
434 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
436 fprintf (dump_file
, "loop is innermost\n");
438 fprintf (dump_file
, "loop NOT innermost\n");
441 /* Check for problems with dependences. If the loop can be reversed,
442 the iterations are independent. */
443 auto_vec
<loop_p
, 3> loop_nest
;
444 datarefs
.create (10);
445 dependence_relations
.create (100);
446 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
447 &dependence_relations
))
449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
450 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
455 dump_data_dependence_relations (dump_file
, dependence_relations
);
457 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
458 LTM_MATRIX (trans
)[0][0] = -1;
460 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
463 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
464 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
466 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
468 " FAILED: data dependencies exist across iterations\n");
471 free_dependence_relations (dependence_relations
);
472 free_data_refs (datarefs
);
477 /* Return true when LOOP contains basic blocks marked with the
478 BB_IRREDUCIBLE_LOOP flag. */
481 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
484 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
487 for (i
= 0; i
< loop
->num_nodes
; i
++)
488 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
497 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
498 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
499 to their addresses that can be reused. The address of OBJ is known to
500 be invariant in the whole function. Other needed statements are placed
504 take_address_of (tree obj
, tree type
, edge entry
,
505 int_tree_htab_type
*decl_address
, gimple_stmt_iterator
*gsi
)
508 tree
*var_p
, name
, addr
;
512 /* Since the address of OBJ is invariant, the trees may be shared.
513 Avoid rewriting unrelated parts of the code. */
514 obj
= unshare_expr (obj
);
516 handled_component_p (*var_p
);
517 var_p
= &TREE_OPERAND (*var_p
, 0))
520 /* Canonicalize the access to base on a MEM_REF. */
522 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
524 /* Assign a canonical SSA name to the address of the base decl used
525 in the address and share it for all accesses and addresses based
527 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
530 int_tree_map
*slot
= decl_address
->find_slot (elt
, INSERT
);
535 addr
= TREE_OPERAND (*var_p
, 0);
537 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
539 name
= make_temp_ssa_name (TREE_TYPE (addr
), NULL
, obj_name
);
541 name
= make_ssa_name (TREE_TYPE (addr
));
542 stmt
= gimple_build_assign (name
, addr
);
543 gsi_insert_on_edge_immediate (entry
, stmt
);
551 /* Express the address in terms of the canonical SSA name. */
552 TREE_OPERAND (*var_p
, 0) = name
;
554 return build_fold_addr_expr_with_type (obj
, type
);
556 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
557 &stmts
, true, NULL_TREE
);
558 if (!gimple_seq_empty_p (stmts
))
559 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
561 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
563 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
565 if (!gimple_seq_empty_p (stmts
))
566 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
572 /* Callback for htab_traverse. Create the initialization statement
573 for reduction described in SLOT, and place it at the preheader of
574 the loop described in DATA. */
577 initialize_reductions (reduction_info
**slot
, struct loop
*loop
)
580 tree bvar
, type
, arg
;
583 struct reduction_info
*const reduc
= *slot
;
585 /* Create initialization in preheader:
586 reduction_variable = initialization value of reduction. */
588 /* In the phi node at the header, replace the argument coming
589 from the preheader with the reduction initialization value. */
591 /* Create a new variable to initialize the reduction. */
592 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
593 bvar
= create_tmp_var (type
, "reduction");
595 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
596 OMP_CLAUSE_REDUCTION
);
597 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
598 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
600 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
603 /* Replace the argument representing the initialization value
604 with the initialization value for the reduction (neutral
605 element for the particular operation, e.g. 0 for PLUS_EXPR,
606 1 for MULT_EXPR, etc).
607 Keep the old value in a new variable "reduction_initial",
608 that will be taken in consideration after the parallel
609 computing is done. */
611 e
= loop_preheader_edge (loop
);
612 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
613 /* Create new variable to hold the initial value. */
615 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
616 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
617 reduc
->initial_value
= arg
;
623 struct walk_stmt_info info
;
625 int_tree_htab_type
*decl_address
;
626 gimple_stmt_iterator
*gsi
;
631 /* Eliminates references to local variables in *TP out of the single
632 entry single exit region starting at DTA->ENTRY.
633 DECL_ADDRESS contains addresses of the references that had their
634 address taken already. If the expression is changed, CHANGED is
635 set to true. Callback for walk_tree. */
638 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
640 struct elv_data
*const dta
= (struct elv_data
*) data
;
641 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
647 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
650 type
= TREE_TYPE (t
);
651 addr_type
= build_pointer_type (type
);
652 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
654 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
660 *tp
= build_simple_mem_ref (addr
);
666 if (TREE_CODE (t
) == ADDR_EXPR
)
668 /* ADDR_EXPR may appear in two contexts:
669 -- as a gimple operand, when the address taken is a function invariant
670 -- as gimple rhs, when the resulting address in not a function
672 We do not need to do anything special in the latter case (the base of
673 the memory reference whose address is taken may be replaced in the
674 DECL_P case). The former case is more complicated, as we need to
675 ensure that the new address is still a gimple operand. Thus, it
676 is not sufficient to replace just the base of the memory reference --
677 we need to move the whole computation of the address out of the
679 if (!is_gimple_val (t
))
683 obj
= TREE_OPERAND (t
, 0);
684 var
= get_base_address (obj
);
685 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
688 addr_type
= TREE_TYPE (t
);
689 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
691 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
708 /* Moves the references to local variables in STMT at *GSI out of the single
709 entry single exit region starting at ENTRY. DECL_ADDRESS contains
710 addresses of the references that had their address taken
714 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
715 int_tree_htab_type
*decl_address
)
718 gimple stmt
= gsi_stmt (*gsi
);
720 memset (&dta
.info
, '\0', sizeof (dta
.info
));
722 dta
.decl_address
= decl_address
;
726 if (gimple_debug_bind_p (stmt
))
729 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
730 eliminate_local_variables_1
, &dta
.info
, NULL
);
733 gimple_debug_bind_reset_value (stmt
);
737 else if (gimple_clobber_p (stmt
))
739 stmt
= gimple_build_nop ();
740 gsi_replace (gsi
, stmt
, false);
746 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
753 /* Eliminates the references to local variables from the single entry
754 single exit region between the ENTRY and EXIT edges.
757 1) Taking address of a local variable -- these are moved out of the
758 region (and temporary variable is created to hold the address if
761 2) Dereferencing a local variable -- these are replaced with indirect
765 eliminate_local_variables (edge entry
, edge exit
)
768 auto_vec
<basic_block
, 3> body
;
770 gimple_stmt_iterator gsi
;
771 bool has_debug_stmt
= false;
772 int_tree_htab_type
decl_address (10);
773 basic_block entry_bb
= entry
->src
;
774 basic_block exit_bb
= exit
->dest
;
776 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
778 FOR_EACH_VEC_ELT (body
, i
, bb
)
779 if (bb
!= entry_bb
&& bb
!= exit_bb
)
780 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
781 if (is_gimple_debug (gsi_stmt (gsi
)))
783 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
784 has_debug_stmt
= true;
787 eliminate_local_variables_stmt (entry
, &gsi
, &decl_address
);
790 FOR_EACH_VEC_ELT (body
, i
, bb
)
791 if (bb
!= entry_bb
&& bb
!= exit_bb
)
792 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
793 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
794 eliminate_local_variables_stmt (entry
, &gsi
, &decl_address
);
797 /* Returns true if expression EXPR is not defined between ENTRY and
798 EXIT, i.e. if all its operands are defined outside of the region. */
801 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
803 basic_block entry_bb
= entry
->src
;
804 basic_block exit_bb
= exit
->dest
;
807 if (is_gimple_min_invariant (expr
))
810 if (TREE_CODE (expr
) == SSA_NAME
)
812 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
814 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
815 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
824 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
825 The copies are stored to NAME_COPIES, if NAME was already duplicated,
826 its duplicate stored in NAME_COPIES is returned.
828 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
829 duplicated, storing the copies in DECL_COPIES. */
832 separate_decls_in_region_name (tree name
, name_to_copy_table_type
*name_copies
,
833 int_tree_htab_type
*decl_copies
,
836 tree copy
, var
, var_copy
;
837 unsigned idx
, uid
, nuid
;
838 struct int_tree_map ielt
;
839 struct name_to_copy_elt elt
, *nelt
;
840 name_to_copy_elt
**slot
;
843 if (TREE_CODE (name
) != SSA_NAME
)
846 idx
= SSA_NAME_VERSION (name
);
848 slot
= name_copies
->find_slot_with_hash (&elt
, idx
,
849 copy_name_p
? INSERT
: NO_INSERT
);
851 return (*slot
)->new_name
;
855 copy
= duplicate_ssa_name (name
, NULL
);
856 nelt
= XNEW (struct name_to_copy_elt
);
858 nelt
->new_name
= copy
;
859 nelt
->field
= NULL_TREE
;
868 var
= SSA_NAME_VAR (name
);
872 uid
= DECL_UID (var
);
874 dslot
= decl_copies
->find_slot_with_hash (ielt
, uid
, INSERT
);
877 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
878 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
880 dslot
->to
= var_copy
;
882 /* Ensure that when we meet this decl next time, we won't duplicate
884 nuid
= DECL_UID (var_copy
);
886 dslot
= decl_copies
->find_slot_with_hash (ielt
, nuid
, INSERT
);
887 gcc_assert (!dslot
->to
);
889 dslot
->to
= var_copy
;
892 var_copy
= dslot
->to
;
894 replace_ssa_name_symbol (copy
, var_copy
);
898 /* Finds the ssa names used in STMT that are defined outside the
899 region between ENTRY and EXIT and replaces such ssa names with
900 their duplicates. The duplicates are stored to NAME_COPIES. Base
901 decls of all ssa names used in STMT (including those defined in
902 LOOP) are replaced with the new temporary variables; the
903 replacement decls are stored in DECL_COPIES. */
906 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
907 name_to_copy_table_type
*name_copies
,
908 int_tree_htab_type
*decl_copies
)
916 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
918 name
= DEF_FROM_PTR (def
);
919 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
920 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
922 gcc_assert (copy
== name
);
925 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
927 name
= USE_FROM_PTR (use
);
928 if (TREE_CODE (name
) != SSA_NAME
)
931 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
932 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
938 /* Finds the ssa names used in STMT that are defined outside the
939 region between ENTRY and EXIT and replaces such ssa names with
940 their duplicates. The duplicates are stored to NAME_COPIES. Base
941 decls of all ssa names used in STMT (including those defined in
942 LOOP) are replaced with the new temporary variables; the
943 replacement decls are stored in DECL_COPIES. */
946 separate_decls_in_region_debug (gimple stmt
,
947 name_to_copy_table_type
*name_copies
,
948 int_tree_htab_type
*decl_copies
)
953 struct int_tree_map ielt
;
954 struct name_to_copy_elt elt
;
955 name_to_copy_elt
**slot
;
958 if (gimple_debug_bind_p (stmt
))
959 var
= gimple_debug_bind_get_var (stmt
);
960 else if (gimple_debug_source_bind_p (stmt
))
961 var
= gimple_debug_source_bind_get_var (stmt
);
964 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
966 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
967 ielt
.uid
= DECL_UID (var
);
968 dslot
= decl_copies
->find_slot_with_hash (ielt
, ielt
.uid
, NO_INSERT
);
971 if (gimple_debug_bind_p (stmt
))
972 gimple_debug_bind_set_var (stmt
, dslot
->to
);
973 else if (gimple_debug_source_bind_p (stmt
))
974 gimple_debug_source_bind_set_var (stmt
, dslot
->to
);
976 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
978 name
= USE_FROM_PTR (use
);
979 if (TREE_CODE (name
) != SSA_NAME
)
982 elt
.version
= SSA_NAME_VERSION (name
);
983 slot
= name_copies
->find_slot_with_hash (&elt
, elt
.version
, NO_INSERT
);
986 gimple_debug_bind_reset_value (stmt
);
991 SET_USE (use
, (*slot
)->new_name
);
997 /* Callback for htab_traverse. Adds a field corresponding to the reduction
998 specified in SLOT. The type is passed in DATA. */
1001 add_field_for_reduction (reduction_info
**slot
, tree type
)
1004 struct reduction_info
*const red
= *slot
;
1005 tree var
= gimple_assign_lhs (red
->reduc_stmt
);
1006 tree field
= build_decl (gimple_location (red
->reduc_stmt
), FIELD_DECL
,
1007 SSA_NAME_IDENTIFIER (var
), TREE_TYPE (var
));
1009 insert_field_into_struct (type
, field
);
1016 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1017 described in SLOT. The type is passed in DATA. */
1020 add_field_for_name (name_to_copy_elt
**slot
, tree type
)
1022 struct name_to_copy_elt
*const elt
= *slot
;
1023 tree name
= ssa_name (elt
->version
);
1024 tree field
= build_decl (UNKNOWN_LOCATION
,
1025 FIELD_DECL
, SSA_NAME_IDENTIFIER (name
),
1028 insert_field_into_struct (type
, field
);
1034 /* Callback for htab_traverse. A local result is the intermediate result
1035 computed by a single
1036 thread, or the initial value in case no iteration was executed.
1037 This function creates a phi node reflecting these values.
1038 The phi's result will be stored in NEW_PHI field of the
1039 reduction's data structure. */
1042 create_phi_for_local_result (reduction_info
**slot
, struct loop
*loop
)
1044 struct reduction_info
*const reduc
= *slot
;
1047 basic_block store_bb
;
1049 source_location locus
;
1051 /* STORE_BB is the block where the phi
1052 should be stored. It is the destination of the loop exit.
1053 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1054 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1056 /* STORE_BB has two predecessors. One coming from the loop
1057 (the reduction's result is computed at the loop),
1058 and another coming from a block preceding the loop,
1060 are executed (the initial value should be taken). */
1061 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1062 e
= EDGE_PRED (store_bb
, 1);
1064 e
= EDGE_PRED (store_bb
, 0);
1065 local_res
= copy_ssa_name (gimple_assign_lhs (reduc
->reduc_stmt
));
1066 locus
= gimple_location (reduc
->reduc_stmt
);
1067 new_phi
= create_phi_node (local_res
, store_bb
);
1068 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1069 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1070 FALLTHRU_EDGE (loop
->latch
), locus
);
1071 reduc
->new_phi
= new_phi
;
1081 basic_block store_bb
;
1082 basic_block load_bb
;
1085 /* Callback for htab_traverse. Create an atomic instruction for the
1086 reduction described in SLOT.
1087 DATA annotates the place in memory the atomic operation relates to,
1088 and the basic block it needs to be generated in. */
1091 create_call_for_reduction_1 (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1093 struct reduction_info
*const reduc
= *slot
;
1094 gimple_stmt_iterator gsi
;
1095 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1100 tree t
, addr
, ref
, x
;
1101 tree tmp_load
, name
;
1104 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1105 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1107 addr
= build_addr (t
, current_function_decl
);
1109 /* Create phi node. */
1110 bb
= clsn_data
->load_bb
;
1112 gsi
= gsi_last_bb (bb
);
1113 e
= split_block (bb
, gsi_stmt (gsi
));
1116 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)));
1117 tmp_load
= make_ssa_name (tmp_load
);
1118 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1119 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1120 gsi
= gsi_start_bb (new_bb
);
1121 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1123 e
= split_block (new_bb
, load
);
1125 gsi
= gsi_start_bb (new_bb
);
1127 x
= fold_build2 (reduc
->reduction_code
,
1128 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1129 PHI_RESULT (reduc
->new_phi
));
1131 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1132 GSI_CONTINUE_LINKING
);
1134 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1138 /* Create the atomic operation at the join point of the threads.
1139 REDUCTION_LIST describes the reductions in the LOOP.
1140 LD_ST_DATA describes the shared data structure where
1141 shared data is stored in and loaded from. */
1143 create_call_for_reduction (struct loop
*loop
,
1144 reduction_info_table_type
*reduction_list
,
1145 struct clsn_data
*ld_st_data
)
1147 reduction_list
->traverse
<struct loop
*, create_phi_for_local_result
> (loop
);
1148 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1149 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1151 ->traverse
<struct clsn_data
*, create_call_for_reduction_1
> (ld_st_data
);
1154 /* Callback for htab_traverse. Loads the final reduction value at the
1155 join point of all threads, and inserts it in the right place. */
1158 create_loads_for_reductions (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1160 struct reduction_info
*const red
= *slot
;
1162 gimple_stmt_iterator gsi
;
1163 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1168 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1169 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1170 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1174 name
= PHI_RESULT (red
->keep_res
);
1175 stmt
= gimple_build_assign (name
, x
);
1177 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1179 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1180 !gsi_end_p (gsi
); gsi_next (&gsi
))
1181 if (gsi_stmt (gsi
) == red
->keep_res
)
1183 remove_phi_node (&gsi
, false);
1189 /* Load the reduction result that was stored in LD_ST_DATA.
1190 REDUCTION_LIST describes the list of reductions that the
1191 loads should be generated for. */
1193 create_final_loads_for_reduction (reduction_info_table_type
*reduction_list
,
1194 struct clsn_data
*ld_st_data
)
1196 gimple_stmt_iterator gsi
;
1200 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1201 t
= build_fold_addr_expr (ld_st_data
->store
);
1202 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1204 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1207 ->traverse
<struct clsn_data
*, create_loads_for_reductions
> (ld_st_data
);
1211 /* Callback for htab_traverse. Store the neutral value for the
1212 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1213 1 for MULT_EXPR, etc. into the reduction field.
1214 The reduction is specified in SLOT. The store information is
1218 create_stores_for_reduction (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1220 struct reduction_info
*const red
= *slot
;
1223 gimple_stmt_iterator gsi
;
1224 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1226 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1227 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1228 stmt
= gimple_build_assign (t
, red
->initial_value
);
1229 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1234 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1235 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1236 specified in SLOT. */
1239 create_loads_and_stores_for_name (name_to_copy_elt
**slot
,
1240 struct clsn_data
*clsn_data
)
1242 struct name_to_copy_elt
*const elt
= *slot
;
1245 gimple_stmt_iterator gsi
;
1246 tree type
= TREE_TYPE (elt
->new_name
);
1249 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1250 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1251 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1252 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1254 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1255 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1256 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1257 stmt
= gimple_build_assign (elt
->new_name
, t
);
1258 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1263 /* Moves all the variables used in LOOP and defined outside of it (including
1264 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1265 name) to a structure created for this purpose. The code
1273 is transformed this way:
1288 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1289 pointer `new' is intentionally not initialized (the loop will be split to a
1290 separate function later, and `new' will be initialized from its arguments).
1291 LD_ST_DATA holds information about the shared data structure used to pass
1292 information among the threads. It is initialized here, and
1293 gen_parallel_loop will pass it to create_call_for_reduction that
1294 needs this information. REDUCTION_LIST describes the reductions
1298 separate_decls_in_region (edge entry
, edge exit
,
1299 reduction_info_table_type
*reduction_list
,
1300 tree
*arg_struct
, tree
*new_arg_struct
,
1301 struct clsn_data
*ld_st_data
)
1304 basic_block bb1
= split_edge (entry
);
1305 basic_block bb0
= single_pred (bb1
);
1306 name_to_copy_table_type
name_copies (10);
1307 int_tree_htab_type
decl_copies (10);
1309 tree type
, type_name
, nvar
;
1310 gimple_stmt_iterator gsi
;
1311 struct clsn_data clsn_data
;
1312 auto_vec
<basic_block
, 3> body
;
1314 basic_block entry_bb
= bb1
;
1315 basic_block exit_bb
= exit
->dest
;
1316 bool has_debug_stmt
= false;
1318 entry
= single_succ_edge (entry_bb
);
1319 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1321 FOR_EACH_VEC_ELT (body
, i
, bb
)
1323 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1325 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1326 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1327 &name_copies
, &decl_copies
);
1329 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1331 gimple stmt
= gsi_stmt (gsi
);
1333 if (is_gimple_debug (stmt
))
1334 has_debug_stmt
= true;
1336 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1337 &name_copies
, &decl_copies
);
1342 /* Now process debug bind stmts. We must not create decls while
1343 processing debug stmts, so we defer their processing so as to
1344 make sure we will have debug info for as many variables as
1345 possible (all of those that were dealt with in the loop above),
1346 and discard those for which we know there's nothing we can
1349 FOR_EACH_VEC_ELT (body
, i
, bb
)
1350 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1352 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1354 gimple stmt
= gsi_stmt (gsi
);
1356 if (is_gimple_debug (stmt
))
1358 if (separate_decls_in_region_debug (stmt
, &name_copies
,
1361 gsi_remove (&gsi
, true);
1370 if (name_copies
.elements () == 0 && reduction_list
->elements () == 0)
1372 /* It may happen that there is nothing to copy (if there are only
1373 loop carried and external variables in the loop). */
1375 *new_arg_struct
= NULL
;
1379 /* Create the type for the structure to store the ssa names to. */
1380 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1381 type_name
= build_decl (UNKNOWN_LOCATION
,
1382 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1384 TYPE_NAME (type
) = type_name
;
1386 name_copies
.traverse
<tree
, add_field_for_name
> (type
);
1387 if (reduction_list
&& reduction_list
->elements () > 0)
1389 /* Create the fields for reductions. */
1390 reduction_list
->traverse
<tree
, add_field_for_reduction
> (type
);
1394 /* Create the loads and stores. */
1395 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1396 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1397 *new_arg_struct
= make_ssa_name (nvar
);
1399 ld_st_data
->store
= *arg_struct
;
1400 ld_st_data
->load
= *new_arg_struct
;
1401 ld_st_data
->store_bb
= bb0
;
1402 ld_st_data
->load_bb
= bb1
;
1405 .traverse
<struct clsn_data
*, create_loads_and_stores_for_name
>
1408 /* Load the calculation from memory (after the join of the threads). */
1410 if (reduction_list
&& reduction_list
->elements () > 0)
1413 ->traverse
<struct clsn_data
*, create_stores_for_reduction
>
1415 clsn_data
.load
= make_ssa_name (nvar
);
1416 clsn_data
.load_bb
= exit
->dest
;
1417 clsn_data
.store
= ld_st_data
->store
;
1418 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1423 /* Returns true if FN was created to run in parallel. */
1426 parallelized_function_p (tree fndecl
)
1428 cgraph_node
*node
= cgraph_node::get (fndecl
);
1429 gcc_assert (node
!= NULL
);
1430 return node
->parallelized_function
;
1433 /* Creates and returns an empty function that will receive the body of
1434 a parallelized loop. */
1437 create_loop_fn (location_t loc
)
1441 tree decl
, type
, name
, t
;
1442 struct function
*act_cfun
= cfun
;
1443 static unsigned loopfn_num
;
1445 loc
= LOCATION_LOCUS (loc
);
1446 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1447 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1448 clean_symbol_name (tname
);
1449 name
= get_identifier (tname
);
1450 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1452 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1453 TREE_STATIC (decl
) = 1;
1454 TREE_USED (decl
) = 1;
1455 DECL_ARTIFICIAL (decl
) = 1;
1456 DECL_IGNORED_P (decl
) = 0;
1457 TREE_PUBLIC (decl
) = 0;
1458 DECL_UNINLINABLE (decl
) = 1;
1459 DECL_EXTERNAL (decl
) = 0;
1460 DECL_CONTEXT (decl
) = NULL_TREE
;
1461 DECL_INITIAL (decl
) = make_node (BLOCK
);
1463 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1464 DECL_ARTIFICIAL (t
) = 1;
1465 DECL_IGNORED_P (t
) = 1;
1466 DECL_RESULT (decl
) = t
;
1468 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1470 DECL_ARTIFICIAL (t
) = 1;
1471 DECL_ARG_TYPE (t
) = ptr_type_node
;
1472 DECL_CONTEXT (t
) = decl
;
1474 DECL_ARGUMENTS (decl
) = t
;
1476 allocate_struct_function (decl
, false);
1478 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1480 set_cfun (act_cfun
);
1485 /* Replace uses of NAME by VAL in block BB. */
1488 replace_uses_in_bb_by (tree name
, tree val
, basic_block bb
)
1491 imm_use_iterator imm_iter
;
1493 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, name
)
1495 if (gimple_bb (use_stmt
) != bb
)
1498 use_operand_p use_p
;
1499 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1500 SET_USE (use_p
, val
);
1504 /* Replace uses of NAME by VAL in blocks BBS. */
1507 replace_uses_in_bbs_by (tree name
, tree val
, bitmap bbs
)
1510 imm_use_iterator imm_iter
;
1512 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, name
)
1514 if (!bitmap_bit_p (bbs
, gimple_bb (use_stmt
)->index
))
1517 use_operand_p use_p
;
1518 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1519 SET_USE (use_p
, val
);
1523 /* Do transformation from:
1530 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1531 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1535 sum_b = sum_a + sum_update
1543 ivtmp_b = ivtmp_a + 1;
1547 sum_z = PHI <sum_b (cond[1])>
1549 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1559 ivtmp_a = PHI <ivtmp_c (latch)>
1560 sum_a = PHI <sum_c (latch)>
1564 sum_b = sum_a + sum_update
1569 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571 if (ivtmp_c < n + 1)
1577 ivtmp_b = ivtmp_a + 1;
1581 sum_z = PHI <sum_c (newheader)>
1584 In unified diff format:
1589 + goto <bb newheader>
1592 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1593 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1594 + ivtmp_a = PHI <ivtmp_c (latch)>
1595 + sum_a = PHI <sum_c (latch)>
1599 sum_b = sum_a + sum_update
1606 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1607 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1608 + if (ivtmp_c < n + 1)
1614 ivtmp_b = ivtmp_a + 1;
1616 + goto <bb newheader>
1619 - sum_z = PHI <sum_b (cond[1])>
1620 + sum_z = PHI <sum_c (newheader)>
1622 Note: the example does not show any virtual phis, but these are handled more
1623 or less as reductions.
1626 Moves the exit condition of LOOP to the beginning of its header.
1627 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1631 transform_to_exit_first_loop_alt (struct loop
*loop
,
1632 reduction_info_table_type
*reduction_list
,
1635 basic_block header
= loop
->header
;
1636 basic_block latch
= loop
->latch
;
1637 edge exit
= single_dom_exit (loop
);
1638 basic_block exit_block
= exit
->dest
;
1639 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (exit
->src
));
1640 tree control
= gimple_cond_lhs (cond_stmt
);
1643 /* Gather the bbs dominated by the exit block. */
1644 bitmap exit_dominated
= BITMAP_ALLOC (NULL
);
1645 bitmap_set_bit (exit_dominated
, exit_block
->index
);
1646 vec
<basic_block
> exit_dominated_vec
1647 = get_dominated_by (CDI_DOMINATORS
, exit_block
);
1651 FOR_EACH_VEC_ELT (exit_dominated_vec
, i
, dom_bb
)
1652 bitmap_set_bit (exit_dominated
, dom_bb
->index
);
1654 exit_dominated_vec
.release ();
1656 /* Create the new_header block. */
1657 basic_block new_header
= split_block_before_cond_jump (exit
->src
);
1658 edge split_edge
= single_pred_edge (new_header
);
1660 /* Redirect entry edge to new_header. */
1661 edge entry
= loop_preheader_edge (loop
);
1662 e
= redirect_edge_and_branch (entry
, new_header
);
1663 gcc_assert (e
== entry
);
1665 /* Redirect post_inc_edge to new_header. */
1666 edge post_inc_edge
= single_succ_edge (latch
);
1667 e
= redirect_edge_and_branch (post_inc_edge
, new_header
);
1668 gcc_assert (e
== post_inc_edge
);
1670 /* Redirect post_cond_edge to header. */
1671 edge post_cond_edge
= single_pred_edge (latch
);
1672 e
= redirect_edge_and_branch (post_cond_edge
, header
);
1673 gcc_assert (e
== post_cond_edge
);
1675 /* Redirect split_edge to latch. */
1676 e
= redirect_edge_and_branch (split_edge
, latch
);
1677 gcc_assert (e
== split_edge
);
1679 /* Set the new loop bound. */
1680 gimple_cond_set_rhs (cond_stmt
, bound
);
1682 /* Repair the ssa. */
1683 vec
<edge_var_map
> *v
= redirect_edge_var_map_vector (post_inc_edge
);
1686 for (gsi
= gsi_start_phis (header
), i
= 0;
1687 !gsi_end_p (gsi
) && v
->iterate (i
, &vm
);
1688 gsi_next (&gsi
), i
++)
1690 gphi
*phi
= gsi
.phi ();
1691 tree res_a
= PHI_RESULT (phi
);
1693 /* Create new phi. */
1694 tree res_c
= copy_ssa_name (res_a
, phi
);
1695 gphi
*nphi
= create_phi_node (res_c
, new_header
);
1697 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1698 replace_uses_in_bb_by (res_a
, res_c
, new_header
);
1700 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1701 add_phi_arg (phi
, res_c
, post_cond_edge
, UNKNOWN_LOCATION
);
1703 /* Replace sum_b with sum_c in exit phi. Loop-closed ssa does not hold
1704 for virtuals, so we cannot get away with exit_block only. */
1705 tree res_b
= redirect_edge_var_map_def (vm
);
1706 replace_uses_in_bbs_by (res_b
, res_c
, exit_dominated
);
1708 struct reduction_info
*red
= reduction_phi (reduction_list
, phi
);
1709 gcc_assert (virtual_operand_p (res_a
)
1715 /* Register the new reduction phi. */
1716 red
->reduc_phi
= nphi
;
1717 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1720 gcc_assert (gsi_end_p (gsi
) && !v
->iterate (i
, &vm
));
1721 BITMAP_FREE (exit_dominated
);
1723 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1724 flush_pending_stmts (entry
);
1726 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1727 flush_pending_stmts (post_inc_edge
);
1729 /* Register the reduction exit phis. */
1730 for (gphi_iterator gsi
= gsi_start_phis (exit_block
);
1734 gphi
*phi
= gsi
.phi ();
1735 tree res_z
= PHI_RESULT (phi
);
1736 if (virtual_operand_p (res_z
))
1739 tree res_c
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1740 gimple reduc_phi
= SSA_NAME_DEF_STMT (res_c
);
1741 struct reduction_info
*red
= reduction_phi (reduction_list
, reduc_phi
);
1743 red
->keep_res
= phi
;
1746 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1747 then we're still using some fields, so only bother about fields that are
1748 still used: header and latch.
1749 The loop has a new header bb, so we update it. The latch bb stays the
1751 loop
->header
= new_header
;
1753 /* Recalculate dominance info. */
1754 free_dominance_info (CDI_DOMINATORS
);
1755 calculate_dominance_info (CDI_DOMINATORS
);
1758 /* Tries to moves the exit condition of LOOP to the beginning of its header
1759 without duplication of the loop body. NIT is the number of iterations of the
1760 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1761 transformation is successful. */
1764 try_transform_to_exit_first_loop_alt (struct loop
*loop
,
1765 reduction_info_table_type
*reduction_list
,
1768 /* Check whether the latch contains a single statement. */
1769 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop
->latch
)))
1772 /* Check whether the latch contains the loop iv increment. */
1773 edge back
= single_succ_edge (loop
->latch
);
1774 edge exit
= single_dom_exit (loop
);
1775 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (exit
->src
));
1776 tree control
= gimple_cond_lhs (cond_stmt
);
1777 gphi
*phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (control
));
1778 tree inc_res
= gimple_phi_arg_def (phi
, back
->dest_idx
);
1779 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res
)) != loop
->latch
)
1782 /* Check whether there's no code between the loop condition and the latch. */
1783 if (!single_pred_p (loop
->latch
)
1784 || single_pred (loop
->latch
) != exit
->src
)
1787 tree alt_bound
= NULL_TREE
;
1788 tree nit_type
= TREE_TYPE (nit
);
1790 /* Figure out whether nit + 1 overflows. */
1791 if (TREE_CODE (nit
) == INTEGER_CST
)
1793 if (!tree_int_cst_equal (nit
, TYPE_MAXVAL (nit_type
)))
1795 alt_bound
= fold_build2_loc (UNKNOWN_LOCATION
, PLUS_EXPR
, nit_type
,
1796 nit
, build_one_cst (nit_type
));
1798 gcc_assert (TREE_CODE (alt_bound
) == INTEGER_CST
);
1802 /* Todo: Figure out if we can trigger this, if it's worth to handle
1803 optimally, and if we can handle it optimally. */
1808 gcc_assert (TREE_CODE (nit
) == SSA_NAME
);
1810 gimple def
= SSA_NAME_DEF_STMT (nit
);
1813 && is_gimple_assign (def
)
1814 && gimple_assign_rhs_code (def
) == PLUS_EXPR
)
1816 tree op1
= gimple_assign_rhs1 (def
);
1817 tree op2
= gimple_assign_rhs2 (def
);
1818 if (integer_minus_onep (op1
))
1820 else if (integer_minus_onep (op2
))
1824 /* There is a number of test-cases for which we don't get an alt_bound
1825 here: they're listed here, with the lhs of the last stmt as the nit:
1827 libgomp.graphite/force-parallel-1.c:
1828 _21 = (signed long) N_6(D);
1830 _7 = (unsigned long) _19;
1832 libgomp.graphite/force-parallel-2.c:
1833 _33 = (signed long) N_9(D);
1835 _37 = (unsigned long) _16;
1837 libgomp.graphite/force-parallel-5.c:
1839 # graphite_IV.5_46 = PHI <0(5), graphite_IV.5_47(11)>
1841 _33 = (unsigned long) graphite_IV.5_46;
1843 g++.dg/tree-ssa/pr34355.C:
1844 _2 = (unsigned int) i_9;
1849 _18 = (unsigned int) _5;
1851 We will be able to handle some of these cases, if we can determine when
1852 it's safe to look past casts. */
1855 if (alt_bound
== NULL_TREE
)
1858 transform_to_exit_first_loop_alt (loop
, reduction_list
, alt_bound
);
1862 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1863 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1867 transform_to_exit_first_loop (struct loop
*loop
,
1868 reduction_info_table_type
*reduction_list
,
1871 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1874 edge exit
= single_dom_exit (loop
), hpred
;
1875 tree control
, control_name
, res
, t
;
1878 gcond
*cond_stmt
, *cond_nit
;
1881 split_block_after_labels (loop
->header
);
1882 orig_header
= single_succ (loop
->header
);
1883 hpred
= single_succ_edge (loop
->header
);
1885 cond_stmt
= as_a
<gcond
*> (last_stmt (exit
->src
));
1886 control
= gimple_cond_lhs (cond_stmt
);
1887 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1889 /* Make sure that we have phi nodes on exit for all loop header phis
1890 (create_parallel_loop requires that). */
1891 for (gphi_iterator gsi
= gsi_start_phis (loop
->header
);
1896 res
= PHI_RESULT (phi
);
1897 t
= copy_ssa_name (res
, phi
);
1898 SET_PHI_RESULT (phi
, t
);
1899 nphi
= create_phi_node (res
, orig_header
);
1900 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1904 gimple_cond_set_lhs (cond_stmt
, t
);
1905 update_stmt (cond_stmt
);
1910 bbs
= get_loop_body_in_dom_order (loop
);
1912 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1914 nbbs
= XNEWVEC (basic_block
, n
);
1915 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1922 /* Other than reductions, the only gimple reg that should be copied
1923 out of the loop is the control variable. */
1924 exit
= single_dom_exit (loop
);
1925 control_name
= NULL_TREE
;
1926 for (gphi_iterator gsi
= gsi_start_phis (ex_bb
);
1930 res
= PHI_RESULT (phi
);
1931 if (virtual_operand_p (res
))
1937 /* Check if it is a part of reduction. If it is,
1938 keep the phi at the reduction's keep_res field. The
1939 PHI_RESULT of this phi is the resulting value of the reduction
1940 variable when exiting the loop. */
1942 if (reduction_list
->elements () > 0)
1944 struct reduction_info
*red
;
1946 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1947 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1950 red
->keep_res
= phi
;
1955 gcc_assert (control_name
== NULL_TREE
1956 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1958 remove_phi_node (&gsi
, false);
1960 gcc_assert (control_name
!= NULL_TREE
);
1962 /* Initialize the control variable to number of iterations
1963 according to the rhs of the exit condition. */
1964 gimple_stmt_iterator gsi
= gsi_after_labels (ex_bb
);
1965 cond_nit
= as_a
<gcond
*> (last_stmt (exit
->src
));
1966 nit_1
= gimple_cond_rhs (cond_nit
);
1967 nit_1
= force_gimple_operand_gsi (&gsi
,
1968 fold_convert (TREE_TYPE (control_name
), nit_1
),
1969 false, NULL_TREE
, false, GSI_SAME_STMT
);
1970 stmt
= gimple_build_assign (control_name
, nit_1
);
1971 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1974 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1975 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1976 NEW_DATA is the variable that should be initialized from the argument
1977 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1978 basic block containing GIMPLE_OMP_PARALLEL tree. */
1981 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1982 tree new_data
, unsigned n_threads
, location_t loc
)
1984 gimple_stmt_iterator gsi
;
1985 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1987 gomp_parallel
*omp_par_stmt
;
1988 gimple omp_return_stmt1
, omp_return_stmt2
;
1992 gomp_continue
*omp_cont_stmt
;
1993 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1994 edge exit
, nexit
, guard
, end
, e
;
1996 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1997 bb
= loop_preheader_edge (loop
)->src
;
1998 paral_bb
= single_pred (bb
);
1999 gsi
= gsi_last_bb (paral_bb
);
2001 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
2002 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
2003 = build_int_cst (integer_type_node
, n_threads
);
2004 omp_par_stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
2005 gimple_set_location (omp_par_stmt
, loc
);
2007 gsi_insert_after (&gsi
, omp_par_stmt
, GSI_NEW_STMT
);
2009 /* Initialize NEW_DATA. */
2012 gassign
*assign_stmt
;
2014 gsi
= gsi_after_labels (bb
);
2016 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
));
2017 assign_stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
2018 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
2020 assign_stmt
= gimple_build_assign (new_data
,
2021 fold_convert (TREE_TYPE (new_data
), param
));
2022 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
2025 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2026 bb
= split_loop_exit_edge (single_dom_exit (loop
));
2027 gsi
= gsi_last_bb (bb
);
2028 omp_return_stmt1
= gimple_build_omp_return (false);
2029 gimple_set_location (omp_return_stmt1
, loc
);
2030 gsi_insert_after (&gsi
, omp_return_stmt1
, GSI_NEW_STMT
);
2032 /* Extract data for GIMPLE_OMP_FOR. */
2033 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
2034 cond_stmt
= as_a
<gcond
*> (last_stmt (loop
->header
));
2036 cvar
= gimple_cond_lhs (cond_stmt
);
2037 cvar_base
= SSA_NAME_VAR (cvar
);
2038 phi
= SSA_NAME_DEF_STMT (cvar
);
2039 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
2040 initvar
= copy_ssa_name (cvar
);
2041 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
2043 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
2045 gsi
= gsi_last_nondebug_bb (loop
->latch
);
2046 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
2047 gsi_remove (&gsi
, true);
2050 for_bb
= split_edge (loop_preheader_edge (loop
));
2051 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
2052 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
2053 gcc_assert (exit
== single_dom_exit (loop
));
2055 guard
= make_edge (for_bb
, ex_bb
, 0);
2056 single_succ_edge (loop
->latch
)->flags
= 0;
2057 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
2058 for (gphi_iterator gpi
= gsi_start_phis (ex_bb
);
2059 !gsi_end_p (gpi
); gsi_next (&gpi
))
2061 source_location locus
;
2063 gphi
*phi
= gpi
.phi ();
2066 stmt
= as_a
<gphi
*> (
2067 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
)));
2069 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
2070 locus
= gimple_phi_arg_location_from_edge (stmt
,
2071 loop_preheader_edge (loop
));
2072 add_phi_arg (phi
, def
, guard
, locus
);
2074 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
2075 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
2076 add_phi_arg (phi
, def
, end
, locus
);
2078 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
2079 PENDING_STMT (e
) = NULL
;
2081 /* Emit GIMPLE_OMP_FOR. */
2082 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
2083 type
= TREE_TYPE (cvar
);
2084 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
2085 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
2087 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
2088 gimple_set_location (for_stmt
, loc
);
2089 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
2090 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
2091 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
2092 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
2093 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
2095 build_int_cst (type
, 1)));
2097 gsi
= gsi_last_bb (for_bb
);
2098 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
2099 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
2101 /* Emit GIMPLE_OMP_CONTINUE. */
2102 gsi
= gsi_last_bb (loop
->latch
);
2103 omp_cont_stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
2104 gimple_set_location (omp_cont_stmt
, loc
);
2105 gsi_insert_after (&gsi
, omp_cont_stmt
, GSI_NEW_STMT
);
2106 SSA_NAME_DEF_STMT (cvar_next
) = omp_cont_stmt
;
2108 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2109 gsi
= gsi_last_bb (ex_bb
);
2110 omp_return_stmt2
= gimple_build_omp_return (true);
2111 gimple_set_location (omp_return_stmt2
, loc
);
2112 gsi_insert_after (&gsi
, omp_return_stmt2
, GSI_NEW_STMT
);
2114 /* After the above dom info is hosed. Re-compute it. */
2115 free_dominance_info (CDI_DOMINATORS
);
2116 calculate_dominance_info (CDI_DOMINATORS
);
2121 /* Generates code to execute the iterations of LOOP in N_THREADS
2122 threads in parallel.
2124 NITER describes number of iterations of LOOP.
2125 REDUCTION_LIST describes the reductions existent in the LOOP. */
2128 gen_parallel_loop (struct loop
*loop
,
2129 reduction_info_table_type
*reduction_list
,
2130 unsigned n_threads
, struct tree_niter_desc
*niter
)
2132 tree many_iterations_cond
, type
, nit
;
2133 tree arg_struct
, new_arg_struct
;
2136 struct clsn_data clsn_data
;
2140 unsigned int m_p_thread
=2;
2144 ---------------------------------------------------------------------
2147 IV = phi (INIT, IV + STEP)
2153 ---------------------------------------------------------------------
2155 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2156 we generate the following code:
2158 ---------------------------------------------------------------------
2161 || NITER < MIN_PER_THREAD * N_THREADS)
2165 store all local loop-invariant variables used in body of the loop to DATA.
2166 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2167 load the variables from DATA.
2168 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2171 GIMPLE_OMP_CONTINUE;
2172 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2173 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2179 IV = phi (INIT, IV + STEP)
2190 /* Create two versions of the loop -- in the old one, we know that the
2191 number of iterations is large enough, and we will transform it into the
2192 loop that will be split to loop_fn, the new one will be used for the
2193 remaining iterations. */
2195 /* We should compute a better number-of-iterations value for outer loops.
2198 for (i = 0; i < n; ++i)
2199 for (j = 0; j < m; ++j)
2202 we should compute nit = n * m, not nit = n.
2203 Also may_be_zero handling would need to be adjusted. */
2205 type
= TREE_TYPE (niter
->niter
);
2206 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
2209 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2214 m_p_thread
=MIN_PER_THREAD
;
2216 many_iterations_cond
=
2217 fold_build2 (GE_EXPR
, boolean_type_node
,
2218 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
2220 many_iterations_cond
2221 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2222 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
2223 many_iterations_cond
);
2224 many_iterations_cond
2225 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
2227 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2228 if (!is_gimple_condexpr (many_iterations_cond
))
2230 many_iterations_cond
2231 = force_gimple_operand (many_iterations_cond
, &stmts
,
2234 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2237 initialize_original_copy_tables ();
2239 /* We assume that the loop usually iterates a lot. */
2240 prob
= 4 * REG_BR_PROB_BASE
/ 5;
2241 loop_version (loop
, many_iterations_cond
, NULL
,
2242 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
2243 update_ssa (TODO_update_ssa
);
2244 free_original_copy_tables ();
2246 /* Base all the induction variables in LOOP on a single control one. */
2247 canonicalize_loop_ivs (loop
, &nit
, true);
2249 /* Ensure that the exit condition is the first statement in the loop.
2250 The common case is that latch of the loop is empty (apart from the
2251 increment) and immediately follows the loop exit test. Attempt to move the
2252 entry of the loop directly before the exit check and increase the number of
2253 iterations of the loop by one. */
2254 if (!try_transform_to_exit_first_loop_alt (loop
, reduction_list
, nit
))
2256 /* Fall back on the method that handles more cases, but duplicates the
2257 loop body: move the exit condition of LOOP to the beginning of its
2258 header, and duplicate the part of the last iteration that gets disabled
2259 to the exit of the loop. */
2260 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
2263 /* Generate initializations for reductions. */
2264 if (reduction_list
->elements () > 0)
2265 reduction_list
->traverse
<struct loop
*, initialize_reductions
> (loop
);
2267 /* Eliminate the references to local variables from the loop. */
2268 gcc_assert (single_exit (loop
));
2269 entry
= loop_preheader_edge (loop
);
2270 exit
= single_dom_exit (loop
);
2272 eliminate_local_variables (entry
, exit
);
2273 /* In the old loop, move all variables non-local to the loop to a structure
2274 and back, and create separate decls for the variables used in loop. */
2275 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
2276 &new_arg_struct
, &clsn_data
);
2278 /* Create the parallel constructs. */
2279 loc
= UNKNOWN_LOCATION
;
2280 cond_stmt
= last_stmt (loop
->header
);
2282 loc
= gimple_location (cond_stmt
);
2283 create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
2284 new_arg_struct
, n_threads
, loc
);
2285 if (reduction_list
->elements () > 0)
2286 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
2290 /* Cancel the loop (it is simpler to do it here rather than to teach the
2291 expander to do it). */
2292 cancel_loop_tree (loop
);
2294 /* Free loop bound estimations that could contain references to
2295 removed statements. */
2296 FOR_EACH_LOOP (loop
, 0)
2297 free_numbers_of_iterations_estimates_loop (loop
);
2300 /* Returns true when LOOP contains vector phi nodes. */
2303 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
2306 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
2310 for (i
= 0; i
< loop
->num_nodes
; i
++)
2311 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
2312 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi
.phi ()))) == VECTOR_TYPE
)
2321 /* Create a reduction_info struct, initialize it with REDUC_STMT
2322 and PHI, insert it to the REDUCTION_LIST. */
2325 build_new_reduction (reduction_info_table_type
*reduction_list
,
2326 gimple reduc_stmt
, gphi
*phi
)
2328 reduction_info
**slot
;
2329 struct reduction_info
*new_reduction
;
2331 gcc_assert (reduc_stmt
);
2333 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2336 "Detected reduction. reduction stmt is: \n");
2337 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
2338 fprintf (dump_file
, "\n");
2341 new_reduction
= XCNEW (struct reduction_info
);
2343 new_reduction
->reduc_stmt
= reduc_stmt
;
2344 new_reduction
->reduc_phi
= phi
;
2345 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
2346 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
2347 slot
= reduction_list
->find_slot (new_reduction
, INSERT
);
2348 *slot
= new_reduction
;
2351 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2354 set_reduc_phi_uids (reduction_info
**slot
, void *data ATTRIBUTE_UNUSED
)
2356 struct reduction_info
*const red
= *slot
;
2357 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
2361 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2364 gather_scalar_reductions (loop_p loop
, reduction_info_table_type
*reduction_list
)
2367 loop_vec_info simple_loop_info
;
2369 simple_loop_info
= vect_analyze_loop_form (loop
);
2371 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2373 gphi
*phi
= gsi
.phi ();
2375 tree res
= PHI_RESULT (phi
);
2378 if (virtual_operand_p (res
))
2381 if (!simple_iv (loop
, loop
, res
, &iv
, true)
2382 && simple_loop_info
)
2384 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
2387 if (reduc_stmt
&& !double_reduc
)
2388 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
2391 destroy_loop_vec_info (simple_loop_info
, true);
2393 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2394 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2396 reduction_list
->traverse
<void *, set_reduc_phi_uids
> (NULL
);
2399 /* Try to initialize NITER for code generation part. */
2402 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2404 edge exit
= single_dom_exit (loop
);
2408 /* We need to know # of iterations, and there should be no uses of values
2409 defined inside loop outside of it, unless the values are invariants of
2411 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2413 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2414 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2421 /* Try to initialize REDUCTION_LIST for code generation part.
2422 REDUCTION_LIST describes the reductions. */
2425 try_create_reduction_list (loop_p loop
,
2426 reduction_info_table_type
*reduction_list
)
2428 edge exit
= single_dom_exit (loop
);
2433 gather_scalar_reductions (loop
, reduction_list
);
2436 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2438 gphi
*phi
= gsi
.phi ();
2439 struct reduction_info
*red
;
2440 imm_use_iterator imm_iter
;
2441 use_operand_p use_p
;
2443 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2445 if (!virtual_operand_p (val
))
2447 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2449 fprintf (dump_file
, "phi is ");
2450 print_gimple_stmt (dump_file
, phi
, 0, 0);
2451 fprintf (dump_file
, "arg of phi to exit: value ");
2452 print_generic_expr (dump_file
, val
, 0);
2453 fprintf (dump_file
, " used outside loop\n");
2455 " checking if it a part of reduction pattern: \n");
2457 if (reduction_list
->elements () == 0)
2459 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2461 " FAILED: it is not a part of reduction.\n");
2465 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2467 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2468 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2470 reduc_phi
= USE_STMT (use_p
);
2474 red
= reduction_phi (reduction_list
, reduc_phi
);
2477 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2479 " FAILED: it is not a part of reduction.\n");
2482 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2484 fprintf (dump_file
, "reduction phi is ");
2485 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2486 fprintf (dump_file
, "reduction stmt is ");
2487 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2492 /* The iterations of the loop may communicate only through bivs whose
2493 iteration space can be distributed efficiently. */
2494 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2496 gphi
*phi
= gsi
.phi ();
2497 tree def
= PHI_RESULT (phi
);
2500 if (!virtual_operand_p (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2502 struct reduction_info
*red
;
2504 red
= reduction_phi (reduction_list
, phi
);
2507 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2509 " FAILED: scalar dependency between iterations\n");
2519 /* Detect parallel loops and generate parallel code using libgomp
2520 primitives. Returns true if some loop was parallelized, false
2524 parallelize_loops (void)
2526 unsigned n_threads
= flag_tree_parallelize_loops
;
2527 bool changed
= false;
2529 struct tree_niter_desc niter_desc
;
2530 struct obstack parloop_obstack
;
2531 HOST_WIDE_INT estimated
;
2532 source_location loop_loc
;
2534 /* Do not parallelize loops in the functions created by parallelization. */
2535 if (parallelized_function_p (cfun
->decl
))
2537 if (cfun
->has_nonlocal_label
)
2540 gcc_obstack_init (&parloop_obstack
);
2541 reduction_info_table_type
reduction_list (10);
2542 init_stmt_vec_info_vec ();
2544 FOR_EACH_LOOP (loop
, 0)
2546 reduction_list
.empty ();
2547 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2549 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2551 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2553 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2556 /* If we use autopar in graphite pass, we use its marked dependency
2557 checking results. */
2558 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2560 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2561 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2565 if (!single_dom_exit (loop
))
2568 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2569 fprintf (dump_file
, "loop is !single_dom_exit\n");
2574 if (/* And of course, the loop must be parallelizable. */
2575 !can_duplicate_loop_p (loop
)
2576 || loop_has_blocks_with_irreducible_flag (loop
)
2577 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2578 /* FIXME: the check for vector phi nodes could be removed. */
2579 || loop_has_vector_phi_nodes (loop
))
2582 estimated
= estimated_stmt_executions_int (loop
);
2583 if (estimated
== -1)
2584 estimated
= max_stmt_executions_int (loop
);
2585 /* FIXME: Bypass this check as graphite doesn't update the
2586 count and frequency correctly now. */
2587 if (!flag_loop_parallelize_all
2588 && ((estimated
!= -1
2589 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2590 /* Do not bother with loops in cold areas. */
2591 || optimize_loop_nest_for_size_p (loop
)))
2594 if (!try_get_loop_niter (loop
, &niter_desc
))
2597 if (!try_create_reduction_list (loop
, &reduction_list
))
2600 if (!flag_loop_parallelize_all
2601 && !loop_parallel_p (loop
, &parloop_obstack
))
2605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2608 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2610 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2611 loop_loc
= find_loop_location (loop
);
2612 if (loop_loc
!= UNKNOWN_LOCATION
)
2613 fprintf (dump_file
, "\nloop at %s:%d: ",
2614 LOCATION_FILE (loop_loc
), LOCATION_LINE (loop_loc
));
2616 gen_parallel_loop (loop
, &reduction_list
,
2617 n_threads
, &niter_desc
);
2620 free_stmt_vec_info_vec ();
2621 obstack_free (&parloop_obstack
, NULL
);
2623 /* Parallelization will cause new function calls to be inserted through
2624 which local variables will escape. Reset the points-to solution
2627 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2632 /* Parallelization. */
2636 const pass_data pass_data_parallelize_loops
=
2638 GIMPLE_PASS
, /* type */
2639 "parloops", /* name */
2640 OPTGROUP_LOOP
, /* optinfo_flags */
2641 TV_TREE_PARALLELIZE_LOOPS
, /* tv_id */
2642 ( PROP_cfg
| PROP_ssa
), /* properties_required */
2643 0, /* properties_provided */
2644 0, /* properties_destroyed */
2645 0, /* todo_flags_start */
2646 0, /* todo_flags_finish */
2649 class pass_parallelize_loops
: public gimple_opt_pass
2652 pass_parallelize_loops (gcc::context
*ctxt
)
2653 : gimple_opt_pass (pass_data_parallelize_loops
, ctxt
)
2656 /* opt_pass methods: */
2657 virtual bool gate (function
*) { return flag_tree_parallelize_loops
> 1; }
2658 virtual unsigned int execute (function
*);
2660 }; // class pass_parallelize_loops
2663 pass_parallelize_loops::execute (function
*fun
)
2665 if (number_of_loops (fun
) <= 1)
2668 if (parallelize_loops ())
2670 fun
->curr_properties
&= ~(PROP_gimple_eomp
);
2671 return TODO_update_ssa
;
2680 make_pass_parallelize_loops (gcc::context
*ctxt
)
2682 return new pass_parallelize_loops (ctxt
);