/* Loop autoparallelization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "params-enum.h"
/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if so (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
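/* As a small, assumed illustration (not literal pass output), a loop like

     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   has independent iterations, so this pass wraps it in
   GIMPLE_OMP_PARALLEL/GIMPLE_OMP_FOR, roughly corresponding to

     #pragma omp parallel num_threads(N_THREADS)
     #pragma omp for schedule(static)
     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   and leaves the actual thread creation to the omp expansion machinery.  */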
/* Reduction handling:

   currently we use vect_force_simple_reduction () to detect reduction patterns.
   The code transformation will be introduced by an example.


parloop
{
  int sum = 1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum += x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = x[i_28];
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result () #
  # sum.27_56 = PHI <sum.27_11, 0>

  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1 ()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
  create_loads_for_reductions ().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);
*/
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
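/* For example, with MIN_PER_THREAD 100 and four requested threads, a loop
   must run at least 100 * 4 = 400 iterations to be worth parallelizing;
   the runtime guard generated by gen_parallel_loop below
   (NITER < MIN_PER_THREAD * N_THREADS) otherwise falls back to the
   sequential loop.  */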
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;		/* reduction statement.  */
  gimple *reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};
/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;
/* Return the reduction info of PHI from REDUCTION_LIST, or NULL if PHI
   is not known to be a reduction phi.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}
/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
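/* For instance, with m = 2, n = 2 and

     matrix = { { 0, 1 },	vec = { 2, 3 }
	        { 1, 0 } },

   the function computes dest = { 0*2 + 1*3, 1*2 + 0*3 } = { 3, 2 }.  */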
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
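/* As a worked instance of the check above (an assumed illustration): for a
   single loop carrying a dependence of distance 1, the reversal matrix
   T = [-1] maps the distance vector (1) to (-1), which is not
   lexicographically positive, so the reversal is rejected.  Only loops
   whose distance vectors are all zero pass with T = [-1], which is exactly
   how loop_parallel_p below uses this predicate.  */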
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
  {
    fprintf (dump_file, "Considering loop %d\n", loop->num);
    if (!loop->inner)
      fprintf (dump_file, "loop is innermost\n");
    else
      fprintf (dump_file, "loop NOT innermost\n");
   }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
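/* This predicate is what -ftree-parallelize-loops=<n> ultimately relies on;
   e.g. compiling with (assumed invocation)

     gcc -O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details ...

   shows the "SUCCESS: may be parallelized" / "FAILED: ..." messages printed
   above for each candidate loop.  */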
/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
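/* For example (an assumed illustration), given a local variable x whose
   address is invariant in the function, a use of x.a inside the region is
   rewritten in terms of a canonical address:

     addr.0_1 = &x;		<-- inserted on edge ENTRY
     ... MEM[(struct s *)addr.0_1].a ...

   so that all later accesses and addresses based on &x share the ssa name
   addr.0_1.  */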
/* Return the result of reduction statement STMT: the PHI result for a
   reduction phi, the lhs for a reduction assignment.  */

static tree
reduc_stmt_res (gimple *stmt)
{
  return (gimple_code (stmt) == GIMPLE_PHI
	  ? gimple_phi_result (stmt)
	  : gimple_assign_lhs (stmt));
}
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;

  return 1;
}

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
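/* E.g. (an assumed illustration), for a local addressable variable x used in
   the region as

     x = 5;
     p_1 = &x;

   the accesses are rewritten through the canonical address created by
   take_address_of:

     addr.0_2 = &x;		<-- moved to the region entry edge
     *addr.0_2 = 5;
     p_1 = addr.0_2;  */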
/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
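/* In the reduction example at the top of the file, the phi created here is

     # sum.27_56 = PHI <sum.27_11, 0>

   merging the thread's computed partial sum with the neutral element 0
   that is used when the thread executed no iterations.  */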
/* Describes the shared data structure: the STORE and LOAD trees and the
   basic blocks where the stores and loads for it are generated.  */

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple *stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */
static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
/* Returns true if FN was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
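/* The resulting decl has, in effect, the C-level signature

     static void foo.$loopfn.N (void *.paral_data_param);

   (illustrative spelling): a single pointer argument through which the
   .paral_data structure built by separate_decls_in_region is passed.  */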
/* Replace uses of NAME by VAL in block BB.  */

static void
replace_uses_in_bb_by (tree name, tree val, basic_block bb)
{
  gimple *use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      if (gimple_bb (use_stmt) != bb)
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb newheader>

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>

   In unified diff format:

      <bb preheader>:
      ...
-     goto <bb header>
+     goto <bb newheader>

      <bb header>:
-     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
-     sum_a = PHI <sum_init (preheader), sum_b (latch)>
+     ivtmp_a = PHI <ivtmp_c (latch)>
+     sum_a = PHI <sum_c (latch)>
      ...
      use (ivtmp_a)
      ...
      sum_b = sum_a + sum_update
      ...
-     if (ivtmp_a < n)
-       goto <bb latch>;
+     goto <bb newheader>
+
+     <bb newheader>:
+     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
+     sum_c = PHI <sum_init (preheader), sum_b (latch)>
+     if (ivtmp_c < n + 1)
+       goto <bb header>;
      else
	goto <bb exit>;

      <bb latch>:
      ivtmp_b = ivtmp_a + 1;
-     goto <bb header>
+     goto <bb newheader>

+     <bb newexit>:
+     sum_y = PHI <sum_c (newheader)>

      <bb exit>:
-     sum_z = PHI <sum_b (cond[1]), ...>
+     sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal form after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Create a new empty exit block, in between the new loop header and the old
     exit block.  The function separate_decls_in_region needs this block to
     insert code that is active on loop exit, but not any other path.  */
  basic_block new_exit_block = split_edge (exit);

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);

      /* Now that we have a new exit block, duplicate the phi of the old exit
	 block in the new exit block to preserve loop-closed ssa.  */
      edge succ_new_exit_block = single_succ_edge (new_exit_block);
      edge pred_new_exit_block = single_pred_edge (new_exit_block);
      tree res_y = copy_ssa_name (res_z, phi);
      gphi *nphi = create_phi_node (res_y, new_exit_block);
      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);

      if (virtual_operand_p (res_z))
	continue;

      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = nphi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  checking_verify_ssa (true, true);
}
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of
   the loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   the transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
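/* As a concrete instance of the INTEGER_CST path above: for nit = 99 in a
   type whose maximum exceeds 99, alt_bound becomes 100 and the rewritten
   header tests ivtmp_c < 100 before the body, preserving the iteration
   count while placing the exit test at the start of the loop.  */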
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple *omp_return_stmt1, *omp_return_stmt2;
  gimple *phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (omp_par_stmt, loc);

  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gassign *assign_stmt;

      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

      assign_stmt = gimple_build_assign (new_data,
					 fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  omp_return_stmt1 = gimple_build_omp_return (false);
  gimple_set_location (omp_return_stmt1, loc);
  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
  loop->latch = split_edge (single_succ_edge (loop->latch));
  single_pred_edge (loop->latch)->flags = 0;
  end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
  rescan_loop_exit (end, true, false);

  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      gphi *phi = gpi.phi ();
      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple *def_stmt = SSA_NAME_DEF_STMT (def);

      /* If the exit phi is not connected to a header phi in the same loop, this
	 value is not modified in the loop, and we're done with this phi.  */
      if (!(gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == loop->header))
	{
	  locus = UNKNOWN_LOCATION;
	  add_phi_arg (phi, def, guard, locus);
	  add_phi_arg (phi, def, end, locus);
	  continue;
	}

      gphi *stmt = as_a <gphi *> (def_stmt);
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
  enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type
    = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
  switch (schedule_type)
    {
    case PARAM_PARLOOPS_SCHEDULE_KIND_static:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
      chunk_size = 0;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
      chunk_size = 0;
      break;
    default:
      gcc_unreachable ();
    }
  if (chunk_size != 0)
    OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
      = build_int_cst (integer_type_node, chunk_size);
2120 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
2121 gimple_set_location (for_stmt
, loc
);
2122 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
2123 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
2124 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
2125 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
2126 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
2128 build_int_cst (type
, 1)));
2130 gsi
= gsi_last_bb (for_bb
);
2131 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
2132 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
2134 /* Emit GIMPLE_OMP_CONTINUE. */
2135 continue_bb
= single_pred (loop
->latch
);
2136 gsi
= gsi_last_bb (continue_bb
);
2137 omp_cont_stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
2138 gimple_set_location (omp_cont_stmt
, loc
);
2139 gsi_insert_after (&gsi
, omp_cont_stmt
, GSI_NEW_STMT
);
2140 SSA_NAME_DEF_STMT (cvar_next
) = omp_cont_stmt
;
2142 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2143 gsi
= gsi_last_bb (ex_bb
);
2144 omp_return_stmt2
= gimple_build_omp_return (true);
2145 gimple_set_location (omp_return_stmt2
, loc
);
2146 gsi_insert_after (&gsi
, omp_return_stmt2
, GSI_NEW_STMT
);
2148 /* After the above dom info is hosed. Re-compute it. */
2149 free_dominance_info (CDI_DOMINATORS
);
2150 calculate_dominance_info (CDI_DOMINATORS
);
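
/* For illustration, the GIMPLE_OMP_* statements emitted above correspond
   roughly to this OpenMP source-level sketch (informal; the pass builds
   the GIMPLE codes directly and never goes through pragmas):

     #pragma omp parallel num_threads (N_THREADS)
     #pragma omp for schedule (<kind>[, CHUNK_SIZE])
     for (initvar = cvar_init; initvar <cond> <final>; initvar += 1)
       BODY;

   where <kind> and CHUNK_SIZE come from the parloops-schedule and
   parloops-chunk-size --param knobs read above.  */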
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes the number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple *cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:
   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

     for (i = 0; i < n; ++i)
       for (j = 0; j < m; ++j)
	 ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (loop->inner)
    m_p_thread = 2;
  else
    m_p_thread = MIN_PER_THREAD;

  many_iterations_cond
    = fold_build2 (GE_EXPR, boolean_type_node,
		   nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
					  stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that the latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move
     the entry of the loop directly before the exit check and increase the
     number of iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets
	 disabled to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}
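
/* A numeric sketch of the guard generated above: assuming n_threads == 4
   and an innermost loop (m_p_thread == MIN_PER_THREAD), the runtime test is

     !MAY_BE_ZERO && NITER >= MIN_PER_THREAD * 4

   so loops whose iteration count cannot amortize the thread startup cost
   keep running the original sequential version.  */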
/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gphi_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple *reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;
  enum tree_code reduction_code;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
    {
      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      reduction_code = gimple_assign_rhs_code (def1);
    }
  else
    reduction_code = gimple_assign_rhs_code (reduc_stmt);

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = reduction_code;
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}
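
/* For illustration, given a simple accumulation (a sketch):

     for (i = 0; i < n; i++)
       sum = sum + a[i];

   reduc_phi is the header phi of sum, reduc_stmt is the addition feeding
   the phi's latch argument, and reduction_code is PLUS_EXPR.  */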
/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  loop_vec_info simple_inner_loop_info = NULL;
  bool allow_double_reduc = true;

  if (!stmt_vec_info_vec.exists ())
    init_stmt_vec_info_vec ();

  simple_loop_info = vect_analyze_loop_form (loop);
  if (simple_loop_info == NULL)
    goto gather_done;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      gimple *reduc_stmt
	= vect_force_simple_reduction (simple_loop_info, phi, true,
				       &double_reduc, true);
      if (!reduc_stmt)
	continue;

      if (double_reduc)
	{
	  if (!allow_double_reduc
	      || loop->inner->inner != NULL)
	    continue;

	  if (!simple_inner_loop_info)
	    {
	      simple_inner_loop_info = vect_analyze_loop_form (loop->inner);
	      if (!simple_inner_loop_info)
		{
		  allow_double_reduc = false;
		  continue;
		}
	    }

	  use_operand_p use_p;
	  gimple *inner_stmt;
	  bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	  gcc_assert (single_use_p);
	  gphi *inner_phi = as_a <gphi *> (inner_stmt);
	  if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			 &iv, true))
	    continue;

	  gimple *inner_reduc_stmt
	    = vect_force_simple_reduction (simple_inner_loop_info, inner_phi,
					   true, &double_reduc, true);
	  gcc_assert (!double_reduc);
	  if (inner_reduc_stmt == NULL)
	    continue;
	}

      build_new_reduction (reduction_list, reduc_stmt, phi);
    }
  destroy_loop_vec_info (simple_loop_info, true);
  destroy_loop_vec_info (simple_inner_loop_info, true);

 gather_done:
  /* Release the claim on gimple_uid.  */
  free_stmt_vec_info_vec ();

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
     now.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}
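
/* For illustration, the double reduction case accepted above looks like
   this (a sketch):

     for (i = 0; i < n; i++)        <- LOOP; the header phi of sum is
       for (j = 0; j < m; j++)         the double reduction
	 sum = sum + a[i][j];       <- inner loop does the accumulation

   It is gathered only if the inner loop form is analyzable and an inner
   reduction statement is found for the inner-header phi.  */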
/* Try to initialize NITER for the code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}
/* Try to initialize REDUCTION_LIST for the code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is a part of reduction pattern:\n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

static bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct loop *skip_loop = NULL;
  struct tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  source_location loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);

  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Skipping loop %d as inner loop of parallelized loop\n",
		     loop->num);

	  skip_loop = loop->inner;
	  continue;
	}
      else
	skip_loop = NULL;

      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      skip_loop = loop->inner;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",
		     loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",
		     loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOCATION)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
	}
      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc);
    }

  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
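
/* Usage sketch: this driver runs for functions compiled with
   -ftree-parallelize-loops=N (N > 1), or with -floop-parallelize-all when
   graphite has marked loops via can_be_parallel.  The constructs it emits
   are lowered by omp expansion into calls to libgomp, the same runtime
   used for -fopenmp.  */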
/* Parallelization.  */

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
  virtual unsigned int execute (function *);

}; // class pass_parallelize_loops

unsigned
pass_parallelize_loops::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  if (parallelize_loops ())
    {
      fun->curr_properties &= ~(PROP_gimple_eomp);

      checking_verify_loop_structure ();

      return TODO_update_ssa;
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}