1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "tree-flow.h"
31 #include "diagnostic.h"
32 #include "tree-pass.h"
33 #include "tree-chrec.h"
34 #include "tree-scalar-evolution.h"
35 #include "tree-data-ref.h"
37 #include "langhooks.h"
38 #include "tree-vectorizer.h"
40 /* This pass tries to distribute iterations of loops into several threads.
41 The implementation is straightforward -- for each loop we test whether its
42 iterations are independent, and if it is the case (and some additional
43 conditions regarding profitability and correctness are satisfied), we
44 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
47 The most of the complexity is in bringing the code into shape expected
49 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
50 variable and that the exit test is at the start of the loop body
51 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
52 variables by accesses through pointers, and breaking up ssa chains
53 by storing the values incoming to the parallelized loop to a structure
54 passed to the new function as an argument (something similar is done
55 in omp gimplification, unfortunately only a small part of the code
59 -- if there are several parallelizable loops in a function, it may be
60 possible to generate the threads just once (using synchronization to
61 ensure that cross-loop dependences are obeyed).
62 -- handling of common scalar dependence patterns (accumulation, ...)
63 -- handling of non-innermost loops */
67 currently we use vect_is_simple_reduction() to detect reduction patterns.
68 The code transformation will be introduced by an example.
75 for (i = 0; i < N; i++)
85 # sum_29 = PHI <sum_11(5), 1(3)>
86 # i_28 = PHI <i_12(5), 0(3)>
89 sum_11 = D.1795_8 + sum_29;
97 # sum_21 = PHI <sum_11(4)>
98 printf (&"%d"[0], sum_21);
101 after reduction transformation (only relevant parts):
109 # Storing the initial value given by the user. #
111 .paral_data_store.32.sum.27 = 1;
113 #pragma omp parallel num_threads(4)
115 #pragma omp for schedule(static)
117 # The neutral element corresponding to the particular
118 reduction's operation, e.g. 0 for PLUS_EXPR,
119 1 for MULT_EXPR, etc. replaces the user's initial value. #
121 # sum.27_29 = PHI <sum.27_11, 0>
123 sum.27_11 = D.1827_8 + sum.27_29;
127 # Adding this reduction phi is done at create_phi_for_local_result() #
128 # sum.27_56 = PHI <sum.27_11, 0>
131 # Creating the atomic operation is done at
132 create_call_for_reduction_1() #
134 #pragma omp atomic_load
135 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136 D.1840_60 = sum.27_56 + D.1839_59;
137 #pragma omp atomic_store (D.1840_60);
141 # collecting the result after the join of the threads is done at
142 create_loads_for_reductions().
143 The value computed by the threads is loaded from the
147 .paral_data_load.33_52 = &.paral_data_store.32;
148 sum_37 = .paral_data_load.33_52->sum.27;
149 sum_43 = D.1795_41 + sum_37;
152 # sum_21 = PHI <sum_43, sum_26>
153 printf (&"%d"[0], sum_21);
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
165 /* Element of the hashtable, representing a
166 reduction in the current loop. */
167 struct reduction_info
169 gimple reduc_stmt
; /* reduction statement. */
170 gimple reduc_phi
; /* The phi node defining the reduction. */
171 enum tree_code reduction_code
;/* code for the reduction operation. */
172 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
173 of the reduction variable when existing the loop. */
174 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
175 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
176 tree init
; /* reduction initialization value. */
177 gimple new_phi
; /* (helper field) Newly created phi node whose result
178 will be passed to the atomic operation. Represents
179 the local result each thread computed for the reduction
183 /* Equality and hash functions for hashtab code. */
186 reduction_info_eq (const void *aa
, const void *bb
)
188 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
189 const struct reduction_info
*b
= (const struct reduction_info
*) bb
;
191 return (a
->reduc_phi
== b
->reduc_phi
);
195 reduction_info_hash (const void *aa
)
197 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
199 return htab_hash_pointer (a
->reduc_phi
);
202 static struct reduction_info
*
203 reduction_phi (htab_t reduction_list
, gimple phi
)
205 struct reduction_info tmpred
, *red
;
207 if (htab_elements (reduction_list
) == 0)
210 tmpred
.reduc_phi
= phi
;
211 red
= (struct reduction_info
*) htab_find (reduction_list
, &tmpred
);
216 /* Element of hashtable of names to copy. */
218 struct name_to_copy_elt
220 unsigned version
; /* The version of the name to copy. */
221 tree new_name
; /* The new name used in the copy. */
222 tree field
; /* The field of the structure used to pass the
226 /* Equality and hash functions for hashtab code. */
229 name_to_copy_elt_eq (const void *aa
, const void *bb
)
231 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
232 const struct name_to_copy_elt
*b
= (const struct name_to_copy_elt
*) bb
;
234 return a
->version
== b
->version
;
238 name_to_copy_elt_hash (const void *aa
)
240 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
242 return (hashval_t
) a
->version
;
246 /* Data dependency analysis. Returns true if the iterations of LOOP
247 are independent on each other (that is, if we can execute them
251 loop_parallel_p (struct loop
*loop
)
253 VEC (ddr_p
, heap
) * dependence_relations
;
254 VEC (data_reference_p
, heap
) *datarefs
;
255 lambda_trans_matrix trans
;
258 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
259 fprintf (dump_file
, "\nConsidering loop %d\n", loop
->num
);
261 /* Check for problems with dependences. If the loop can be reversed,
262 the iterations are independent. */
263 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
264 dependence_relations
= VEC_alloc (ddr_p
, heap
, 10 * 10);
265 compute_data_dependences_for_loop (loop
, true, &datarefs
,
266 &dependence_relations
);
267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
268 dump_data_dependence_relations (dump_file
, dependence_relations
);
270 trans
= lambda_trans_matrix_new (1, 1);
271 LTM_MATRIX (trans
)[0][0] = -1;
273 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
276 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
277 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
279 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
281 " FAILED: data dependencies exist across iterations\n");
283 free_dependence_relations (dependence_relations
);
284 free_data_refs (datarefs
);
289 /* Return true when LOOP contains basic blocks marked with the
290 BB_IRREDUCIBLE_LOOP flag. */
293 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
296 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
299 for (i
= 0; i
< loop
->num_nodes
; i
++)
300 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
309 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
310 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
311 to their addresses that can be reused. The address of OBJ is known to
312 be invariant in the whole function. */
315 take_address_of (tree obj
, tree type
, edge entry
, htab_t decl_address
)
319 struct int_tree_map ielt
, *nielt
;
320 tree
*var_p
, name
, bvar
, addr
;
324 /* Since the address of OBJ is invariant, the trees may be shared.
325 Avoid rewriting unrelated parts of the code. */
326 obj
= unshare_expr (obj
);
328 handled_component_p (*var_p
);
329 var_p
= &TREE_OPERAND (*var_p
, 0))
331 uid
= DECL_UID (*var_p
);
334 dslot
= htab_find_slot_with_hash (decl_address
, &ielt
, uid
, INSERT
);
337 addr
= build_addr (*var_p
, current_function_decl
);
338 bvar
= create_tmp_var (TREE_TYPE (addr
), get_name (*var_p
));
339 add_referenced_var (bvar
);
340 stmt
= gimple_build_assign (bvar
, addr
);
341 name
= make_ssa_name (bvar
, stmt
);
342 gimple_assign_set_lhs (stmt
, name
);
343 gsi_insert_on_edge_immediate (entry
, stmt
);
345 nielt
= XNEW (struct int_tree_map
);
351 name
= ((struct int_tree_map
*) *dslot
)->to
;
355 *var_p
= build1 (INDIRECT_REF
, TREE_TYPE (*var_p
), name
);
356 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
357 &stmts
, true, NULL_TREE
);
358 if (!gimple_seq_empty_p (stmts
))
359 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
362 if (TREE_TYPE (name
) != type
)
364 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
366 if (!gimple_seq_empty_p (stmts
))
367 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
373 /* Callback for htab_traverse. Create the initialization statement
374 for reduction described in SLOT, and place it at the preheader of
375 the loop described in DATA. */
378 initialize_reductions (void **slot
, void *data
)
381 tree bvar
, type
, arg
;
384 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
385 struct loop
*loop
= (struct loop
*) data
;
387 /* Create initialization in preheader:
388 reduction_variable = initialization value of reduction. */
390 /* In the phi node at the header, replace the argument coming
391 from the preheader with the reduction initialization value. */
393 /* Create a new variable to initialize the reduction. */
394 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
395 bvar
= create_tmp_var (type
, "reduction");
396 add_referenced_var (bvar
);
398 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
399 OMP_CLAUSE_REDUCTION
);
400 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
401 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
403 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
406 /* Replace the argument representing the initialization value
407 with the initialization value for the reduction (neutral
408 element for the particular operation, e.g. 0 for PLUS_EXPR,
409 1 for MULT_EXPR, etc).
410 Keep the old value in a new variable "reduction_initial",
411 that will be taken in consideration after the parallel
412 computing is done. */
414 e
= loop_preheader_edge (loop
);
415 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
416 /* Create new variable to hold the initial value. */
418 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
419 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
420 reduc
->initial_value
= arg
;
426 struct walk_stmt_info info
;
432 /* Eliminates references to local variables in *TP out of the single
433 entry single exit region starting at DTA->ENTRY.
434 DECL_ADDRESS contains addresses of the references that had their
435 address taken already. If the expression is changed, CHANGED is
436 set to true. Callback for walk_tree. */
439 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
441 struct elv_data
*const dta
= (struct elv_data
*) data
;
442 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
448 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
451 type
= TREE_TYPE (t
);
452 addr_type
= build_pointer_type (type
);
453 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
);
454 *tp
= build1 (INDIRECT_REF
, TREE_TYPE (*tp
), addr
);
460 if (TREE_CODE (t
) == ADDR_EXPR
)
462 /* ADDR_EXPR may appear in two contexts:
463 -- as a gimple operand, when the address taken is a function invariant
464 -- as gimple rhs, when the resulting address in not a function
466 We do not need to do anything special in the latter case (the base of
467 the memory reference whose address is taken may be replaced in the
468 DECL_P case). The former case is more complicated, as we need to
469 ensure that the new address is still a gimple operand. Thus, it
470 is not sufficient to replace just the base of the memory reference --
471 we need to move the whole computation of the address out of the
473 if (!is_gimple_val (t
))
477 obj
= TREE_OPERAND (t
, 0);
478 var
= get_base_address (obj
);
479 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
482 addr_type
= TREE_TYPE (t
);
483 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
);
496 /* Moves the references to local variables in STMT out of the single
497 entry single exit region starting at ENTRY. DECL_ADDRESS contains
498 addresses of the references that had their address taken
502 eliminate_local_variables_stmt (edge entry
, gimple stmt
,
507 memset (&dta
.info
, '\0', sizeof (dta
.info
));
509 dta
.decl_address
= decl_address
;
512 if (gimple_debug_bind_p (stmt
))
513 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
514 eliminate_local_variables_1
, &dta
.info
, NULL
);
516 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
522 /* Eliminates the references to local variables from the single entry
523 single exit region between the ENTRY and EXIT edges.
526 1) Taking address of a local variable -- these are moved out of the
527 region (and temporary variable is created to hold the address if
530 2) Dereferencing a local variable -- these are replaced with indirect
534 eliminate_local_variables (edge entry
, edge exit
)
537 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
539 gimple_stmt_iterator gsi
;
540 htab_t decl_address
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
542 basic_block entry_bb
= entry
->src
;
543 basic_block exit_bb
= exit
->dest
;
545 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
547 for (i
= 0; VEC_iterate (basic_block
, body
, i
, bb
); i
++)
548 if (bb
!= entry_bb
&& bb
!= exit_bb
)
549 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
550 eliminate_local_variables_stmt (entry
, gsi_stmt (gsi
),
553 htab_delete (decl_address
);
554 VEC_free (basic_block
, heap
, body
);
557 /* Returns true if expression EXPR is not defined between ENTRY and
558 EXIT, i.e. if all its operands are defined outside of the region. */
561 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
563 basic_block entry_bb
= entry
->src
;
564 basic_block exit_bb
= exit
->dest
;
567 if (is_gimple_min_invariant (expr
))
570 if (TREE_CODE (expr
) == SSA_NAME
)
572 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
574 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
575 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
584 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
585 The copies are stored to NAME_COPIES, if NAME was already duplicated,
586 its duplicate stored in NAME_COPIES is returned.
588 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
589 duplicated, storing the copies in DECL_COPIES. */
592 separate_decls_in_region_name (tree name
,
593 htab_t name_copies
, htab_t decl_copies
,
596 tree copy
, var
, var_copy
;
597 unsigned idx
, uid
, nuid
;
598 struct int_tree_map ielt
, *nielt
;
599 struct name_to_copy_elt elt
, *nelt
;
600 void **slot
, **dslot
;
602 if (TREE_CODE (name
) != SSA_NAME
)
605 idx
= SSA_NAME_VERSION (name
);
607 slot
= htab_find_slot_with_hash (name_copies
, &elt
, idx
,
608 copy_name_p
? INSERT
: NO_INSERT
);
610 return ((struct name_to_copy_elt
*) *slot
)->new_name
;
612 var
= SSA_NAME_VAR (name
);
613 uid
= DECL_UID (var
);
615 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, uid
, INSERT
);
618 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
619 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
620 add_referenced_var (var_copy
);
621 nielt
= XNEW (struct int_tree_map
);
623 nielt
->to
= var_copy
;
626 /* Ensure that when we meet this decl next time, we won't duplicate
628 nuid
= DECL_UID (var_copy
);
630 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, nuid
, INSERT
);
631 gcc_assert (!*dslot
);
632 nielt
= XNEW (struct int_tree_map
);
634 nielt
->to
= var_copy
;
638 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
642 copy
= duplicate_ssa_name (name
, NULL
);
643 nelt
= XNEW (struct name_to_copy_elt
);
645 nelt
->new_name
= copy
;
646 nelt
->field
= NULL_TREE
;
655 SSA_NAME_VAR (copy
) = var_copy
;
659 /* Finds the ssa names used in STMT that are defined outside the
660 region between ENTRY and EXIT and replaces such ssa names with
661 their duplicates. The duplicates are stored to NAME_COPIES. Base
662 decls of all ssa names used in STMT (including those defined in
663 LOOP) are replaced with the new temporary variables; the
664 replacement decls are stored in DECL_COPIES. */
667 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
668 htab_t name_copies
, htab_t decl_copies
)
676 mark_virtual_ops_for_renaming (stmt
);
678 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
680 name
= DEF_FROM_PTR (def
);
681 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
682 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
684 gcc_assert (copy
== name
);
687 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
689 name
= USE_FROM_PTR (use
);
690 if (TREE_CODE (name
) != SSA_NAME
)
693 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
694 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
700 /* Finds the ssa names used in STMT that are defined outside the
701 region between ENTRY and EXIT and replaces such ssa names with
702 their duplicates. The duplicates are stored to NAME_COPIES. Base
703 decls of all ssa names used in STMT (including those defined in
704 LOOP) are replaced with the new temporary variables; the
705 replacement decls are stored in DECL_COPIES. */
708 separate_decls_in_region_debug_bind (gimple stmt
,
709 htab_t name_copies
, htab_t decl_copies
)
714 struct int_tree_map ielt
;
715 struct name_to_copy_elt elt
;
716 void **slot
, **dslot
;
718 var
= gimple_debug_bind_get_var (stmt
);
719 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
720 ielt
.uid
= DECL_UID (var
);
721 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, ielt
.uid
, NO_INSERT
);
724 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
726 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
728 name
= USE_FROM_PTR (use
);
729 if (TREE_CODE (name
) != SSA_NAME
)
732 elt
.version
= SSA_NAME_VERSION (name
);
733 slot
= htab_find_slot_with_hash (name_copies
, &elt
, elt
.version
, NO_INSERT
);
736 gimple_debug_bind_reset_value (stmt
);
741 SET_USE (use
, ((struct name_to_copy_elt
*) *slot
)->new_name
);
747 /* Callback for htab_traverse. Adds a field corresponding to the reduction
748 specified in SLOT. The type is passed in DATA. */
751 add_field_for_reduction (void **slot
, void *data
)
754 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
755 tree
const type
= (tree
) data
;
756 tree var
= SSA_NAME_VAR (gimple_assign_lhs (red
->reduc_stmt
));
757 tree field
= build_decl (gimple_location (red
->reduc_stmt
),
758 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
760 insert_field_into_struct (type
, field
);
767 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
768 described in SLOT. The type is passed in DATA. */
771 add_field_for_name (void **slot
, void *data
)
773 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
774 tree type
= (tree
) data
;
775 tree name
= ssa_name (elt
->version
);
776 tree var
= SSA_NAME_VAR (name
);
777 tree field
= build_decl (DECL_SOURCE_LOCATION (var
),
778 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
780 insert_field_into_struct (type
, field
);
786 /* Callback for htab_traverse. A local result is the intermediate result
788 thread, or the initial value in case no iteration was executed.
789 This function creates a phi node reflecting these values.
790 The phi's result will be stored in NEW_PHI field of the
791 reduction's data structure. */
794 create_phi_for_local_result (void **slot
, void *data
)
796 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
797 const struct loop
*const loop
= (const struct loop
*) data
;
800 basic_block store_bb
;
802 source_location locus
;
804 /* STORE_BB is the block where the phi
805 should be stored. It is the destination of the loop exit.
806 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
807 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
809 /* STORE_BB has two predecessors. One coming from the loop
810 (the reduction's result is computed at the loop),
811 and another coming from a block preceding the loop,
813 are executed (the initial value should be taken). */
814 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
815 e
= EDGE_PRED (store_bb
, 1);
817 e
= EDGE_PRED (store_bb
, 0);
819 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
)),
821 locus
= gimple_location (reduc
->reduc_stmt
);
822 new_phi
= create_phi_node (local_res
, store_bb
);
823 SSA_NAME_DEF_STMT (local_res
) = new_phi
;
824 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
825 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
826 FALLTHRU_EDGE (loop
->latch
), locus
);
827 reduc
->new_phi
= new_phi
;
837 basic_block store_bb
;
841 /* Callback for htab_traverse. Create an atomic instruction for the
842 reduction described in SLOT.
843 DATA annotates the place in memory the atomic operation relates to,
844 and the basic block it needs to be generated in. */
847 create_call_for_reduction_1 (void **slot
, void *data
)
849 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
850 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
851 gimple_stmt_iterator gsi
;
852 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
853 tree struct_type
= TREE_TYPE (TREE_TYPE (clsn_data
->load
));
858 tree t
, addr
, addr_type
, ref
, x
;
862 load_struct
= fold_build1 (INDIRECT_REF
, struct_type
, clsn_data
->load
);
863 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
864 addr_type
= build_pointer_type (type
);
866 addr
= build_addr (t
, current_function_decl
);
868 /* Create phi node. */
869 bb
= clsn_data
->load_bb
;
871 e
= split_block (bb
, t
);
874 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
875 add_referenced_var (tmp_load
);
876 tmp_load
= make_ssa_name (tmp_load
, NULL
);
877 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
878 SSA_NAME_DEF_STMT (tmp_load
) = load
;
879 gsi
= gsi_start_bb (new_bb
);
880 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
882 e
= split_block (new_bb
, load
);
884 gsi
= gsi_start_bb (new_bb
);
886 x
= fold_build2 (reduc
->reduction_code
,
887 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
888 PHI_RESULT (reduc
->new_phi
));
890 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
891 GSI_CONTINUE_LINKING
);
893 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
897 /* Create the atomic operation at the join point of the threads.
898 REDUCTION_LIST describes the reductions in the LOOP.
899 LD_ST_DATA describes the shared data structure where
900 shared data is stored in and loaded from. */
902 create_call_for_reduction (struct loop
*loop
, htab_t reduction_list
,
903 struct clsn_data
*ld_st_data
)
905 htab_traverse (reduction_list
, create_phi_for_local_result
, loop
);
906 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
907 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
908 htab_traverse (reduction_list
, create_call_for_reduction_1
, ld_st_data
);
911 /* Callback for htab_traverse. Loads the final reduction value at the
912 join point of all threads, and inserts it in the right place. */
915 create_loads_for_reductions (void **slot
, void *data
)
917 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
918 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
920 gimple_stmt_iterator gsi
;
921 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
922 tree struct_type
= TREE_TYPE (TREE_TYPE (clsn_data
->load
));
927 gsi
= gsi_after_labels (clsn_data
->load_bb
);
928 load_struct
= fold_build1 (INDIRECT_REF
, struct_type
, clsn_data
->load
);
929 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
933 name
= PHI_RESULT (red
->keep_res
);
934 stmt
= gimple_build_assign (name
, x
);
935 SSA_NAME_DEF_STMT (name
) = stmt
;
937 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
939 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
940 !gsi_end_p (gsi
); gsi_next (&gsi
))
941 if (gsi_stmt (gsi
) == red
->keep_res
)
943 remove_phi_node (&gsi
, false);
949 /* Load the reduction result that was stored in LD_ST_DATA.
950 REDUCTION_LIST describes the list of reductions that the
951 loads should be generated for. */
953 create_final_loads_for_reduction (htab_t reduction_list
,
954 struct clsn_data
*ld_st_data
)
956 gimple_stmt_iterator gsi
;
960 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
961 t
= build_fold_addr_expr (ld_st_data
->store
);
962 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
964 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
965 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
967 htab_traverse (reduction_list
, create_loads_for_reductions
, ld_st_data
);
971 /* Callback for htab_traverse. Store the neutral value for the
972 particular reduction's operation, e.g. 0 for PLUS_EXPR,
973 1 for MULT_EXPR, etc. into the reduction field.
974 The reduction is specified in SLOT. The store information is
978 create_stores_for_reduction (void **slot
, void *data
)
980 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
981 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
984 gimple_stmt_iterator gsi
;
985 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
987 gsi
= gsi_last_bb (clsn_data
->store_bb
);
988 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
989 stmt
= gimple_build_assign (t
, red
->initial_value
);
990 mark_virtual_ops_for_renaming (stmt
);
991 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
996 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
997 store to a field of STORE in STORE_BB for the ssa name and its duplicate
998 specified in SLOT. */
1001 create_loads_and_stores_for_name (void **slot
, void *data
)
1003 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
1004 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1007 gimple_stmt_iterator gsi
;
1008 tree type
= TREE_TYPE (elt
->new_name
);
1009 tree struct_type
= TREE_TYPE (TREE_TYPE (clsn_data
->load
));
1012 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1013 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1014 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1015 mark_virtual_ops_for_renaming (stmt
);
1016 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1018 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1019 load_struct
= fold_build1 (INDIRECT_REF
, struct_type
, clsn_data
->load
);
1020 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1021 stmt
= gimple_build_assign (elt
->new_name
, t
);
1022 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
1023 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1028 /* Moves all the variables used in LOOP and defined outside of it (including
1029 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1030 name) to a structure created for this purpose. The code
1038 is transformed this way:
1053 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1054 pointer `new' is intentionally not initialized (the loop will be split to a
1055 separate function later, and `new' will be initialized from its arguments).
1056 LD_ST_DATA holds information about the shared data structure used to pass
1057 information among the threads. It is initialized here, and
1058 gen_parallel_loop will pass it to create_call_for_reduction that
1059 needs this information. REDUCTION_LIST describes the reductions
1063 separate_decls_in_region (edge entry
, edge exit
, htab_t reduction_list
,
1064 tree
*arg_struct
, tree
*new_arg_struct
,
1065 struct clsn_data
*ld_st_data
)
1068 basic_block bb1
= split_edge (entry
);
1069 basic_block bb0
= single_pred (bb1
);
1070 htab_t name_copies
= htab_create (10, name_to_copy_elt_hash
,
1071 name_to_copy_elt_eq
, free
);
1072 htab_t decl_copies
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
1075 tree type
, type_name
, nvar
;
1076 gimple_stmt_iterator gsi
;
1077 struct clsn_data clsn_data
;
1078 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
1080 basic_block entry_bb
= bb1
;
1081 basic_block exit_bb
= exit
->dest
;
1082 bool has_debug_stmt
= false;
1084 entry
= single_succ_edge (entry_bb
);
1085 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1087 for (i
= 0; VEC_iterate (basic_block
, body
, i
, bb
); i
++)
1089 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1091 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1092 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1093 name_copies
, decl_copies
);
1095 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1097 gimple stmt
= gsi_stmt (gsi
);
1099 if (is_gimple_debug (stmt
))
1100 has_debug_stmt
= true;
1102 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1103 name_copies
, decl_copies
);
1108 /* Now process debug bind stmts. We must not create decls while
1109 processing debug stmts, so we defer their processing so as to
1110 make sure we will have debug info for as many variables as
1111 possible (all of those that were dealt with in the loop above),
1112 and discard those for which we know there's nothing we can
1115 for (i
= 0; VEC_iterate (basic_block
, body
, i
, bb
); i
++)
1116 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1118 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1120 gimple stmt
= gsi_stmt (gsi
);
1122 if (gimple_debug_bind_p (stmt
))
1124 if (separate_decls_in_region_debug_bind (stmt
,
1128 gsi_remove (&gsi
, true);
1137 VEC_free (basic_block
, heap
, body
);
1139 if (htab_elements (name_copies
) == 0 && reduction_list
== 0)
1141 /* It may happen that there is nothing to copy (if there are only
1142 loop carried and external variables in the loop). */
1144 *new_arg_struct
= NULL
;
1148 /* Create the type for the structure to store the ssa names to. */
1149 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1150 type_name
= build_decl (BUILTINS_LOCATION
,
1151 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1153 TYPE_NAME (type
) = type_name
;
1155 htab_traverse (name_copies
, add_field_for_name
, type
);
1156 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1158 /* Create the fields for reductions. */
1159 htab_traverse (reduction_list
, add_field_for_reduction
,
1164 /* Create the loads and stores. */
1165 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1166 add_referenced_var (*arg_struct
);
1167 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1168 add_referenced_var (nvar
);
1169 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1171 ld_st_data
->store
= *arg_struct
;
1172 ld_st_data
->load
= *new_arg_struct
;
1173 ld_st_data
->store_bb
= bb0
;
1174 ld_st_data
->load_bb
= bb1
;
1176 htab_traverse (name_copies
, create_loads_and_stores_for_name
,
1179 /* Load the calculation from memory (after the join of the threads). */
1181 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1183 htab_traverse (reduction_list
, create_stores_for_reduction
,
1185 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1186 clsn_data
.load_bb
= exit
->dest
;
1187 clsn_data
.store
= ld_st_data
->store
;
1188 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1192 htab_delete (decl_copies
);
1193 htab_delete (name_copies
);
1196 /* Bitmap containing uids of functions created by parallelization. We cannot
1197 allocate it from the default obstack, as it must live across compilation
1198 of several functions; we make it gc allocated instead. */
1200 static GTY(()) bitmap parallelized_functions
;
1202 /* Returns true if FN was created by create_loop_fn. */
1205 parallelized_function_p (tree fn
)
1207 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1210 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1213 /* Creates and returns an empty function that will receive the body of
1214 a parallelized loop. */
1217 create_loop_fn (void)
1221 tree decl
, type
, name
, t
;
1222 struct function
*act_cfun
= cfun
;
1223 static unsigned loopfn_num
;
1225 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1226 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1227 clean_symbol_name (tname
);
1228 name
= get_identifier (tname
);
1229 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1231 decl
= build_decl (BUILTINS_LOCATION
,
1232 FUNCTION_DECL
, name
, type
);
1233 if (!parallelized_functions
)
1234 parallelized_functions
= BITMAP_GGC_ALLOC ();
1235 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
1237 TREE_STATIC (decl
) = 1;
1238 TREE_USED (decl
) = 1;
1239 DECL_ARTIFICIAL (decl
) = 1;
1240 DECL_IGNORED_P (decl
) = 0;
1241 TREE_PUBLIC (decl
) = 0;
1242 DECL_UNINLINABLE (decl
) = 1;
1243 DECL_EXTERNAL (decl
) = 0;
1244 DECL_CONTEXT (decl
) = NULL_TREE
;
1245 DECL_INITIAL (decl
) = make_node (BLOCK
);
1247 t
= build_decl (BUILTINS_LOCATION
,
1248 RESULT_DECL
, NULL_TREE
, void_type_node
);
1249 DECL_ARTIFICIAL (t
) = 1;
1250 DECL_IGNORED_P (t
) = 1;
1251 DECL_RESULT (decl
) = t
;
1253 t
= build_decl (BUILTINS_LOCATION
,
1254 PARM_DECL
, get_identifier (".paral_data_param"),
1256 DECL_ARTIFICIAL (t
) = 1;
1257 DECL_ARG_TYPE (t
) = ptr_type_node
;
1258 DECL_CONTEXT (t
) = decl
;
1260 DECL_ARGUMENTS (decl
) = t
;
1262 allocate_struct_function (decl
, false);
1264 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1266 set_cfun (act_cfun
);
1271 /* Moves the exit condition of LOOP to the beginning of its header, and
1272 duplicates the part of the last iteration that gets disabled to the
1273 exit of the loop. NIT is the number of iterations of the loop
1274 (used to initialize the variables in the duplicated part).
1276 TODO: the common case is that latch of the loop is empty and immediately
1277 follows the loop exit. In this case, it would be better not to copy the
1278 body of the loop, but only move the entry of the loop directly before the
1279 exit check and increase the number of iterations of the loop by one.
1280 This may need some additional preconditioning in case NIT = ~0.
1281 REDUCTION_LIST describes the reductions in LOOP. */
1284 transform_to_exit_first_loop (struct loop
*loop
, htab_t reduction_list
, tree nit
)
1286 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1289 edge exit
= single_dom_exit (loop
), hpred
;
1290 tree control
, control_name
, res
, t
;
1291 gimple phi
, nphi
, cond_stmt
, stmt
;
1292 gimple_stmt_iterator gsi
;
1294 split_block_after_labels (loop
->header
);
1295 orig_header
= single_succ (loop
->header
);
1296 hpred
= single_succ_edge (loop
->header
);
1298 cond_stmt
= last_stmt (exit
->src
);
1299 control
= gimple_cond_lhs (cond_stmt
);
1300 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1302 /* Make sure that we have phi nodes on exit for all loop header phis
1303 (create_parallel_loop requires that). */
1304 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1306 phi
= gsi_stmt (gsi
);
1307 res
= PHI_RESULT (phi
);
1308 t
= make_ssa_name (SSA_NAME_VAR (res
), phi
);
1309 SET_PHI_RESULT (phi
, t
);
1311 nphi
= create_phi_node (res
, orig_header
);
1312 SSA_NAME_DEF_STMT (res
) = nphi
;
1313 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1317 gimple_cond_set_lhs (cond_stmt
, t
);
1318 update_stmt (cond_stmt
);
1323 bbs
= get_loop_body_in_dom_order (loop
);
1324 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1326 nbbs
= XNEWVEC (basic_block
, n
);
1327 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1334 /* Other than reductions, the only gimple reg that should be copied
1335 out of the loop is the control variable. */
1337 control_name
= NULL_TREE
;
1338 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1340 phi
= gsi_stmt (gsi
);
1341 res
= PHI_RESULT (phi
);
1342 if (!is_gimple_reg (res
))
1348 /* Check if it is a part of reduction. If it is,
1349 keep the phi at the reduction's keep_res field. The
1350 PHI_RESULT of this phi is the resulting value of the reduction
1351 variable when exiting the loop. */
1353 exit
= single_dom_exit (loop
);
1355 if (htab_elements (reduction_list
) > 0)
1357 struct reduction_info
*red
;
1359 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1361 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1364 red
->keep_res
= phi
;
1369 gcc_assert (control_name
== NULL_TREE
1370 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1372 remove_phi_node (&gsi
, false);
1374 gcc_assert (control_name
!= NULL_TREE
);
1376 /* Initialize the control variable to NIT. */
1377 gsi
= gsi_after_labels (ex_bb
);
1378 nit
= force_gimple_operand_gsi (&gsi
,
1379 fold_convert (TREE_TYPE (control_name
), nit
),
1380 false, NULL_TREE
, false, GSI_SAME_STMT
);
1381 stmt
= gimple_build_assign (control_name
, nit
);
1382 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1383 SSA_NAME_DEF_STMT (control_name
) = stmt
;
1386 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1387 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1388 NEW_DATA is the variable that should be initialized from the argument
1389 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1390 basic block containing GIMPLE_OMP_PARALLEL tree. */
1393 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1394 tree new_data
, unsigned n_threads
)
1396 gimple_stmt_iterator gsi
;
1397 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1399 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1400 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1401 edge exit
, nexit
, guard
, end
, e
;
1403 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1404 bb
= loop_preheader_edge (loop
)->src
;
1405 paral_bb
= single_pred (bb
);
1406 gsi
= gsi_last_bb (paral_bb
);
1408 t
= build_omp_clause (BUILTINS_LOCATION
, OMP_CLAUSE_NUM_THREADS
);
1409 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1410 = build_int_cst (integer_type_node
, n_threads
);
1411 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1413 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1415 /* Initialize NEW_DATA. */
1418 gsi
= gsi_after_labels (bb
);
1420 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1421 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1422 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1423 SSA_NAME_DEF_STMT (param
) = stmt
;
1425 stmt
= gimple_build_assign (new_data
,
1426 fold_convert (TREE_TYPE (new_data
), param
));
1427 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1428 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1431 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1432 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1433 gsi
= gsi_last_bb (bb
);
1434 gsi_insert_after (&gsi
, gimple_build_omp_return (false), GSI_NEW_STMT
);
1436 /* Extract data for GIMPLE_OMP_FOR. */
1437 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1438 cond_stmt
= last_stmt (loop
->header
);
1440 cvar
= gimple_cond_lhs (cond_stmt
);
1441 cvar_base
= SSA_NAME_VAR (cvar
);
1442 phi
= SSA_NAME_DEF_STMT (cvar
);
1443 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1444 initvar
= make_ssa_name (cvar_base
, NULL
);
1445 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1447 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1449 gsi
= gsi_last_bb (loop
->latch
);
1450 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1451 gsi_remove (&gsi
, true);
1454 for_bb
= split_edge (loop_preheader_edge (loop
));
1455 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1456 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1457 gcc_assert (exit
== single_dom_exit (loop
));
1459 guard
= make_edge (for_bb
, ex_bb
, 0);
1460 single_succ_edge (loop
->latch
)->flags
= 0;
1461 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1462 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1464 source_location locus
;
1466 phi
= gsi_stmt (gsi
);
1467 res
= PHI_RESULT (phi
);
1468 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1470 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1471 locus
= gimple_phi_arg_location_from_edge (stmt
,
1472 loop_preheader_edge (loop
));
1473 add_phi_arg (phi
, def
, guard
, locus
);
1475 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1476 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1477 add_phi_arg (phi
, def
, end
, locus
);
1479 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1480 PENDING_STMT (e
) = NULL
;
1482 /* Emit GIMPLE_OMP_FOR. */
1483 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1484 type
= TREE_TYPE (cvar
);
1485 t
= build_omp_clause (BUILTINS_LOCATION
, OMP_CLAUSE_SCHEDULE
);
1486 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1488 for_stmt
= gimple_build_omp_for (NULL
, t
, 1, NULL
);
1489 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1490 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1491 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1492 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1493 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1495 build_int_cst (type
, 1)));
1497 gsi
= gsi_last_bb (for_bb
);
1498 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1499 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1501 /* Emit GIMPLE_OMP_CONTINUE. */
1502 gsi
= gsi_last_bb (loop
->latch
);
1503 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1504 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1505 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1507 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1508 gsi
= gsi_last_bb (ex_bb
);
1509 gsi_insert_after (&gsi
, gimple_build_omp_return (true), GSI_NEW_STMT
);
1514 /* Generates code to execute the iterations of LOOP in N_THREADS
1515 threads in parallel.
1517 NITER describes number of iterations of LOOP.
1518 REDUCTION_LIST describes the reductions existent in the LOOP. */
1521 gen_parallel_loop (struct loop
*loop
, htab_t reduction_list
,
1522 unsigned n_threads
, struct tree_niter_desc
*niter
)
1526 tree many_iterations_cond
, type
, nit
;
1527 tree arg_struct
, new_arg_struct
;
1529 basic_block parallel_head
;
1531 struct clsn_data clsn_data
;
1536 ---------------------------------------------------------------------
1539 IV = phi (INIT, IV + STEP)
1545 ---------------------------------------------------------------------
1547 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1548 we generate the following code:
1550 ---------------------------------------------------------------------
1553 || NITER < MIN_PER_THREAD * N_THREADS)
1557 store all local loop-invariant variables used in body of the loop to DATA.
1558 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1559 load the variables from DATA.
1560 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1563 GIMPLE_OMP_CONTINUE;
1564 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1565 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1571 IV = phi (INIT, IV + STEP)
1582 /* Create two versions of the loop -- in the old one, we know that the
1583 number of iterations is large enough, and we will transform it into the
1584 loop that will be split to loop_fn, the new one will be used for the
1585 remaining iterations. */
1587 type
= TREE_TYPE (niter
->niter
);
1588 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1591 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1593 many_iterations_cond
=
1594 fold_build2 (GE_EXPR
, boolean_type_node
,
1595 nit
, build_int_cst (type
, MIN_PER_THREAD
* n_threads
));
1596 many_iterations_cond
1597 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1598 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1599 many_iterations_cond
);
1600 many_iterations_cond
1601 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1603 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1604 if (!is_gimple_condexpr (many_iterations_cond
))
1606 many_iterations_cond
1607 = force_gimple_operand (many_iterations_cond
, &stmts
,
1610 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1613 initialize_original_copy_tables ();
1615 /* We assume that the loop usually iterates a lot. */
1616 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1617 nloop
= loop_version (loop
, many_iterations_cond
, NULL
,
1618 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1619 update_ssa (TODO_update_ssa
);
1620 free_original_copy_tables ();
1622 /* Base all the induction variables in LOOP on a single control one. */
1623 canonicalize_loop_ivs (loop
, &nit
);
1625 /* Ensure that the exit condition is the first statement in the loop. */
1626 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1628 /* Generate initializations for reductions. */
1629 if (htab_elements (reduction_list
) > 0)
1630 htab_traverse (reduction_list
, initialize_reductions
, loop
);
1632 /* Eliminate the references to local variables from the loop. */
1633 gcc_assert (single_exit (loop
));
1634 entry
= loop_preheader_edge (loop
);
1635 exit
= single_dom_exit (loop
);
1637 eliminate_local_variables (entry
, exit
);
1638 /* In the old loop, move all variables non-local to the loop to a structure
1639 and back, and create separate decls for the variables used in loop. */
1640 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1641 &new_arg_struct
, &clsn_data
);
1643 /* Create the parallel constructs. */
1644 parallel_head
= create_parallel_loop (loop
, create_loop_fn (), arg_struct
,
1645 new_arg_struct
, n_threads
);
1646 if (htab_elements (reduction_list
) > 0)
1647 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1651 /* Cancel the loop (it is simpler to do it here rather than to teach the
1652 expander to do it). */
1653 cancel_loop_tree (loop
);
1655 /* Free loop bound estimations that could contain references to
1656 removed statements. */
1657 FOR_EACH_LOOP (li
, loop
, 0)
1658 free_numbers_of_iterations_estimates_loop (loop
);
1660 /* Expand the parallel constructs. We do it directly here instead of running
1661 a separate expand_omp pass, since it is more efficient, and less likely to
1662 cause troubles with further analyses not being able to deal with the
1665 omp_expand_local (parallel_head
);
1668 /* Returns true when LOOP contains vector phi nodes. */
1671 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1674 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1675 gimple_stmt_iterator gsi
;
1678 for (i
= 0; i
< loop
->num_nodes
; i
++)
1679 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1680 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1689 /* Create a reduction_info struct, initialize it with REDUC_STMT
1690 and PHI, insert it to the REDUCTION_LIST. */
1693 build_new_reduction (htab_t reduction_list
, gimple reduc_stmt
, gimple phi
)
1696 struct reduction_info
*new_reduction
;
1698 gcc_assert (reduc_stmt
);
1700 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1703 "Detected reduction. reduction stmt is: \n");
1704 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1705 fprintf (dump_file
, "\n");
1708 new_reduction
= XCNEW (struct reduction_info
);
1710 new_reduction
->reduc_stmt
= reduc_stmt
;
1711 new_reduction
->reduc_phi
= phi
;
1712 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1713 slot
= htab_find_slot (reduction_list
, new_reduction
, INSERT
);
1714 *slot
= new_reduction
;
1717 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1720 gather_scalar_reductions (loop_p loop
, htab_t reduction_list
)
1722 gimple_stmt_iterator gsi
;
1723 loop_vec_info simple_loop_info
;
1726 simple_loop_info
= vect_analyze_loop_form (loop
);
1728 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1730 gimple phi
= gsi_stmt (gsi
);
1732 tree res
= PHI_RESULT (phi
);
1735 if (!is_gimple_reg (res
))
1738 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1739 && simple_loop_info
)
1741 gimple reduc_stmt
= vect_is_simple_reduction (simple_loop_info
, phi
, true, &double_reduc
);
1743 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
1746 destroy_loop_vec_info (simple_loop_info
, true);
1749 /* Try to initialize NITER for code generation part. */
1752 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
1754 edge exit
= single_dom_exit (loop
);
1758 /* We need to know # of iterations, and there should be no uses of values
1759 defined inside loop outside of it, unless the values are invariants of
1761 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
1763 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1764 fprintf (dump_file
, " FAILED: number of iterations not known\n");
1771 /* Try to initialize REDUCTION_LIST for code generation part.
1772 REDUCTION_LIST describes the reductions. */
1775 try_create_reduction_list (loop_p loop
, htab_t reduction_list
)
1777 edge exit
= single_dom_exit (loop
);
1778 gimple_stmt_iterator gsi
;
1782 gather_scalar_reductions (loop
, reduction_list
);
1785 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1787 gimple phi
= gsi_stmt (gsi
);
1788 struct reduction_info
*red
;
1789 imm_use_iterator imm_iter
;
1790 use_operand_p use_p
;
1792 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1794 if (is_gimple_reg (val
))
1796 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1798 fprintf (dump_file
, "phi is ");
1799 print_gimple_stmt (dump_file
, phi
, 0, 0);
1800 fprintf (dump_file
, "arg of phi to exit: value ");
1801 print_generic_expr (dump_file
, val
, 0);
1802 fprintf (dump_file
, " used outside loop\n");
1804 " checking if it a part of reduction pattern: \n");
1806 if (htab_elements (reduction_list
) == 0)
1808 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1810 " FAILED: it is not a part of reduction.\n");
1814 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
1816 if (flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
1818 reduc_phi
= USE_STMT (use_p
);
1822 red
= reduction_phi (reduction_list
, reduc_phi
);
1825 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1827 " FAILED: it is not a part of reduction.\n");
1830 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1832 fprintf (dump_file
, "reduction phi is ");
1833 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
1834 fprintf (dump_file
, "reduction stmt is ");
1835 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
1840 /* The iterations of the loop may communicate only through bivs whose
1841 iteration space can be distributed efficiently. */
1842 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1844 gimple phi
= gsi_stmt (gsi
);
1845 tree def
= PHI_RESULT (phi
);
1848 if (is_gimple_reg (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
1850 struct reduction_info
*red
;
1852 red
= reduction_phi (reduction_list
, phi
);
1855 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1857 " FAILED: scalar dependency between iterations\n");
1867 /* Detect parallel loops and generate parallel code using libgomp
1868 primitives. Returns true if some loop was parallelized, false
1872 parallelize_loops (void)
1874 unsigned n_threads
= flag_tree_parallelize_loops
;
1875 bool changed
= false;
1877 struct tree_niter_desc niter_desc
;
1879 htab_t reduction_list
;
1881 /* Do not parallelize loops in the functions created by parallelization. */
1882 if (parallelized_function_p (cfun
->decl
))
1885 reduction_list
= htab_create (10, reduction_info_hash
,
1886 reduction_info_eq
, free
);
1887 init_stmt_vec_info_vec ();
1889 FOR_EACH_LOOP (li
, loop
, 0)
1891 htab_empty (reduction_list
);
1893 /* If we use autopar in graphite pass, we use it's marked dependency
1894 checking results. */
1895 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
1898 /* FIXME: Only consider innermost loops with just one exit. */
1899 if (loop
->inner
|| !single_dom_exit (loop
))
1902 if (/* And of course, the loop must be parallelizable. */
1903 !can_duplicate_loop_p (loop
)
1904 || loop_has_blocks_with_irreducible_flag (loop
)
1905 /* FIXME: the check for vector phi nodes could be removed. */
1906 || loop_has_vector_phi_nodes (loop
))
1909 /* FIXME: Bypass this check as graphite doesn't update the
1910 count and frequency correctly now. */
1911 if (!flag_loop_parallelize_all
1912 && (expected_loop_iterations (loop
) <= n_threads
1913 /* Do not bother with loops in cold areas. */
1914 || optimize_loop_nest_for_size_p (loop
)))
1917 if (!try_get_loop_niter (loop
, &niter_desc
))
1920 if (!try_create_reduction_list (loop
, reduction_list
))
1923 if (!flag_loop_parallelize_all
&& !loop_parallel_p (loop
))
1927 gen_parallel_loop (loop
, reduction_list
,
1928 n_threads
, &niter_desc
);
1929 verify_flow_info ();
1930 verify_dominators (CDI_DOMINATORS
);
1931 verify_loop_structure ();
1932 verify_loop_closed_ssa ();
1935 free_stmt_vec_info_vec ();
1936 htab_delete (reduction_list
);
1938 /* Parallelization will cause new function calls to be inserted through
1939 which local variables will escape. Reset the points-to solutions
1940 for ESCAPED and CALLUSED. */
1943 pt_solution_reset (&cfun
->gimple_df
->escaped
);
1944 pt_solution_reset (&cfun
->gimple_df
->callused
);
1950 #include "gt-tree-parloops.h"