PR tree-optimization/43833
[official-gcc/alias-decl.git] / gcc / tree-parloops.c
blob69a0b659616d57f6be5fac1adceccce300e6494a
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5 Zdenek Dvorak <dvorakz@suse.cz>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tree-flow.h"
30 #include "cfgloop.h"
31 #include "ggc.h"
32 #include "tree-data-ref.h"
33 #include "diagnostic.h"
34 #include "tree-pass.h"
35 #include "tree-scalar-evolution.h"
36 #include "hashtab.h"
37 #include "langhooks.h"
38 #include "tree-vectorizer.h"
40 /* This pass tries to distribute iterations of loops into several threads.
41 The implementation is straightforward -- for each loop we test whether its
42 iterations are independent, and if it is the case (and some additional
43 conditions regarding profitability and correctness are satisfied), we
44 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
45 machinery do its job.
47 The most of the complexity is in bringing the code into shape expected
48 by the omp expanders:
49 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
50 variable and that the exit test is at the start of the loop body
51 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
52 variables by accesses through pointers, and breaking up ssa chains
53 by storing the values incoming to the parallelized loop to a structure
54 passed to the new function as an argument (something similar is done
55 in omp gimplification, unfortunately only a small part of the code
56 can be shared).
58 TODO:
59 -- if there are several parallelizable loops in a function, it may be
60 possible to generate the threads just once (using synchronization to
61 ensure that cross-loop dependences are obeyed).
62 -- handling of common scalar dependence patterns (accumulation, ...)
63 -- handling of non-innermost loops */
66 Reduction handling:
67 currently we use vect_is_simple_reduction() to detect reduction patterns.
68 The code transformation will be introduced by an example.
71 parloop
73 int sum=1;
75 for (i = 0; i < N; i++)
77 x[i] = i + 3;
78 sum+=x[i];
82 gimple-like code:
83 header_bb:
85 # sum_29 = PHI <sum_11(5), 1(3)>
86 # i_28 = PHI <i_12(5), 0(3)>
87 D.1795_8 = i_28 + 3;
88 x[i_28] = D.1795_8;
89 sum_11 = D.1795_8 + sum_29;
90 i_12 = i_28 + 1;
91 if (N_6(D) > i_12)
92 goto header_bb;
95 exit_bb:
97 # sum_21 = PHI <sum_11(4)>
98 printf (&"%d"[0], sum_21);
101 after reduction transformation (only relevant parts):
103 parloop
106 ....
109 # Storing the initial value given by the user. #
111 .paral_data_store.32.sum.27 = 1;
113 #pragma omp parallel num_threads(4)
115 #pragma omp for schedule(static)
117 # The neutral element corresponding to the particular
118 reduction's operation, e.g. 0 for PLUS_EXPR,
119 1 for MULT_EXPR, etc. replaces the user's initial value. #
121 # sum.27_29 = PHI <sum.27_11, 0>
123 sum.27_11 = D.1827_8 + sum.27_29;
125 GIMPLE_OMP_CONTINUE
127 # Adding this reduction phi is done at create_phi_for_local_result() #
128 # sum.27_56 = PHI <sum.27_11, 0>
129 GIMPLE_OMP_RETURN
131 # Creating the atomic operation is done at
132 create_call_for_reduction_1() #
134 #pragma omp atomic_load
135 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136 D.1840_60 = sum.27_56 + D.1839_59;
137 #pragma omp atomic_store (D.1840_60);
139 GIMPLE_OMP_RETURN
141 # collecting the result after the join of the threads is done at
142 create_loads_for_reductions().
143 The value computed by the threads is loaded from the
144 shared struct. #
147 .paral_data_load.33_52 = &.paral_data_store.32;
148 sum_37 = .paral_data_load.33_52->sum.27;
149 sum_43 = D.1795_41 + sum_37;
151 exit bb:
152 # sum_21 = PHI <sum_43, sum_26>
153 printf (&"%d"[0], sum_21);
161 /* Minimal number of iterations of a loop that should be executed in each
162 thread. */
163 #define MIN_PER_THREAD 100
165 /* Element of the hashtable, representing a
166 reduction in the current loop. */
167 struct reduction_info
169 gimple reduc_stmt; /* reduction statement. */
170 gimple reduc_phi; /* The phi node defining the reduction. */
171 enum tree_code reduction_code;/* code for the reduction operation. */
172 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
173 of the reduction variable when existing the loop. */
174 tree initial_value; /* The initial value of the reduction var before entering the loop. */
175 tree field; /* the name of the field in the parloop data structure intended for reduction. */
176 tree init; /* reduction initialization value. */
177 gimple new_phi; /* (helper field) Newly created phi node whose result
178 will be passed to the atomic operation. Represents
179 the local result each thread computed for the reduction
180 operation. */
183 /* Equality and hash functions for hashtab code. */
185 static int
186 reduction_info_eq (const void *aa, const void *bb)
188 const struct reduction_info *a = (const struct reduction_info *) aa;
189 const struct reduction_info *b = (const struct reduction_info *) bb;
191 return (a->reduc_phi == b->reduc_phi);
194 static hashval_t
195 reduction_info_hash (const void *aa)
197 const struct reduction_info *a = (const struct reduction_info *) aa;
199 return htab_hash_pointer (a->reduc_phi);
202 static struct reduction_info *
203 reduction_phi (htab_t reduction_list, gimple phi)
205 struct reduction_info tmpred, *red;
207 if (htab_elements (reduction_list) == 0)
208 return NULL;
210 tmpred.reduc_phi = phi;
211 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
213 return red;
216 /* Element of hashtable of names to copy. */
218 struct name_to_copy_elt
220 unsigned version; /* The version of the name to copy. */
221 tree new_name; /* The new name used in the copy. */
222 tree field; /* The field of the structure used to pass the
223 value. */
226 /* Equality and hash functions for hashtab code. */
228 static int
229 name_to_copy_elt_eq (const void *aa, const void *bb)
231 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
232 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
234 return a->version == b->version;
237 static hashval_t
238 name_to_copy_elt_hash (const void *aa)
240 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
242 return (hashval_t) a->version;
246 /* Data dependency analysis. Returns true if the iterations of LOOP
247 are independent on each other (that is, if we can execute them
248 in parallel). */
250 static bool
251 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
253 VEC (ddr_p, heap) * dependence_relations;
254 VEC (data_reference_p, heap) *datarefs;
255 lambda_trans_matrix trans;
256 bool ret = false;
258 if (dump_file && (dump_flags & TDF_DETAILS))
260 fprintf (dump_file, "Considering loop %d\n", loop->num);
261 if (!loop->inner)
262 fprintf (dump_file, "loop is innermost\n");
263 else
264 fprintf (dump_file, "loop NOT innermost\n");
267 /* Check for problems with dependences. If the loop can be reversed,
268 the iterations are independent. */
269 datarefs = VEC_alloc (data_reference_p, heap, 10);
270 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
271 compute_data_dependences_for_loop (loop, true, &datarefs,
272 &dependence_relations);
273 if (dump_file && (dump_flags & TDF_DETAILS))
274 dump_data_dependence_relations (dump_file, dependence_relations);
276 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
277 LTM_MATRIX (trans)[0][0] = -1;
279 if (lambda_transform_legal_p (trans, 1, dependence_relations))
281 ret = true;
282 if (dump_file && (dump_flags & TDF_DETAILS))
283 fprintf (dump_file, " SUCCESS: may be parallelized\n");
285 else if (dump_file && (dump_flags & TDF_DETAILS))
286 fprintf (dump_file,
287 " FAILED: data dependencies exist across iterations\n");
289 free_dependence_relations (dependence_relations);
290 free_data_refs (datarefs);
292 return ret;
295 /* Return true when LOOP contains basic blocks marked with the
296 BB_IRREDUCIBLE_LOOP flag. */
298 static inline bool
299 loop_has_blocks_with_irreducible_flag (struct loop *loop)
301 unsigned i;
302 basic_block *bbs = get_loop_body_in_dom_order (loop);
303 bool res = true;
305 for (i = 0; i < loop->num_nodes; i++)
306 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
307 goto end;
309 res = false;
310 end:
311 free (bbs);
312 return res;
315 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
316 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
317 to their addresses that can be reused. The address of OBJ is known to
318 be invariant in the whole function. */
320 static tree
321 take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
323 int uid;
324 void **dslot;
325 struct int_tree_map ielt, *nielt;
326 tree *var_p, name, bvar, addr;
327 gimple stmt;
328 gimple_seq stmts;
330 /* Since the address of OBJ is invariant, the trees may be shared.
331 Avoid rewriting unrelated parts of the code. */
332 obj = unshare_expr (obj);
333 for (var_p = &obj;
334 handled_component_p (*var_p);
335 var_p = &TREE_OPERAND (*var_p, 0))
336 continue;
337 uid = DECL_UID (*var_p);
339 ielt.uid = uid;
340 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
341 if (!*dslot)
343 addr = build_addr (*var_p, current_function_decl);
344 bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
345 add_referenced_var (bvar);
346 stmt = gimple_build_assign (bvar, addr);
347 name = make_ssa_name (bvar, stmt);
348 gimple_assign_set_lhs (stmt, name);
349 gsi_insert_on_edge_immediate (entry, stmt);
351 nielt = XNEW (struct int_tree_map);
352 nielt->uid = uid;
353 nielt->to = name;
354 *dslot = nielt;
356 else
357 name = ((struct int_tree_map *) *dslot)->to;
359 if (var_p != &obj)
361 *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
362 name = force_gimple_operand (build_addr (obj, current_function_decl),
363 &stmts, true, NULL_TREE);
364 if (!gimple_seq_empty_p (stmts))
365 gsi_insert_seq_on_edge_immediate (entry, stmts);
368 if (TREE_TYPE (name) != type)
370 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
371 NULL_TREE);
372 if (!gimple_seq_empty_p (stmts))
373 gsi_insert_seq_on_edge_immediate (entry, stmts);
376 return name;
379 /* Callback for htab_traverse. Create the initialization statement
380 for reduction described in SLOT, and place it at the preheader of
381 the loop described in DATA. */
383 static int
384 initialize_reductions (void **slot, void *data)
386 tree init, c;
387 tree bvar, type, arg;
388 edge e;
390 struct reduction_info *const reduc = (struct reduction_info *) *slot;
391 struct loop *loop = (struct loop *) data;
393 /* Create initialization in preheader:
394 reduction_variable = initialization value of reduction. */
396 /* In the phi node at the header, replace the argument coming
397 from the preheader with the reduction initialization value. */
399 /* Create a new variable to initialize the reduction. */
400 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
401 bvar = create_tmp_var (type, "reduction");
402 add_referenced_var (bvar);
404 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
405 OMP_CLAUSE_REDUCTION);
406 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
407 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
409 init = omp_reduction_init (c, TREE_TYPE (bvar));
410 reduc->init = init;
412 /* Replace the argument representing the initialization value
413 with the initialization value for the reduction (neutral
414 element for the particular operation, e.g. 0 for PLUS_EXPR,
415 1 for MULT_EXPR, etc).
416 Keep the old value in a new variable "reduction_initial",
417 that will be taken in consideration after the parallel
418 computing is done. */
420 e = loop_preheader_edge (loop);
421 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
422 /* Create new variable to hold the initial value. */
424 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
425 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
426 reduc->initial_value = arg;
427 return 1;
430 struct elv_data
432 struct walk_stmt_info info;
433 edge entry;
434 htab_t decl_address;
435 bool changed;
438 /* Eliminates references to local variables in *TP out of the single
439 entry single exit region starting at DTA->ENTRY.
440 DECL_ADDRESS contains addresses of the references that had their
441 address taken already. If the expression is changed, CHANGED is
442 set to true. Callback for walk_tree. */
444 static tree
445 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
447 struct elv_data *const dta = (struct elv_data *) data;
448 tree t = *tp, var, addr, addr_type, type, obj;
450 if (DECL_P (t))
452 *walk_subtrees = 0;
454 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
455 return NULL_TREE;
457 type = TREE_TYPE (t);
458 addr_type = build_pointer_type (type);
459 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
460 *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
462 dta->changed = true;
463 return NULL_TREE;
466 if (TREE_CODE (t) == ADDR_EXPR)
468 /* ADDR_EXPR may appear in two contexts:
469 -- as a gimple operand, when the address taken is a function invariant
470 -- as gimple rhs, when the resulting address in not a function
471 invariant
472 We do not need to do anything special in the latter case (the base of
473 the memory reference whose address is taken may be replaced in the
474 DECL_P case). The former case is more complicated, as we need to
475 ensure that the new address is still a gimple operand. Thus, it
476 is not sufficient to replace just the base of the memory reference --
477 we need to move the whole computation of the address out of the
478 loop. */
479 if (!is_gimple_val (t))
480 return NULL_TREE;
482 *walk_subtrees = 0;
483 obj = TREE_OPERAND (t, 0);
484 var = get_base_address (obj);
485 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
486 return NULL_TREE;
488 addr_type = TREE_TYPE (t);
489 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
490 *tp = addr;
492 dta->changed = true;
493 return NULL_TREE;
496 if (!EXPR_P (t))
497 *walk_subtrees = 0;
499 return NULL_TREE;
502 /* Moves the references to local variables in STMT out of the single
503 entry single exit region starting at ENTRY. DECL_ADDRESS contains
504 addresses of the references that had their address taken
505 already. */
507 static void
508 eliminate_local_variables_stmt (edge entry, gimple stmt,
509 htab_t decl_address)
511 struct elv_data dta;
513 memset (&dta.info, '\0', sizeof (dta.info));
514 dta.entry = entry;
515 dta.decl_address = decl_address;
516 dta.changed = false;
518 if (gimple_debug_bind_p (stmt))
519 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
520 eliminate_local_variables_1, &dta.info, NULL);
521 else
522 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
524 if (dta.changed)
525 update_stmt (stmt);
528 /* Eliminates the references to local variables from the single entry
529 single exit region between the ENTRY and EXIT edges.
531 This includes:
532 1) Taking address of a local variable -- these are moved out of the
533 region (and temporary variable is created to hold the address if
534 necessary).
536 2) Dereferencing a local variable -- these are replaced with indirect
537 references. */
539 static void
540 eliminate_local_variables (edge entry, edge exit)
542 basic_block bb;
543 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
544 unsigned i;
545 gimple_stmt_iterator gsi;
546 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
547 free);
548 basic_block entry_bb = entry->src;
549 basic_block exit_bb = exit->dest;
551 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
553 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
554 if (bb != entry_bb && bb != exit_bb)
555 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
556 eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
557 decl_address);
559 htab_delete (decl_address);
560 VEC_free (basic_block, heap, body);
563 /* Returns true if expression EXPR is not defined between ENTRY and
564 EXIT, i.e. if all its operands are defined outside of the region. */
566 static bool
567 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
569 basic_block entry_bb = entry->src;
570 basic_block exit_bb = exit->dest;
571 basic_block def_bb;
573 if (is_gimple_min_invariant (expr))
574 return true;
576 if (TREE_CODE (expr) == SSA_NAME)
578 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
579 if (def_bb
580 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
581 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
582 return false;
584 return true;
587 return false;
590 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
591 The copies are stored to NAME_COPIES, if NAME was already duplicated,
592 its duplicate stored in NAME_COPIES is returned.
594 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
595 duplicated, storing the copies in DECL_COPIES. */
597 static tree
598 separate_decls_in_region_name (tree name,
599 htab_t name_copies, htab_t decl_copies,
600 bool copy_name_p)
602 tree copy, var, var_copy;
603 unsigned idx, uid, nuid;
604 struct int_tree_map ielt, *nielt;
605 struct name_to_copy_elt elt, *nelt;
606 void **slot, **dslot;
608 if (TREE_CODE (name) != SSA_NAME)
609 return name;
611 idx = SSA_NAME_VERSION (name);
612 elt.version = idx;
613 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
614 copy_name_p ? INSERT : NO_INSERT);
615 if (slot && *slot)
616 return ((struct name_to_copy_elt *) *slot)->new_name;
618 var = SSA_NAME_VAR (name);
619 uid = DECL_UID (var);
620 ielt.uid = uid;
621 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
622 if (!*dslot)
624 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
625 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
626 add_referenced_var (var_copy);
627 nielt = XNEW (struct int_tree_map);
628 nielt->uid = uid;
629 nielt->to = var_copy;
630 *dslot = nielt;
632 /* Ensure that when we meet this decl next time, we won't duplicate
633 it again. */
634 nuid = DECL_UID (var_copy);
635 ielt.uid = nuid;
636 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
637 gcc_assert (!*dslot);
638 nielt = XNEW (struct int_tree_map);
639 nielt->uid = nuid;
640 nielt->to = var_copy;
641 *dslot = nielt;
643 else
644 var_copy = ((struct int_tree_map *) *dslot)->to;
646 if (copy_name_p)
648 copy = duplicate_ssa_name (name, NULL);
649 nelt = XNEW (struct name_to_copy_elt);
650 nelt->version = idx;
651 nelt->new_name = copy;
652 nelt->field = NULL_TREE;
653 *slot = nelt;
655 else
657 gcc_assert (!slot);
658 copy = name;
661 SSA_NAME_VAR (copy) = var_copy;
662 return copy;
665 /* Finds the ssa names used in STMT that are defined outside the
666 region between ENTRY and EXIT and replaces such ssa names with
667 their duplicates. The duplicates are stored to NAME_COPIES. Base
668 decls of all ssa names used in STMT (including those defined in
669 LOOP) are replaced with the new temporary variables; the
670 replacement decls are stored in DECL_COPIES. */
672 static void
673 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
674 htab_t name_copies, htab_t decl_copies)
676 use_operand_p use;
677 def_operand_p def;
678 ssa_op_iter oi;
679 tree name, copy;
680 bool copy_name_p;
682 mark_virtual_ops_for_renaming (stmt);
684 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
686 name = DEF_FROM_PTR (def);
687 gcc_assert (TREE_CODE (name) == SSA_NAME);
688 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
689 false);
690 gcc_assert (copy == name);
693 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
695 name = USE_FROM_PTR (use);
696 if (TREE_CODE (name) != SSA_NAME)
697 continue;
699 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
700 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
701 copy_name_p);
702 SET_USE (use, copy);
706 /* Finds the ssa names used in STMT that are defined outside the
707 region between ENTRY and EXIT and replaces such ssa names with
708 their duplicates. The duplicates are stored to NAME_COPIES. Base
709 decls of all ssa names used in STMT (including those defined in
710 LOOP) are replaced with the new temporary variables; the
711 replacement decls are stored in DECL_COPIES. */
713 static bool
714 separate_decls_in_region_debug_bind (gimple stmt,
715 htab_t name_copies, htab_t decl_copies)
717 use_operand_p use;
718 ssa_op_iter oi;
719 tree var, name;
720 struct int_tree_map ielt;
721 struct name_to_copy_elt elt;
722 void **slot, **dslot;
724 var = gimple_debug_bind_get_var (stmt);
725 if (TREE_CODE (var) == DEBUG_EXPR_DECL)
726 return true;
727 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
728 ielt.uid = DECL_UID (var);
729 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
730 if (!dslot)
731 return true;
732 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
734 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
736 name = USE_FROM_PTR (use);
737 if (TREE_CODE (name) != SSA_NAME)
738 continue;
740 elt.version = SSA_NAME_VERSION (name);
741 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
742 if (!slot)
744 gimple_debug_bind_reset_value (stmt);
745 update_stmt (stmt);
746 break;
749 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
752 return false;
755 /* Callback for htab_traverse. Adds a field corresponding to the reduction
756 specified in SLOT. The type is passed in DATA. */
758 static int
759 add_field_for_reduction (void **slot, void *data)
762 struct reduction_info *const red = (struct reduction_info *) *slot;
763 tree const type = (tree) data;
764 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
765 tree field = build_decl (gimple_location (red->reduc_stmt),
766 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
768 insert_field_into_struct (type, field);
770 red->field = field;
772 return 1;
775 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
776 described in SLOT. The type is passed in DATA. */
778 static int
779 add_field_for_name (void **slot, void *data)
781 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
782 tree type = (tree) data;
783 tree name = ssa_name (elt->version);
784 tree var = SSA_NAME_VAR (name);
785 tree field = build_decl (DECL_SOURCE_LOCATION (var),
786 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
788 insert_field_into_struct (type, field);
789 elt->field = field;
791 return 1;
794 /* Callback for htab_traverse. A local result is the intermediate result
795 computed by a single
796 thread, or the initial value in case no iteration was executed.
797 This function creates a phi node reflecting these values.
798 The phi's result will be stored in NEW_PHI field of the
799 reduction's data structure. */
801 static int
802 create_phi_for_local_result (void **slot, void *data)
804 struct reduction_info *const reduc = (struct reduction_info *) *slot;
805 const struct loop *const loop = (const struct loop *) data;
806 edge e;
807 gimple new_phi;
808 basic_block store_bb;
809 tree local_res;
810 source_location locus;
812 /* STORE_BB is the block where the phi
813 should be stored. It is the destination of the loop exit.
814 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
815 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
817 /* STORE_BB has two predecessors. One coming from the loop
818 (the reduction's result is computed at the loop),
819 and another coming from a block preceding the loop,
820 when no iterations
821 are executed (the initial value should be taken). */
822 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
823 e = EDGE_PRED (store_bb, 1);
824 else
825 e = EDGE_PRED (store_bb, 0);
826 local_res
827 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
828 NULL);
829 locus = gimple_location (reduc->reduc_stmt);
830 new_phi = create_phi_node (local_res, store_bb);
831 SSA_NAME_DEF_STMT (local_res) = new_phi;
832 add_phi_arg (new_phi, reduc->init, e, locus);
833 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
834 FALLTHRU_EDGE (loop->latch), locus);
835 reduc->new_phi = new_phi;
837 return 1;
840 struct clsn_data
842 tree store;
843 tree load;
845 basic_block store_bb;
846 basic_block load_bb;
849 /* Callback for htab_traverse. Create an atomic instruction for the
850 reduction described in SLOT.
851 DATA annotates the place in memory the atomic operation relates to,
852 and the basic block it needs to be generated in. */
854 static int
855 create_call_for_reduction_1 (void **slot, void *data)
857 struct reduction_info *const reduc = (struct reduction_info *) *slot;
858 struct clsn_data *const clsn_data = (struct clsn_data *) data;
859 gimple_stmt_iterator gsi;
860 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
861 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
862 tree load_struct;
863 basic_block bb;
864 basic_block new_bb;
865 edge e;
866 tree t, addr, ref, x;
867 tree tmp_load, name;
868 gimple load;
870 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
871 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
873 addr = build_addr (t, current_function_decl);
875 /* Create phi node. */
876 bb = clsn_data->load_bb;
878 e = split_block (bb, t);
879 new_bb = e->dest;
881 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
882 add_referenced_var (tmp_load);
883 tmp_load = make_ssa_name (tmp_load, NULL);
884 load = gimple_build_omp_atomic_load (tmp_load, addr);
885 SSA_NAME_DEF_STMT (tmp_load) = load;
886 gsi = gsi_start_bb (new_bb);
887 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
889 e = split_block (new_bb, load);
890 new_bb = e->dest;
891 gsi = gsi_start_bb (new_bb);
892 ref = tmp_load;
893 x = fold_build2 (reduc->reduction_code,
894 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
895 PHI_RESULT (reduc->new_phi));
897 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
898 GSI_CONTINUE_LINKING);
900 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
901 return 1;
904 /* Create the atomic operation at the join point of the threads.
905 REDUCTION_LIST describes the reductions in the LOOP.
906 LD_ST_DATA describes the shared data structure where
907 shared data is stored in and loaded from. */
908 static void
909 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
910 struct clsn_data *ld_st_data)
912 htab_traverse (reduction_list, create_phi_for_local_result, loop);
913 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
914 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
915 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
918 /* Callback for htab_traverse. Loads the final reduction value at the
919 join point of all threads, and inserts it in the right place. */
921 static int
922 create_loads_for_reductions (void **slot, void *data)
924 struct reduction_info *const red = (struct reduction_info *) *slot;
925 struct clsn_data *const clsn_data = (struct clsn_data *) data;
926 gimple stmt;
927 gimple_stmt_iterator gsi;
928 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
929 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
930 tree load_struct;
931 tree name;
932 tree x;
934 gsi = gsi_after_labels (clsn_data->load_bb);
935 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
936 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
937 NULL_TREE);
939 x = load_struct;
940 name = PHI_RESULT (red->keep_res);
941 stmt = gimple_build_assign (name, x);
942 SSA_NAME_DEF_STMT (name) = stmt;
944 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
946 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
947 !gsi_end_p (gsi); gsi_next (&gsi))
948 if (gsi_stmt (gsi) == red->keep_res)
950 remove_phi_node (&gsi, false);
951 return 1;
953 gcc_unreachable ();
956 /* Load the reduction result that was stored in LD_ST_DATA.
957 REDUCTION_LIST describes the list of reductions that the
958 loads should be generated for. */
959 static void
960 create_final_loads_for_reduction (htab_t reduction_list,
961 struct clsn_data *ld_st_data)
963 gimple_stmt_iterator gsi;
964 tree t;
965 gimple stmt;
967 gsi = gsi_after_labels (ld_st_data->load_bb);
968 t = build_fold_addr_expr (ld_st_data->store);
969 stmt = gimple_build_assign (ld_st_data->load, t);
971 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
972 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
974 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
978 /* Callback for htab_traverse. Store the neutral value for the
979 particular reduction's operation, e.g. 0 for PLUS_EXPR,
980 1 for MULT_EXPR, etc. into the reduction field.
981 The reduction is specified in SLOT. The store information is
982 passed in DATA. */
984 static int
985 create_stores_for_reduction (void **slot, void *data)
987 struct reduction_info *const red = (struct reduction_info *) *slot;
988 struct clsn_data *const clsn_data = (struct clsn_data *) data;
989 tree t;
990 gimple stmt;
991 gimple_stmt_iterator gsi;
992 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
994 gsi = gsi_last_bb (clsn_data->store_bb);
995 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
996 stmt = gimple_build_assign (t, red->initial_value);
997 mark_virtual_ops_for_renaming (stmt);
998 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1000 return 1;
1003 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1004 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1005 specified in SLOT. */
1007 static int
1008 create_loads_and_stores_for_name (void **slot, void *data)
1010 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1011 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1012 tree t;
1013 gimple stmt;
1014 gimple_stmt_iterator gsi;
1015 tree type = TREE_TYPE (elt->new_name);
1016 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1017 tree load_struct;
1019 gsi = gsi_last_bb (clsn_data->store_bb);
1020 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1021 stmt = gimple_build_assign (t, ssa_name (elt->version));
1022 mark_virtual_ops_for_renaming (stmt);
1023 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1025 gsi = gsi_last_bb (clsn_data->load_bb);
1026 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
1027 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1028 stmt = gimple_build_assign (elt->new_name, t);
1029 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1030 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1032 return 1;
1035 /* Moves all the variables used in LOOP and defined outside of it (including
1036 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1037 name) to a structure created for this purpose. The code
1039 while (1)
1041 use (a);
1042 use (b);
1045 is transformed this way:
1047 bb0:
1048 old.a = a;
1049 old.b = b;
1051 bb1:
1052 a' = new->a;
1053 b' = new->b;
1054 while (1)
1056 use (a');
1057 use (b');
1060 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1061 pointer `new' is intentionally not initialized (the loop will be split to a
1062 separate function later, and `new' will be initialized from its arguments).
1063 LD_ST_DATA holds information about the shared data structure used to pass
1064 information among the threads. It is initialized here, and
1065 gen_parallel_loop will pass it to create_call_for_reduction that
1066 needs this information. REDUCTION_LIST describes the reductions
1067 in LOOP. */
1069 static void
1070 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1071 tree *arg_struct, tree *new_arg_struct,
1072 struct clsn_data *ld_st_data)
1075 basic_block bb1 = split_edge (entry);
1076 basic_block bb0 = single_pred (bb1);
1077 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1078 name_to_copy_elt_eq, free);
1079 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1080 free);
1081 unsigned i;
1082 tree type, type_name, nvar;
1083 gimple_stmt_iterator gsi;
1084 struct clsn_data clsn_data;
1085 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1086 basic_block bb;
1087 basic_block entry_bb = bb1;
1088 basic_block exit_bb = exit->dest;
1089 bool has_debug_stmt = false;
1091 entry = single_succ_edge (entry_bb);
1092 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1094 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1096 if (bb != entry_bb && bb != exit_bb)
1098 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1099 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1100 name_copies, decl_copies);
1102 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1104 gimple stmt = gsi_stmt (gsi);
1106 if (is_gimple_debug (stmt))
1107 has_debug_stmt = true;
1108 else
1109 separate_decls_in_region_stmt (entry, exit, stmt,
1110 name_copies, decl_copies);
1115 /* Now process debug bind stmts. We must not create decls while
1116 processing debug stmts, so we defer their processing so as to
1117 make sure we will have debug info for as many variables as
1118 possible (all of those that were dealt with in the loop above),
1119 and discard those for which we know there's nothing we can
1120 do. */
1121 if (has_debug_stmt)
1122 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1123 if (bb != entry_bb && bb != exit_bb)
1125 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1127 gimple stmt = gsi_stmt (gsi);
1129 if (gimple_debug_bind_p (stmt))
1131 if (separate_decls_in_region_debug_bind (stmt,
1132 name_copies,
1133 decl_copies))
1135 gsi_remove (&gsi, true);
1136 continue;
1140 gsi_next (&gsi);
1144 VEC_free (basic_block, heap, body);
1146 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1148 /* It may happen that there is nothing to copy (if there are only
1149 loop carried and external variables in the loop). */
1150 *arg_struct = NULL;
1151 *new_arg_struct = NULL;
1153 else
1155 /* Create the type for the structure to store the ssa names to. */
1156 type = lang_hooks.types.make_type (RECORD_TYPE);
1157 type_name = build_decl (BUILTINS_LOCATION,
1158 TYPE_DECL, create_tmp_var_name (".paral_data"),
1159 type);
1160 TYPE_NAME (type) = type_name;
1162 htab_traverse (name_copies, add_field_for_name, type);
1163 if (reduction_list && htab_elements (reduction_list) > 0)
1165 /* Create the fields for reductions. */
1166 htab_traverse (reduction_list, add_field_for_reduction,
1167 type);
1169 layout_type (type);
1171 /* Create the loads and stores. */
1172 *arg_struct = create_tmp_var (type, ".paral_data_store");
1173 add_referenced_var (*arg_struct);
1174 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1175 add_referenced_var (nvar);
1176 *new_arg_struct = make_ssa_name (nvar, NULL);
1178 ld_st_data->store = *arg_struct;
1179 ld_st_data->load = *new_arg_struct;
1180 ld_st_data->store_bb = bb0;
1181 ld_st_data->load_bb = bb1;
1183 htab_traverse (name_copies, create_loads_and_stores_for_name,
1184 ld_st_data);
1186 /* Load the calculation from memory (after the join of the threads). */
1188 if (reduction_list && htab_elements (reduction_list) > 0)
1190 htab_traverse (reduction_list, create_stores_for_reduction,
1191 ld_st_data);
1192 clsn_data.load = make_ssa_name (nvar, NULL);
1193 clsn_data.load_bb = exit->dest;
1194 clsn_data.store = ld_st_data->store;
1195 create_final_loads_for_reduction (reduction_list, &clsn_data);
1199 htab_delete (decl_copies);
1200 htab_delete (name_copies);
1203 /* Bitmap containing uids of functions created by parallelization. We cannot
1204 allocate it from the default obstack, as it must live across compilation
1205 of several functions; we make it gc allocated instead. */
1207 static GTY(()) bitmap parallelized_functions;
1209 /* Returns true if FN was created by create_loop_fn. */
1211 static bool
1212 parallelized_function_p (tree fn)
1214 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1215 return false;
1217 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1220 /* Creates and returns an empty function that will receive the body of
1221 a parallelized loop. */
1223 static tree
1224 create_loop_fn (void)
1226 char buf[100];
1227 char *tname;
1228 tree decl, type, name, t;
1229 struct function *act_cfun = cfun;
1230 static unsigned loopfn_num;
1232 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1233 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1234 clean_symbol_name (tname);
1235 name = get_identifier (tname);
1236 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1238 decl = build_decl (BUILTINS_LOCATION,
1239 FUNCTION_DECL, name, type);
1240 if (!parallelized_functions)
1241 parallelized_functions = BITMAP_GGC_ALLOC ();
1242 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1244 TREE_STATIC (decl) = 1;
1245 TREE_USED (decl) = 1;
1246 DECL_ARTIFICIAL (decl) = 1;
1247 DECL_IGNORED_P (decl) = 0;
1248 TREE_PUBLIC (decl) = 0;
1249 DECL_UNINLINABLE (decl) = 1;
1250 DECL_EXTERNAL (decl) = 0;
1251 DECL_CONTEXT (decl) = NULL_TREE;
1252 DECL_INITIAL (decl) = make_node (BLOCK);
1254 t = build_decl (BUILTINS_LOCATION,
1255 RESULT_DECL, NULL_TREE, void_type_node);
1256 DECL_ARTIFICIAL (t) = 1;
1257 DECL_IGNORED_P (t) = 1;
1258 DECL_RESULT (decl) = t;
1260 t = build_decl (BUILTINS_LOCATION,
1261 PARM_DECL, get_identifier (".paral_data_param"),
1262 ptr_type_node);
1263 DECL_ARTIFICIAL (t) = 1;
1264 DECL_ARG_TYPE (t) = ptr_type_node;
1265 DECL_CONTEXT (t) = decl;
1266 TREE_USED (t) = 1;
1267 DECL_ARGUMENTS (decl) = t;
1269 allocate_struct_function (decl, false);
1271 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1272 it. */
1273 set_cfun (act_cfun);
1275 return decl;
1278 /* Moves the exit condition of LOOP to the beginning of its header, and
1279 duplicates the part of the last iteration that gets disabled to the
1280 exit of the loop. NIT is the number of iterations of the loop
1281 (used to initialize the variables in the duplicated part).
1283 TODO: the common case is that latch of the loop is empty and immediately
1284 follows the loop exit. In this case, it would be better not to copy the
1285 body of the loop, but only move the entry of the loop directly before the
1286 exit check and increase the number of iterations of the loop by one.
1287 This may need some additional preconditioning in case NIT = ~0.
1288 REDUCTION_LIST describes the reductions in LOOP. */
1290 static void
1291 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1293 basic_block *bbs, *nbbs, ex_bb, orig_header;
1294 unsigned n;
1295 bool ok;
1296 edge exit = single_dom_exit (loop), hpred;
1297 tree control, control_name, res, t;
1298 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1299 gimple_stmt_iterator gsi;
1300 tree nit_1;
1302 split_block_after_labels (loop->header);
1303 orig_header = single_succ (loop->header);
1304 hpred = single_succ_edge (loop->header);
1306 cond_stmt = last_stmt (exit->src);
1307 control = gimple_cond_lhs (cond_stmt);
1308 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1310 /* Make sure that we have phi nodes on exit for all loop header phis
1311 (create_parallel_loop requires that). */
1312 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1314 phi = gsi_stmt (gsi);
1315 res = PHI_RESULT (phi);
1316 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1317 SET_PHI_RESULT (phi, t);
1318 nphi = create_phi_node (res, orig_header);
1319 SSA_NAME_DEF_STMT (res) = nphi;
1320 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1322 if (res == control)
1324 gimple_cond_set_lhs (cond_stmt, t);
1325 update_stmt (cond_stmt);
1326 control = t;
1329 bbs = get_loop_body_in_dom_order (loop);
1331 for (n = 0; bbs[n] != loop->latch; n++)
1332 continue;
1333 nbbs = XNEWVEC (basic_block, n);
1334 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1335 bbs + 1, n, nbbs);
1336 gcc_assert (ok);
1337 free (bbs);
1338 ex_bb = nbbs[0];
1339 free (nbbs);
1341 /* Other than reductions, the only gimple reg that should be copied
1342 out of the loop is the control variable. */
1344 control_name = NULL_TREE;
1345 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1347 phi = gsi_stmt (gsi);
1348 res = PHI_RESULT (phi);
1349 if (!is_gimple_reg (res))
1351 gsi_next (&gsi);
1352 continue;
1355 /* Check if it is a part of reduction. If it is,
1356 keep the phi at the reduction's keep_res field. The
1357 PHI_RESULT of this phi is the resulting value of the reduction
1358 variable when exiting the loop. */
1360 exit = single_dom_exit (loop);
1362 if (htab_elements (reduction_list) > 0)
1364 struct reduction_info *red;
1366 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1367 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1368 if (red)
1370 red->keep_res = phi;
1371 gsi_next (&gsi);
1372 continue;
1375 gcc_assert (control_name == NULL_TREE
1376 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1377 control_name = res;
1378 remove_phi_node (&gsi, false);
1380 gcc_assert (control_name != NULL_TREE);
1382 /* Initialize the control variable to number of iterations
1383 according to the rhs of the exit condition. */
1384 gsi = gsi_after_labels (ex_bb);
1385 cond_nit = last_stmt (exit->src);
1386 nit_1 = gimple_cond_rhs (cond_nit);
1387 nit_1 = force_gimple_operand_gsi (&gsi,
1388 fold_convert (TREE_TYPE (control_name), nit_1),
1389 false, NULL_TREE, false, GSI_SAME_STMT);
1390 stmt = gimple_build_assign (control_name, nit_1);
1391 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1392 SSA_NAME_DEF_STMT (control_name) = stmt;
1395 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1396 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1397 NEW_DATA is the variable that should be initialized from the argument
1398 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1399 basic block containing GIMPLE_OMP_PARALLEL tree. */
1401 static basic_block
1402 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1403 tree new_data, unsigned n_threads)
1405 gimple_stmt_iterator gsi;
1406 basic_block bb, paral_bb, for_bb, ex_bb;
1407 tree t, param;
1408 gimple stmt, for_stmt, phi, cond_stmt;
1409 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1410 edge exit, nexit, guard, end, e;
1412 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1413 bb = loop_preheader_edge (loop)->src;
1414 paral_bb = single_pred (bb);
1415 gsi = gsi_last_bb (paral_bb);
1417 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
1418 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1419 = build_int_cst (integer_type_node, n_threads);
1420 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1422 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1424 /* Initialize NEW_DATA. */
1425 if (data)
1427 gsi = gsi_after_labels (bb);
1429 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1430 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1431 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1432 SSA_NAME_DEF_STMT (param) = stmt;
1434 stmt = gimple_build_assign (new_data,
1435 fold_convert (TREE_TYPE (new_data), param));
1436 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1437 SSA_NAME_DEF_STMT (new_data) = stmt;
1440 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1441 bb = split_loop_exit_edge (single_dom_exit (loop));
1442 gsi = gsi_last_bb (bb);
1443 gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);
1445 /* Extract data for GIMPLE_OMP_FOR. */
1446 gcc_assert (loop->header == single_dom_exit (loop)->src);
1447 cond_stmt = last_stmt (loop->header);
1449 cvar = gimple_cond_lhs (cond_stmt);
1450 cvar_base = SSA_NAME_VAR (cvar);
1451 phi = SSA_NAME_DEF_STMT (cvar);
1452 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1453 initvar = make_ssa_name (cvar_base, NULL);
1454 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1455 initvar);
1456 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1458 gsi = gsi_last_bb (loop->latch);
1459 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1460 gsi_remove (&gsi, true);
1462 /* Prepare cfg. */
1463 for_bb = split_edge (loop_preheader_edge (loop));
1464 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1465 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1466 gcc_assert (exit == single_dom_exit (loop));
1468 guard = make_edge (for_bb, ex_bb, 0);
1469 single_succ_edge (loop->latch)->flags = 0;
1470 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1471 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1473 source_location locus;
1474 tree def;
1475 phi = gsi_stmt (gsi);
1476 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1478 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1479 locus = gimple_phi_arg_location_from_edge (stmt,
1480 loop_preheader_edge (loop));
1481 add_phi_arg (phi, def, guard, locus);
1483 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1484 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1485 add_phi_arg (phi, def, end, locus);
1487 e = redirect_edge_and_branch (exit, nexit->dest);
1488 PENDING_STMT (e) = NULL;
1490 /* Emit GIMPLE_OMP_FOR. */
1491 gimple_cond_set_lhs (cond_stmt, cvar_base);
1492 type = TREE_TYPE (cvar);
1493 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
1494 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1496 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1497 gimple_omp_for_set_index (for_stmt, 0, initvar);
1498 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1499 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1500 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1501 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1502 cvar_base,
1503 build_int_cst (type, 1)));
1505 gsi = gsi_last_bb (for_bb);
1506 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1507 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1509 /* Emit GIMPLE_OMP_CONTINUE. */
1510 gsi = gsi_last_bb (loop->latch);
1511 stmt = gimple_build_omp_continue (cvar_next, cvar);
1512 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1513 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1515 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1516 gsi = gsi_last_bb (ex_bb);
1517 gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);
1519 return paral_bb;
1522 /* Generates code to execute the iterations of LOOP in N_THREADS
1523 threads in parallel.
1525 NITER describes number of iterations of LOOP.
1526 REDUCTION_LIST describes the reductions existent in the LOOP. */
1528 static void
1529 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1530 unsigned n_threads, struct tree_niter_desc *niter)
1532 loop_iterator li;
1533 tree many_iterations_cond, type, nit;
1534 tree arg_struct, new_arg_struct;
1535 gimple_seq stmts;
1536 basic_block parallel_head;
1537 edge entry, exit;
1538 struct clsn_data clsn_data;
1539 unsigned prob;
1541 /* From
1543 ---------------------------------------------------------------------
1544 loop
1546 IV = phi (INIT, IV + STEP)
1547 BODY1;
1548 if (COND)
1549 break;
1550 BODY2;
1552 ---------------------------------------------------------------------
1554 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1555 we generate the following code:
1557 ---------------------------------------------------------------------
1559 if (MAY_BE_ZERO
1560 || NITER < MIN_PER_THREAD * N_THREADS)
1561 goto original;
1563 BODY1;
1564 store all local loop-invariant variables used in body of the loop to DATA.
1565 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1566 load the variables from DATA.
1567 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1568 BODY2;
1569 BODY1;
1570 GIMPLE_OMP_CONTINUE;
1571 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1572 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1573 goto end;
1575 original:
1576 loop
1578 IV = phi (INIT, IV + STEP)
1579 BODY1;
1580 if (COND)
1581 break;
1582 BODY2;
1585 end:
1589 /* Create two versions of the loop -- in the old one, we know that the
1590 number of iterations is large enough, and we will transform it into the
1591 loop that will be split to loop_fn, the new one will be used for the
1592 remaining iterations. */
1594 type = TREE_TYPE (niter->niter);
1595 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1596 NULL_TREE);
1597 if (stmts)
1598 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1600 many_iterations_cond =
1601 fold_build2 (GE_EXPR, boolean_type_node,
1602 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1603 many_iterations_cond
1604 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1605 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1606 many_iterations_cond);
1607 many_iterations_cond
1608 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1609 if (stmts)
1610 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1611 if (!is_gimple_condexpr (many_iterations_cond))
1613 many_iterations_cond
1614 = force_gimple_operand (many_iterations_cond, &stmts,
1615 true, NULL_TREE);
1616 if (stmts)
1617 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1620 initialize_original_copy_tables ();
1622 /* We assume that the loop usually iterates a lot. */
1623 prob = 4 * REG_BR_PROB_BASE / 5;
1624 loop_version (loop, many_iterations_cond, NULL,
1625 prob, prob, REG_BR_PROB_BASE - prob, true);
1626 update_ssa (TODO_update_ssa);
1627 free_original_copy_tables ();
1629 /* Base all the induction variables in LOOP on a single control one. */
1630 canonicalize_loop_ivs (loop, &nit, true);
1632 /* Ensure that the exit condition is the first statement in the loop. */
1633 transform_to_exit_first_loop (loop, reduction_list, nit);
1635 /* Generate initializations for reductions. */
1636 if (htab_elements (reduction_list) > 0)
1637 htab_traverse (reduction_list, initialize_reductions, loop);
1639 /* Eliminate the references to local variables from the loop. */
1640 gcc_assert (single_exit (loop));
1641 entry = loop_preheader_edge (loop);
1642 exit = single_dom_exit (loop);
1644 eliminate_local_variables (entry, exit);
1645 /* In the old loop, move all variables non-local to the loop to a structure
1646 and back, and create separate decls for the variables used in loop. */
1647 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1648 &new_arg_struct, &clsn_data);
1650 /* Create the parallel constructs. */
1651 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1652 new_arg_struct, n_threads);
1653 if (htab_elements (reduction_list) > 0)
1654 create_call_for_reduction (loop, reduction_list, &clsn_data);
1656 scev_reset ();
1658 /* Cancel the loop (it is simpler to do it here rather than to teach the
1659 expander to do it). */
1660 cancel_loop_tree (loop);
1662 /* Free loop bound estimations that could contain references to
1663 removed statements. */
1664 FOR_EACH_LOOP (li, loop, 0)
1665 free_numbers_of_iterations_estimates_loop (loop);
1667 /* Expand the parallel constructs. We do it directly here instead of running
1668 a separate expand_omp pass, since it is more efficient, and less likely to
1669 cause troubles with further analyses not being able to deal with the
1670 OMP trees. */
1672 omp_expand_local (parallel_head);
1675 /* Returns true when LOOP contains vector phi nodes. */
1677 static bool
1678 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1680 unsigned i;
1681 basic_block *bbs = get_loop_body_in_dom_order (loop);
1682 gimple_stmt_iterator gsi;
1683 bool res = true;
1685 for (i = 0; i < loop->num_nodes; i++)
1686 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1687 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1688 goto end;
1690 res = false;
1691 end:
1692 free (bbs);
1693 return res;
1696 /* Create a reduction_info struct, initialize it with REDUC_STMT
1697 and PHI, insert it to the REDUCTION_LIST. */
1699 static void
1700 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1702 PTR *slot;
1703 struct reduction_info *new_reduction;
1705 gcc_assert (reduc_stmt);
1707 if (dump_file && (dump_flags & TDF_DETAILS))
1709 fprintf (dump_file,
1710 "Detected reduction. reduction stmt is: \n");
1711 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1712 fprintf (dump_file, "\n");
1715 new_reduction = XCNEW (struct reduction_info);
1717 new_reduction->reduc_stmt = reduc_stmt;
1718 new_reduction->reduc_phi = phi;
1719 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1720 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1721 *slot = new_reduction;
1724 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1726 static void
1727 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1729 gimple_stmt_iterator gsi;
1730 loop_vec_info simple_loop_info;
1732 vect_dump = NULL;
1733 simple_loop_info = vect_analyze_loop_form (loop);
1735 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1737 gimple phi = gsi_stmt (gsi);
1738 affine_iv iv;
1739 tree res = PHI_RESULT (phi);
1740 bool double_reduc;
1742 if (!is_gimple_reg (res))
1743 continue;
1745 if (!simple_iv (loop, loop, res, &iv, true)
1746 && simple_loop_info)
1748 gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
1749 if (reduc_stmt && !double_reduc)
1750 build_new_reduction (reduction_list, reduc_stmt, phi);
1753 destroy_loop_vec_info (simple_loop_info, true);
1756 /* Try to initialize NITER for code generation part. */
1758 static bool
1759 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1761 edge exit = single_dom_exit (loop);
1763 gcc_assert (exit);
1765 /* We need to know # of iterations, and there should be no uses of values
1766 defined inside loop outside of it, unless the values are invariants of
1767 the loop. */
1768 if (!number_of_iterations_exit (loop, exit, niter, false))
1770 if (dump_file && (dump_flags & TDF_DETAILS))
1771 fprintf (dump_file, " FAILED: number of iterations not known\n");
1772 return false;
1775 return true;
1778 /* Try to initialize REDUCTION_LIST for code generation part.
1779 REDUCTION_LIST describes the reductions. */
1781 static bool
1782 try_create_reduction_list (loop_p loop, htab_t reduction_list)
1784 edge exit = single_dom_exit (loop);
1785 gimple_stmt_iterator gsi;
1787 gcc_assert (exit);
1789 gather_scalar_reductions (loop, reduction_list);
1792 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1794 gimple phi = gsi_stmt (gsi);
1795 struct reduction_info *red;
1796 imm_use_iterator imm_iter;
1797 use_operand_p use_p;
1798 gimple reduc_phi;
1799 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1801 if (is_gimple_reg (val))
1803 if (dump_file && (dump_flags & TDF_DETAILS))
1805 fprintf (dump_file, "phi is ");
1806 print_gimple_stmt (dump_file, phi, 0, 0);
1807 fprintf (dump_file, "arg of phi to exit: value ");
1808 print_generic_expr (dump_file, val, 0);
1809 fprintf (dump_file, " used outside loop\n");
1810 fprintf (dump_file,
1811 " checking if it a part of reduction pattern: \n");
1813 if (htab_elements (reduction_list) == 0)
1815 if (dump_file && (dump_flags & TDF_DETAILS))
1816 fprintf (dump_file,
1817 " FAILED: it is not a part of reduction.\n");
1818 return false;
1820 reduc_phi = NULL;
1821 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
1823 if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
1825 reduc_phi = USE_STMT (use_p);
1826 break;
1829 red = reduction_phi (reduction_list, reduc_phi);
1830 if (red == NULL)
1832 if (dump_file && (dump_flags & TDF_DETAILS))
1833 fprintf (dump_file,
1834 " FAILED: it is not a part of reduction.\n");
1835 return false;
1837 if (dump_file && (dump_flags & TDF_DETAILS))
1839 fprintf (dump_file, "reduction phi is ");
1840 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
1841 fprintf (dump_file, "reduction stmt is ");
1842 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
1847 /* The iterations of the loop may communicate only through bivs whose
1848 iteration space can be distributed efficiently. */
1849 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1851 gimple phi = gsi_stmt (gsi);
1852 tree def = PHI_RESULT (phi);
1853 affine_iv iv;
1855 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
1857 struct reduction_info *red;
1859 red = reduction_phi (reduction_list, phi);
1860 if (red == NULL)
1862 if (dump_file && (dump_flags & TDF_DETAILS))
1863 fprintf (dump_file,
1864 " FAILED: scalar dependency between iterations\n");
1865 return false;
1871 return true;
1874 /* Detect parallel loops and generate parallel code using libgomp
1875 primitives. Returns true if some loop was parallelized, false
1876 otherwise. */
1878 bool
1879 parallelize_loops (void)
1881 unsigned n_threads = flag_tree_parallelize_loops;
1882 bool changed = false;
1883 struct loop *loop;
1884 struct tree_niter_desc niter_desc;
1885 loop_iterator li;
1886 htab_t reduction_list;
1887 struct obstack parloop_obstack;
1888 HOST_WIDE_INT estimated;
1889 LOC loop_loc;
1891 /* Do not parallelize loops in the functions created by parallelization. */
1892 if (parallelized_function_p (cfun->decl))
1893 return false;
1894 if (cfun->has_nonlocal_label)
1895 return false;
1897 gcc_obstack_init (&parloop_obstack);
1898 reduction_list = htab_create (10, reduction_info_hash,
1899 reduction_info_eq, free);
1900 init_stmt_vec_info_vec ();
1902 FOR_EACH_LOOP (li, loop, 0)
1904 htab_empty (reduction_list);
1905 if (dump_file && (dump_flags & TDF_DETAILS))
1907 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
1908 if (loop->inner)
1909 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
1910 else
1911 fprintf (dump_file, "loop %d is innermost\n",loop->num);
1914 /* If we use autopar in graphite pass, we use its marked dependency
1915 checking results. */
1916 if (flag_loop_parallelize_all && !loop->can_be_parallel)
1918 if (dump_file && (dump_flags & TDF_DETAILS))
1919 fprintf (dump_file, "loop is not parallel according to graphite\n");
1920 continue;
1923 if (!single_dom_exit (loop))
1926 if (dump_file && (dump_flags & TDF_DETAILS))
1927 fprintf (dump_file, "loop is !single_dom_exit\n");
1929 continue;
1932 if (/* And of course, the loop must be parallelizable. */
1933 !can_duplicate_loop_p (loop)
1934 || loop_has_blocks_with_irreducible_flag (loop)
1935 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
1936 /* FIXME: the check for vector phi nodes could be removed. */
1937 || loop_has_vector_phi_nodes (loop))
1938 continue;
1939 estimated = estimated_loop_iterations_int (loop, false);
1940 /* FIXME: Bypass this check as graphite doesn't update the
1941 count and frequency correctly now. */
1942 if (!flag_loop_parallelize_all
1943 && ((estimated !=-1
1944 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
1945 /* Do not bother with loops in cold areas. */
1946 || optimize_loop_nest_for_size_p (loop)))
1947 continue;
1949 if (!try_get_loop_niter (loop, &niter_desc))
1950 continue;
1952 if (!try_create_reduction_list (loop, reduction_list))
1953 continue;
1955 if (!flag_loop_parallelize_all
1956 && !loop_parallel_p (loop, &parloop_obstack))
1957 continue;
1959 changed = true;
1960 if (dump_file && (dump_flags & TDF_DETAILS))
1962 if (loop->inner)
1963 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
1964 else
1965 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
1966 loop_loc = find_loop_location (loop);
1967 if (loop_loc != UNKNOWN_LOC)
1968 fprintf (dump_file, "\nloop at %s:%d: ",
1969 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1971 gen_parallel_loop (loop, reduction_list,
1972 n_threads, &niter_desc);
1973 verify_flow_info ();
1974 verify_dominators (CDI_DOMINATORS);
1975 verify_loop_structure ();
1976 verify_loop_closed_ssa (true);
1979 free_stmt_vec_info_vec ();
1980 htab_delete (reduction_list);
1981 obstack_free (&parloop_obstack, NULL);
1983 /* Parallelization will cause new function calls to be inserted through
1984 which local variables will escape. Reset the points-to solution
1985 for ESCAPED. */
1986 if (changed)
1987 pt_solution_reset (&cfun->gimple_df->escaped);
1989 return changed;
1992 #include "gt-tree-parloops.h"