Enable dumping of alias graphs.
[official-gcc/Ramakrishna.git] / gcc / tree-parloops.c
blobd705e3f307cfeecc53625f6629a8fccc2bd9ca84
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "tree-flow.h"
29 #include "cfgloop.h"
30 #include "ggc.h"
31 #include "diagnostic.h"
32 #include "tree-pass.h"
33 #include "tree-chrec.h"
34 #include "tree-scalar-evolution.h"
35 #include "tree-data-ref.h"
36 #include "hashtab.h"
37 #include "langhooks.h"
38 #include "tree-vectorizer.h"
40 /* This pass tries to distribute iterations of loops into several threads.
41 The implementation is straightforward -- for each loop we test whether its
42 iterations are independent, and if it is the case (and some additional
43 conditions regarding profitability and correctness are satisfied), we
44 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
45 machinery do its job.
   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
49 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
50 variable and that the exit test is at the start of the loop body
51 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
52 variables by accesses through pointers, and breaking up ssa chains
53 by storing the values incoming to the parallelized loop to a structure
54 passed to the new function as an argument (something similar is done
55 in omp gimplification, unfortunately only a small part of the code
56 can be shared).
58 TODO:
59 -- if there are several parallelizable loops in a function, it may be
60 possible to generate the threads just once (using synchronization to
61 ensure that cross-loop dependences are obeyed).
62 -- handling of common scalar dependence patterns (accumulation, ...)
63 -- handling of non-innermost loops */
65 /*
66 Reduction handling:
67 currently we use vect_is_simple_reduction() to detect reduction patterns.
68 The code transformation will be introduced by an example.
71 parloop
73 int sum=1;
75 for (i = 0; i < N; i++)
77 x[i] = i + 3;
78 sum+=x[i];
82 gimple-like code:
83 header_bb:
85 # sum_29 = PHI <sum_11(5), 1(3)>
86 # i_28 = PHI <i_12(5), 0(3)>
87 D.1795_8 = i_28 + 3;
88 x[i_28] = D.1795_8;
89 sum_11 = D.1795_8 + sum_29;
90 i_12 = i_28 + 1;
91 if (N_6(D) > i_12)
92 goto header_bb;
95 exit_bb:
97 # sum_21 = PHI <sum_11(4)>
98 printf (&"%d"[0], sum_21);
101 after reduction transformation (only relevant parts):
103 parloop
106 ....
109 # Storing the initial value given by the user. #
111 .paral_data_store.32.sum.27 = 1;
113 #pragma omp parallel num_threads(4)
115 #pragma omp for schedule(static)
117 # The neutral element corresponding to the particular
118 reduction's operation, e.g. 0 for PLUS_EXPR,
119 1 for MULT_EXPR, etc. replaces the user's initial value. #
121 # sum.27_29 = PHI <sum.27_11, 0>
123 sum.27_11 = D.1827_8 + sum.27_29;
125 GIMPLE_OMP_CONTINUE
127 # Adding this reduction phi is done at create_phi_for_local_result() #
128 # sum.27_56 = PHI <sum.27_11, 0>
129 GIMPLE_OMP_RETURN
131 # Creating the atomic operation is done at
132 create_call_for_reduction_1() #
134 #pragma omp atomic_load
135 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136 D.1840_60 = sum.27_56 + D.1839_59;
137 #pragma omp atomic_store (D.1840_60);
139 GIMPLE_OMP_RETURN
141 # collecting the result after the join of the threads is done at
142 create_loads_for_reductions().
143 The value computed by the threads is loaded from the
144 shared struct. #
147 .paral_data_load.33_52 = &.paral_data_store.32;
148 sum_37 = .paral_data_load.33_52->sum.27;
149 sum_43 = D.1795_41 + sum_37;
151 exit bb:
152 # sum_21 = PHI <sum_43, sum_26>
153 printf (&"%d"[0], sum_21);
/* Minimal number of iterations of a loop that should be executed in each
   thread; loops with fewer iterations per thread are not worth the
   thread-creation overhead.  */
#define MIN_PER_THREAD 100
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.
				   Also the hash key of the element.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
183 /* Equality and hash functions for hashtab code. */
185 static int
186 reduction_info_eq (const void *aa, const void *bb)
188 const struct reduction_info *a = (const struct reduction_info *) aa;
189 const struct reduction_info *b = (const struct reduction_info *) bb;
191 return (a->reduc_phi == b->reduc_phi);
194 static hashval_t
195 reduction_info_hash (const void *aa)
197 const struct reduction_info *a = (const struct reduction_info *) aa;
199 return htab_hash_pointer (a->reduc_phi);
202 static struct reduction_info *
203 reduction_phi (htab_t reduction_list, gimple phi)
205 struct reduction_info tmpred, *red;
207 if (htab_elements (reduction_list) == 0)
208 return NULL;
210 tmpred.reduc_phi = phi;
211 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
213 return red;
/* Element of hashtable of names to copy: maps an SSA name version in the
   original function to its duplicate used inside the split-off region.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  Also the
			   hash key of the element.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
226 /* Equality and hash functions for hashtab code. */
228 static int
229 name_to_copy_elt_eq (const void *aa, const void *bb)
231 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
232 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
234 return a->version == b->version;
237 static hashval_t
238 name_to_copy_elt_hash (const void *aa)
240 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
242 return (hashval_t) a->version;
246 /* Data dependency analysis. Returns true if the iterations of LOOP
247 are independent on each other (that is, if we can execute them
248 in parallel). */
250 static bool
251 loop_parallel_p (struct loop *loop)
253 VEC (ddr_p, heap) * dependence_relations;
254 VEC (data_reference_p, heap) *datarefs;
255 lambda_trans_matrix trans;
256 bool ret = false;
258 if (dump_file && (dump_flags & TDF_DETAILS))
259 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
261 /* Check for problems with dependences. If the loop can be reversed,
262 the iterations are independent. */
263 datarefs = VEC_alloc (data_reference_p, heap, 10);
264 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
265 compute_data_dependences_for_loop (loop, true, &datarefs,
266 &dependence_relations);
267 if (dump_file && (dump_flags & TDF_DETAILS))
268 dump_data_dependence_relations (dump_file, dependence_relations);
270 trans = lambda_trans_matrix_new (1, 1);
271 LTM_MATRIX (trans)[0][0] = -1;
273 if (lambda_transform_legal_p (trans, 1, dependence_relations))
275 ret = true;
276 if (dump_file && (dump_flags & TDF_DETAILS))
277 fprintf (dump_file, " SUCCESS: may be parallelized\n");
279 else if (dump_file && (dump_flags & TDF_DETAILS))
280 fprintf (dump_file,
281 " FAILED: data dependencies exist across iterations\n");
283 free_dependence_relations (dependence_relations);
284 free_data_refs (datarefs);
286 return ret;
289 /* Return true when LOOP contains basic blocks marked with the
290 BB_IRREDUCIBLE_LOOP flag. */
292 static inline bool
293 loop_has_blocks_with_irreducible_flag (struct loop *loop)
295 unsigned i;
296 basic_block *bbs = get_loop_body_in_dom_order (loop);
297 bool res = true;
299 for (i = 0; i < loop->num_nodes; i++)
300 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
301 goto end;
303 res = false;
304 end:
305 free (bbs);
306 return res;
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip handled components so that VAR_P points at the base decl of
     the reference.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;
  uid = DECL_UID (*var_p);

  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      /* First time the address of this decl is taken: materialize
	 "bvar = &decl" on the entry edge and cache the resulting
	 ssa name under the decl's uid.  */
      addr = build_addr (*var_p, current_function_decl);
      bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
      add_referenced_var (bvar);
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    /* Reuse the address computed earlier for this decl.  */
    name = ((struct int_tree_map *) *dslot)->to;

  if (var_p != &obj)
    {
      /* OBJ was a component reference: rebuild it on top of the cached
	 base address and regimplify the address of the whole object,
	 inserting the needed statements on the entry edge.  */
      *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
      name = force_gimple_operand (build_addr (obj, current_function_decl),
				   &stmts, true, NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_on_edge_immediate (entry, stmts);
    }

  if (TREE_TYPE (name) != type)
    {
      /* Convert to the requested pointer type; conversion statements go
	 on the entry edge as well.  */
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_on_edge_immediate (entry, stmts);
    }

  return name;
}
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  Always returns 1 so the traversal
   continues.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");
  add_referenced_var (bvar);

  /* Build a dummy OMP_CLAUSE_REDUCTION clause so omp_reduction_init can
     compute the neutral element for this reduction's operation.  */
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  /* Remember the user's original initial value; it is combined back in
     after the threads join.  */
  reduc->initial_value = arg;
  return 1;
}
/* Data passed to the eliminate_local_variables_1 callback through the
   tree/gimple walkers.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must be first: &info is handed to
				   walk_gimple_op/walk_tree, and the
				   callback casts its data pointer back
				   to struct elv_data.  */
  edge entry;			/* Edge on which hoisted address
				   computations are inserted.  */
  htab_t decl_address;		/* Map from decl uid to the ssa name
				   holding its address.  */
  bool changed;			/* Set when the walked statement was
				   modified and needs update_stmt.  */
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Only local (non-external) variables need rewriting.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the direct reference with an indirect one through the
	 address hoisted onto the entry edge.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
      *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Do not recurse into non-expression nodes.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
496 /* Moves the references to local variables in STMT out of the single
497 entry single exit region starting at ENTRY. DECL_ADDRESS contains
498 addresses of the references that had their address taken
499 already. */
501 static void
502 eliminate_local_variables_stmt (edge entry, gimple stmt,
503 htab_t decl_address)
505 struct elv_data dta;
507 memset (&dta.info, '\0', sizeof (dta.info));
508 dta.entry = entry;
509 dta.decl_address = decl_address;
510 dta.changed = false;
512 if (gimple_debug_bind_p (stmt))
513 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
514 eliminate_local_variables_1, &dta.info, NULL);
515 else
516 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
518 if (dta.changed)
519 update_stmt (stmt);
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  unsigned i;
  gimple_stmt_iterator gsi;
  /* Cache of addresses already taken, shared across all statements in
     the region so each decl's address is computed only once.  */
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* The entry and exit blocks themselves lie outside the region proper
     and are skipped.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
					decl_address);

  htab_delete (decl_address);
  VEC_free (basic_block, heap, body);
}
557 /* Returns true if expression EXPR is not defined between ENTRY and
558 EXIT, i.e. if all its operands are defined outside of the region. */
560 static bool
561 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
563 basic_block entry_bb = entry->src;
564 basic_block exit_bb = exit->dest;
565 basic_block def_bb;
567 if (is_gimple_min_invariant (expr))
568 return true;
570 if (TREE_CODE (expr) == SSA_NAME)
572 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
573 if (def_bb
574 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
575 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
576 return false;
578 return true;
581 return false;
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only allocate a hash slot when the name itself will be copied.  */
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  /* Duplicate the base decl of the name (at most once per decl).  */
  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      add_referenced_var (var_copy);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again: map the copy's own uid to itself.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* The NO_INSERT lookup must have missed, otherwise we would have
	 returned the recorded copy above.  */
      gcc_assert (!slot);
      copy = name;
    }

  /* Rebase the (possibly original) name onto the duplicated decl.  */
  SSA_NAME_VAR (copy) = var_copy;
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  mark_virtual_ops_for_renaming (stmt);

  /* Definitions are local to the region: only their base decl is
     replaced, never the ssa name itself (copy_name_p == false).  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses whose definition lies outside the region additionally get a
     duplicated ssa name.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Processes the debug bind STMT: rewrites the bound variable and the ssa
   names used in its value to the duplicates recorded in DECL_COPIES and
   NAME_COPIES.  If no copy exists for a used name, the bind's value is
   reset instead.  Returns true when the bound variable was never
   duplicated, i.e. the debug bind is meaningless in the split-off region
   (presumably the caller then removes it -- the call site is outside
   this excerpt).  */
static bool
separate_decls_in_region_debug_bind (gimple stmt,
				     htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  var = gimple_debug_bind_get_var (stmt);
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  /* The bound variable was never duplicated -- signal the caller that
     this debug bind has no meaning in the region.  */
  if (!dslot)
    return true;
  /* Rebind the statement to the duplicated decl.  */
  gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* No copy recorded for this use: the bound value cannot be
	     expressed in the new function, so drop the value (the bind
	     itself is kept).  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
    }

  return false;
}
747 /* Callback for htab_traverse. Adds a field corresponding to the reduction
748 specified in SLOT. The type is passed in DATA. */
750 static int
751 add_field_for_reduction (void **slot, void *data)
754 struct reduction_info *const red = (struct reduction_info *) *slot;
755 tree const type = (tree) data;
756 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
757 tree field = build_decl (gimple_location (red->reduc_stmt),
758 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
760 insert_field_into_struct (type, field);
762 red->field = field;
764 return 1;
767 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
768 described in SLOT. The type is passed in DATA. */
770 static int
771 add_field_for_name (void **slot, void *data)
773 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
774 tree type = (tree) data;
775 tree name = ssa_name (elt->version);
776 tree var = SSA_NAME_VAR (name);
777 tree field = build_decl (DECL_SOURCE_LOCATION (var),
778 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
780 insert_field_into_struct (type, field);
781 elt->field = field;
783 return 1;
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* Neutral element on the no-iteration edge, the loop-computed value on
     the fallthru edge.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
/* Describes where shared data is stored to and loaded from, and the
   basic blocks in which the store/load statements are generated.  */

struct clsn_data
{
  tree store;			/* The shared structure itself (used
				   directly in COMPONENT_REFs).  */
  tree load;			/* Pointer through which the structure is
				   read back (dereferenced via
				   INDIRECT_REF).  */

  basic_block store_bb;		/* Block where stores into the structure
				   are emitted.  */
  basic_block load_bb;		/* Block where loads from the structure
				   are emitted.  */
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, addr_type, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of the shared-struct field this reduction accumulates
     into.  */
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
  addr_type = build_pointer_type (type);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Atomically load the current value of the shared field.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result using the
     reduction's operation.  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* Atomically store the combined value back.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
897 /* Create the atomic operation at the join point of the threads.
898 REDUCTION_LIST describes the reductions in the LOOP.
899 LD_ST_DATA describes the shared data structure where
900 shared data is stored in and loaded from. */
901 static void
902 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
903 struct clsn_data *ld_st_data)
905 htab_traverse (reduction_list, create_phi_for_local_result, loop);
906 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
907 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
908 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  /* Reuse the ssa name produced by the original exit phi so existing
     uses of the reduction result need not be rewritten.  */
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The exit phi is now dead -- its result is defined by the load
     emitted above; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
949 /* Load the reduction result that was stored in LD_ST_DATA.
950 REDUCTION_LIST describes the list of reductions that the
951 loads should be generated for. */
952 static void
953 create_final_loads_for_reduction (htab_t reduction_list,
954 struct clsn_data *ld_st_data)
956 gimple_stmt_iterator gsi;
957 tree t;
958 gimple stmt;
960 gsi = gsi_after_labels (ld_st_data->load_bb);
961 t = build_fold_addr_expr (ld_st_data->store);
962 stmt = gimple_build_assign (ld_st_data->load, t);
964 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
965 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
967 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
971 /* Callback for htab_traverse. Store the neutral value for the
972 particular reduction's operation, e.g. 0 for PLUS_EXPR,
973 1 for MULT_EXPR, etc. into the reduction field.
974 The reduction is specified in SLOT. The store information is
975 passed in DATA. */
977 static int
978 create_stores_for_reduction (void **slot, void *data)
980 struct reduction_info *const red = (struct reduction_info *) *slot;
981 struct clsn_data *const clsn_data = (struct clsn_data *) data;
982 tree t;
983 gimple stmt;
984 gimple_stmt_iterator gsi;
985 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
987 gsi = gsi_last_bb (clsn_data->store_bb);
988 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
989 stmt = gimple_build_assign (t, red->initial_value);
990 mark_virtual_ops_for_renaming (stmt);
991 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
993 return 1;
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

static int
create_loads_and_stores_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;

  /* Store the original name into the shared structure before the
     region...  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  mark_virtual_ops_for_renaming (stmt);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* ... and load it back into the duplicate inside the region, through
     the load pointer.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1028 /* Moves all the variables used in LOOP and defined outside of it (including
1029 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1030 name) to a structure created for this purpose. The code
1032 while (1)
1034 use (a);
1035 use (b);
1038 is transformed this way:
1040 bb0:
1041 old.a = a;
1042 old.b = b;
1044 bb1:
1045 a' = new->a;
1046 b' = new->b;
1047 while (1)
1049 use (a');
1050 use (b');
1053 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1054 pointer `new' is intentionally not initialized (the loop will be split to a
1055 separate function later, and `new' will be initialized from its arguments).
1056 LD_ST_DATA holds information about the shared data structure used to pass
1057 information among the threads. It is initialized here, and
1058 gen_parallel_loop will pass it to create_call_for_reduction that
1059 needs this information. REDUCTION_LIST describes the reductions
1060 in LOOP. */
1062 static void
1063 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1064 tree *arg_struct, tree *new_arg_struct,
1065 struct clsn_data *ld_st_data)
/* BB0 (before the region) receives the stores of the original values;
   BB1 (the new region entry created by splitting ENTRY) receives the
   loads into the per-region copies.  */
1068 basic_block bb1 = split_edge (entry);
1069 basic_block bb0 = single_pred (bb1);
1070 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1071 name_to_copy_elt_eq, free);
1072 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1073 free);
1074 unsigned i;
1075 tree type, type_name, nvar;
1076 gimple_stmt_iterator gsi;
1077 struct clsn_data clsn_data;
1078 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1079 basic_block bb;
1080 basic_block entry_bb = bb1;
1081 basic_block exit_bb = exit->dest;
1082 bool has_debug_stmt = false;
1084 entry = single_succ_edge (entry_bb);
1085 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
/* First pass: record name/decl copies for phis and non-debug statements.
   Debug binds are only noted here and processed in the second pass, so
   that all decls exist before debug info is rewritten.  */
1087 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1089 if (bb != entry_bb && bb != exit_bb)
1091 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1092 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1093 name_copies, decl_copies);
1095 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1097 gimple stmt = gsi_stmt (gsi);
1099 if (is_gimple_debug (stmt))
1100 has_debug_stmt = true;
1101 else
1102 separate_decls_in_region_stmt (entry, exit, stmt,
1103 name_copies, decl_copies);
1108 /* Now process debug bind stmts. We must not create decls while
1109 processing debug stmts, so we defer their processing so as to
1110 make sure we will have debug info for as many variables as
1111 possible (all of those that were dealt with in the loop above),
1112 and discard those for which we know there's nothing we can
1113 do. */
1114 if (has_debug_stmt)
1115 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1116 if (bb != entry_bb && bb != exit_bb)
1118 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1120 gimple stmt = gsi_stmt (gsi);
1122 if (gimple_debug_bind_p (stmt))
1124 if (separate_decls_in_region_debug_bind (stmt,
1125 name_copies,
1126 decl_copies))
/* Unsalvageable debug bind: remove it; gsi_remove advances GSI.  */
1128 gsi_remove (&gsi, true);
1129 continue;
1133 gsi_next (&gsi);
1137 VEC_free (basic_block, heap, body);
1139 if (htab_elements (name_copies) == 0 && reduction_list == 0)
1141 /* It may happen that there is nothing to copy (if there are only
1142 loop carried and external variables in the loop). */
1143 *arg_struct = NULL;
1144 *new_arg_struct = NULL;
1146 else
1148 /* Create the type for the structure to store the ssa names to. */
1149 type = lang_hooks.types.make_type (RECORD_TYPE);
1150 type_name = build_decl (BUILTINS_LOCATION,
1151 TYPE_DECL, create_tmp_var_name (".paral_data"),
1152 type);
1153 TYPE_NAME (type) = type_name;
1155 htab_traverse (name_copies, add_field_for_name, type);
1156 if (reduction_list && htab_elements (reduction_list) > 0)
1158 /* Create the fields for reductions. */
1159 htab_traverse (reduction_list, add_field_for_reduction,
1160 type);
1162 layout_type (type);
1164 /* Create the loads and stores. */
1165 *arg_struct = create_tmp_var (type, ".paral_data_store");
1166 add_referenced_var (*arg_struct);
1167 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1168 add_referenced_var (nvar);
1169 *new_arg_struct = make_ssa_name (nvar, NULL);
1171 ld_st_data->store = *arg_struct;
1172 ld_st_data->load = *new_arg_struct;
1173 ld_st_data->store_bb = bb0;
1174 ld_st_data->load_bb = bb1;
1176 htab_traverse (name_copies, create_loads_and_stores_for_name,
1177 ld_st_data);
1179 /* Load the calculation from memory (after the join of the threads). */
1181 if (reduction_list && htab_elements (reduction_list) > 0)
1183 htab_traverse (reduction_list, create_stores_for_reduction,
1184 ld_st_data);
/* A fresh load pointer for the post-join loads; LD_ST_DATA->store is
   shared so the final reduction values come from the same structure.  */
1185 clsn_data.load = make_ssa_name (nvar, NULL);
1186 clsn_data.load_bb = exit->dest;
1187 clsn_data.store = ld_st_data->store;
1188 create_final_loads_for_reduction (reduction_list, &clsn_data);
1192 htab_delete (decl_copies);
1193 htab_delete (name_copies);
1196 /* Bitmap containing uids of functions created by parallelization. We cannot
1197 allocate it from the default obstack, as it must live across compilation
1198 of several functions; we make it gc allocated instead. */
/* Lazily allocated in create_loop_fn; queried by parallelized_function_p.  */
1200 static GTY(()) bitmap parallelized_functions;
1202 /* Returns true if FN was created by create_loop_fn. */
1204 static bool
1205 parallelized_function_p (tree fn)
1207 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1208 return false;
1210 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1213 /* Creates and returns an empty function that will receive the body of
1214 a parallelized loop. */
1216 static tree
1217 create_loop_fn (void)
1219 char buf[100];
1220 char *tname;
1221 tree decl, type, name, t;
1222 struct function *act_cfun = cfun;
/* Static counter makes each outlined function name unique within the
   translation unit.  */
1223 static unsigned loopfn_num;
/* Derive the new function's name from the current function's.  */
1225 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1226 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1227 clean_symbol_name (tname);
1228 name = get_identifier (tname);
/* Signature is void fn (void *) -- the single pointer argument carries
   the shared data structure built by separate_decls_in_region.  */
1229 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1231 decl = build_decl (BUILTINS_LOCATION,
1232 FUNCTION_DECL, name, type);
/* Record the new decl so parallelize_loops skips it later.  */
1233 if (!parallelized_functions)
1234 parallelized_functions = BITMAP_GGC_ALLOC ();
1235 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1237 TREE_STATIC (decl) = 1;
1238 TREE_USED (decl) = 1;
1239 DECL_ARTIFICIAL (decl) = 1;
1240 DECL_IGNORED_P (decl) = 0;
1241 TREE_PUBLIC (decl) = 0;
1242 DECL_UNINLINABLE (decl) = 1;
1243 DECL_EXTERNAL (decl) = 0;
1244 DECL_CONTEXT (decl) = NULL_TREE;
1245 DECL_INITIAL (decl) = make_node (BLOCK);
/* Void result decl, as required for a FUNCTION_DECL.  */
1247 t = build_decl (BUILTINS_LOCATION,
1248 RESULT_DECL, NULL_TREE, void_type_node);
1249 DECL_ARTIFICIAL (t) = 1;
1250 DECL_IGNORED_P (t) = 1;
1251 DECL_RESULT (decl) = t;
/* The single parameter receiving the shared-data pointer.  */
1253 t = build_decl (BUILTINS_LOCATION,
1254 PARM_DECL, get_identifier (".paral_data_param"),
1255 ptr_type_node);
1256 DECL_ARTIFICIAL (t) = 1;
1257 DECL_ARG_TYPE (t) = ptr_type_node;
1258 DECL_CONTEXT (t) = decl;
1259 TREE_USED (t) = 1;
1260 DECL_ARGUMENTS (decl) = t;
1262 allocate_struct_function (decl, false);
1264 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1265 it. */
1266 set_cfun (act_cfun);
1268 return decl;
1271 /* Moves the exit condition of LOOP to the beginning of its header, and
1272 duplicates the part of the last iteration that gets disabled to the
1273 exit of the loop. NIT is the number of iterations of the loop
1274 (used to initialize the variables in the duplicated part).
1276 TODO: the common case is that latch of the loop is empty and immediately
1277 follows the loop exit. In this case, it would be better not to copy the
1278 body of the loop, but only move the entry of the loop directly before the
1279 exit check and increase the number of iterations of the loop by one.
1280 This may need some additional preconditioning in case NIT = ~0.
1281 REDUCTION_LIST describes the reductions in LOOP. */
1283 static void
1284 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1286 basic_block *bbs, *nbbs, ex_bb, orig_header;
1287 unsigned n;
1288 bool ok;
1289 edge exit = single_dom_exit (loop), hpred;
1290 tree control, control_name, res, t;
1291 gimple phi, nphi, cond_stmt, stmt;
1292 gimple_stmt_iterator gsi;
/* Split the header so the phis stay in LOOP->header while the code
   moves to ORIG_HEADER.  */
1294 split_block_after_labels (loop->header);
1295 orig_header = single_succ (loop->header);
1296 hpred = single_succ_edge (loop->header);
/* The exit condition compares the control variable against NIT.  */
1298 cond_stmt = last_stmt (exit->src);
1299 control = gimple_cond_lhs (cond_stmt);
1300 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1302 /* Make sure that we have phi nodes on exit for all loop header phis
1303 (create_parallel_loop requires that). */
1304 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1306 phi = gsi_stmt (gsi);
1307 res = PHI_RESULT (phi);
/* Insert a forwarding phi in ORIG_HEADER: the old result RES now comes
   from the new name T defined in the header phi.  */
1308 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1309 SET_PHI_RESULT (phi, t);
1311 nphi = create_phi_node (res, orig_header);
1312 SSA_NAME_DEF_STMT (res) = nphi;
1313 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1315 if (res == control)
/* Keep the exit condition using the name defined before ORIG_HEADER.  */
1317 gimple_cond_set_lhs (cond_stmt, t);
1318 update_stmt (cond_stmt);
1319 control = t;
/* Duplicate everything dominated by the header up to (but not including)
   the exit test into the exit path; the first N blocks in dom order are
   those before EXIT->src.  */
1323 bbs = get_loop_body_in_dom_order (loop);
1324 for (n = 0; bbs[n] != exit->src; n++)
1325 continue;
1326 nbbs = XNEWVEC (basic_block, n);
1327 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1328 bbs + 1, n, nbbs);
1329 gcc_assert (ok);
1330 free (bbs);
1331 ex_bb = nbbs[0];
1332 free (nbbs);
1334 /* Other than reductions, the only gimple reg that should be copied
1335 out of the loop is the control variable. */
1337 control_name = NULL_TREE;
1338 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1340 phi = gsi_stmt (gsi);
1341 res = PHI_RESULT (phi);
1342 if (!is_gimple_reg (res))
1344 gsi_next (&gsi);
1345 continue;
1348 /* Check if it is a part of reduction. If it is,
1349 keep the phi at the reduction's keep_res field. The
1350 PHI_RESULT of this phi is the resulting value of the reduction
1351 variable when exiting the loop. */
/* Re-fetch the exit edge: gimple_duplicate_sese_tail changed the CFG.  */
1353 exit = single_dom_exit (loop);
1355 if (htab_elements (reduction_list) > 0)
1357 struct reduction_info *red;
1359 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1361 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1362 if (red)
1364 red->keep_res = phi;
1365 gsi_next (&gsi);
1366 continue;
/* Anything else must be the control variable; its exit phi is removed
   and replaced by an explicit assignment below.  */
1369 gcc_assert (control_name == NULL_TREE
1370 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1371 control_name = res;
1372 remove_phi_node (&gsi, false);
1374 gcc_assert (control_name != NULL_TREE);
1376 /* Initialize the control variable to NIT. */
1377 gsi = gsi_after_labels (ex_bb);
1378 nit = force_gimple_operand_gsi (&gsi,
1379 fold_convert (TREE_TYPE (control_name), nit),
1380 false, NULL_TREE, false, GSI_SAME_STMT);
1381 stmt = gimple_build_assign (control_name, nit);
1382 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1383 SSA_NAME_DEF_STMT (control_name) = stmt;
1386 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1387 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1388 NEW_DATA is the variable that should be initialized from the argument
1389 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1390 basic block containing GIMPLE_OMP_PARALLEL tree. */
1392 static basic_block
1393 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1394 tree new_data, unsigned n_threads)
1396 gimple_stmt_iterator gsi;
1397 basic_block bb, paral_bb, for_bb, ex_bb;
1398 tree t, param, res;
1399 gimple stmt, for_stmt, phi, cond_stmt;
1400 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1401 edge exit, nexit, guard, end, e;
1403 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1404 bb = loop_preheader_edge (loop)->src;
1405 paral_bb = single_pred (bb);
1406 gsi = gsi_last_bb (paral_bb);
1408 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
1409 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1410 = build_int_cst (integer_type_node, n_threads);
1411 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1413 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1415 /* Initialize NEW_DATA. */
/* DATA may be NULL when separate_decls_in_region found nothing to copy.  */
1416 if (data)
1418 gsi = gsi_after_labels (bb);
/* param = &DATA; new_data = (type of new_data) param;  */
1420 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1421 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1422 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1423 SSA_NAME_DEF_STMT (param) = stmt;
1425 stmt = gimple_build_assign (new_data,
1426 fold_convert (TREE_TYPE (new_data), param));
1427 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1428 SSA_NAME_DEF_STMT (new_data) = stmt;
1431 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1432 bb = split_loop_exit_edge (single_dom_exit (loop));
1433 gsi = gsi_last_bb (bb);
1434 gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);
1436 /* Extract data for GIMPLE_OMP_FOR. */
/* transform_to_exit_first_loop guaranteed the exit test is the header.  */
1437 gcc_assert (loop->header == single_dom_exit (loop)->src);
1438 cond_stmt = last_stmt (loop->header);
1440 cvar = gimple_cond_lhs (cond_stmt);
1441 cvar_base = SSA_NAME_VAR (cvar);
1442 phi = SSA_NAME_DEF_STMT (cvar);
1443 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
/* INITVAR replaces the preheader value of the control variable; the OMP
   expander will define it per thread.  */
1444 initvar = make_ssa_name (cvar_base, NULL);
1445 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1446 initvar);
1447 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
/* Remove the increment of the control variable from the latch; the
   GIMPLE_OMP_CONTINUE emitted below takes over that role.  */
1449 gsi = gsi_last_bb (loop->latch);
1450 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1451 gsi_remove (&gsi, true);
1453 /* Prepare cfg. */
1454 for_bb = split_edge (loop_preheader_edge (loop));
1455 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1456 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1457 gcc_assert (exit == single_dom_exit (loop));
/* GUARD skips the loop entirely; END is the fall-through from the latch
   into the exit block.  */
1459 guard = make_edge (for_bb, ex_bb, 0);
1460 single_succ_edge (loop->latch)->flags = 0;
1461 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1462 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1464 source_location locus;
1465 tree def;
1466 phi = gsi_stmt (gsi);
1467 res = PHI_RESULT (phi);
/* STMT is the loop-header phi feeding this exit phi; take its preheader
   and latch arguments for the GUARD and END edges respectively.  */
1468 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1470 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1471 locus = gimple_phi_arg_location_from_edge (stmt,
1472 loop_preheader_edge (loop));
1473 add_phi_arg (phi, def, guard, locus);
1475 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1476 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1477 add_phi_arg (phi, def, end, locus);
1479 e = redirect_edge_and_branch (exit, nexit->dest);
1480 PENDING_STMT (e) = NULL;
1482 /* Emit GIMPLE_OMP_FOR. */
1483 gimple_cond_set_lhs (cond_stmt, cvar_base);
1484 type = TREE_TYPE (cvar);
1485 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
1486 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1488 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1489 gimple_omp_for_set_index (for_stmt, 0, initvar);
1490 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1491 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1492 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1493 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1494 cvar_base,
1495 build_int_cst (type, 1)));
1497 gsi = gsi_last_bb (for_bb);
1498 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1499 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1501 /* Emit GIMPLE_OMP_CONTINUE. */
1502 gsi = gsi_last_bb (loop->latch);
1503 stmt = gimple_build_omp_continue (cvar_next, cvar);
1504 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1505 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1507 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1508 gsi = gsi_last_bb (ex_bb);
1509 gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);
1511 return paral_bb;
1514 /* Generates code to execute the iterations of LOOP in N_THREADS
1515 threads in parallel.
1517 NITER describes number of iterations of LOOP.
1518 REDUCTION_LIST describes the reductions existent in the LOOP. */
1520 static void
1521 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1522 unsigned n_threads, struct tree_niter_desc *niter)
1524 struct loop *nloop;
1525 loop_iterator li;
1526 tree many_iterations_cond, type, nit;
1527 tree arg_struct, new_arg_struct;
1528 gimple_seq stmts;
1529 basic_block parallel_head;
1530 edge entry, exit;
1531 struct clsn_data clsn_data;
1532 unsigned prob;
1534 /* From
1536 ---------------------------------------------------------------------
1537 loop
1539 IV = phi (INIT, IV + STEP)
1540 BODY1;
1541 if (COND)
1542 break;
1543 BODY2;
1545 ---------------------------------------------------------------------
1547 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1548 we generate the following code:
1550 ---------------------------------------------------------------------
1552 if (MAY_BE_ZERO
1553 || NITER < MIN_PER_THREAD * N_THREADS)
1554 goto original;
1556 BODY1;
1557 store all local loop-invariant variables used in body of the loop to DATA.
1558 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1559 load the variables from DATA.
1560 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1561 BODY2;
1562 BODY1;
1563 GIMPLE_OMP_CONTINUE;
1564 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1565 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1566 goto end;
1568 original:
1569 loop
1571 IV = phi (INIT, IV + STEP)
1572 BODY1;
1573 if (COND)
1574 break;
1575 BODY2;
1578 end:
1582 /* Create two versions of the loop -- in the old one, we know that the
1583 number of iterations is large enough, and we will transform it into the
1584 loop that will be split to loop_fn, the new one will be used for the
1585 remaining iterations. */
1587 type = TREE_TYPE (niter->niter);
1588 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1589 NULL_TREE);
1590 if (stmts)
1591 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
/* Parallelize only when NITER >= MIN_PER_THREAD * N_THREADS and the
   MAY_BE_ZERO assumption does not hold.  */
1593 many_iterations_cond =
1594 fold_build2 (GE_EXPR, boolean_type_node,
1595 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1596 many_iterations_cond
1597 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1598 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1599 many_iterations_cond);
1600 many_iterations_cond
1601 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1602 if (stmts)
1603 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
/* loop_version needs a bare condition expression; gimplify once more if
   the first pass left something more complex.  */
1604 if (!is_gimple_condexpr (many_iterations_cond))
1606 many_iterations_cond
1607 = force_gimple_operand (many_iterations_cond, &stmts,
1608 true, NULL_TREE);
1609 if (stmts)
1610 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1613 initialize_original_copy_tables ();
1615 /* We assume that the loop usually iterates a lot. */
1616 prob = 4 * REG_BR_PROB_BASE / 5;
1617 nloop = loop_version (loop, many_iterations_cond, NULL,
1618 prob, prob, REG_BR_PROB_BASE - prob, true);
1619 update_ssa (TODO_update_ssa);
1620 free_original_copy_tables ();
1622 /* Base all the induction variables in LOOP on a single control one. */
1623 canonicalize_loop_ivs (loop, &nit);
1625 /* Ensure that the exit condition is the first statement in the loop. */
1626 transform_to_exit_first_loop (loop, reduction_list, nit);
1628 /* Generate initializations for reductions. */
1629 if (htab_elements (reduction_list) > 0)
1630 htab_traverse (reduction_list, initialize_reductions, loop);
1632 /* Eliminate the references to local variables from the loop. */
1633 gcc_assert (single_exit (loop));
1634 entry = loop_preheader_edge (loop);
1635 exit = single_dom_exit (loop);
1637 eliminate_local_variables (entry, exit);
1638 /* In the old loop, move all variables non-local to the loop to a structure
1639 and back, and create separate decls for the variables used in loop. */
1640 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1641 &new_arg_struct, &clsn_data);
1643 /* Create the parallel constructs. */
1644 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1645 new_arg_struct, n_threads);
1646 if (htab_elements (reduction_list) > 0)
1647 create_call_for_reduction (loop, reduction_list, &clsn_data);
1649 scev_reset ();
1651 /* Cancel the loop (it is simpler to do it here rather than to teach the
1652 expander to do it). */
1653 cancel_loop_tree (loop);
1655 /* Free loop bound estimations that could contain references to
1656 removed statements. */
1657 FOR_EACH_LOOP (li, loop, 0)
1658 free_numbers_of_iterations_estimates_loop (loop);
1660 /* Expand the parallel constructs. We do it directly here instead of running
1661 a separate expand_omp pass, since it is more efficient, and less likely to
1662 cause troubles with further analyses not being able to deal with the
1663 OMP trees. */
1665 omp_expand_local (parallel_head);
1668 /* Returns true when LOOP contains vector phi nodes. */
1670 static bool
1671 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1673 unsigned i;
1674 basic_block *bbs = get_loop_body_in_dom_order (loop);
1675 gimple_stmt_iterator gsi;
1676 bool res = true;
1678 for (i = 0; i < loop->num_nodes; i++)
1679 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1680 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1681 goto end;
1683 res = false;
1684 end:
1685 free (bbs);
1686 return res;
1689 /* Create a reduction_info struct, initialize it with REDUC_STMT
1690 and PHI, insert it to the REDUCTION_LIST. */
1692 static void
1693 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1695 PTR *slot;
1696 struct reduction_info *new_reduction;
1698 gcc_assert (reduc_stmt);
1700 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file,
1703 "Detected reduction. reduction stmt is: \n");
1704 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1705 fprintf (dump_file, "\n");
1708 new_reduction = XCNEW (struct reduction_info);
1710 new_reduction->reduc_stmt = reduc_stmt;
1711 new_reduction->reduc_phi = phi;
1712 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1713 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1714 *slot = new_reduction;
1717 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1719 static void
1720 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1722 gimple_stmt_iterator gsi;
1723 loop_vec_info simple_loop_info;
/* Silence vectorizer dumping while we borrow its analysis.  */
1725 vect_dump = NULL;
/* May return NULL when the loop does not have the shape the vectorizer
   expects; guarded below before vect_is_simple_reduction.  */
1726 simple_loop_info = vect_analyze_loop_form (loop);
1728 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1730 gimple phi = gsi_stmt (gsi);
1731 affine_iv iv;
1732 tree res = PHI_RESULT (phi);
1733 bool double_reduc;
1735 if (!is_gimple_reg (res))
1736 continue;
/* A phi that is not a simple induction variable is a reduction
   candidate; let the vectorizer's detector decide.  */
1738 if (!simple_iv (loop, loop, res, &iv, true)
1739 && simple_loop_info)
1741 gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
1742 if (reduc_stmt)
1743 build_new_reduction (reduction_list, reduc_stmt, phi);
/* NOTE(review): called even when SIMPLE_LOOP_INFO is NULL — presumably
   destroy_loop_vec_info tolerates NULL; confirm in tree-vect-loop.c.  */
1746 destroy_loop_vec_info (simple_loop_info, true);
1749 /* Try to initialize NITER for code generation part. */
1751 static bool
1752 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1754 edge exit = single_dom_exit (loop);
1756 gcc_assert (exit);
1758 /* We need to know # of iterations, and there should be no uses of values
1759 defined inside loop outside of it, unless the values are invariants of
1760 the loop. */
1761 if (!number_of_iterations_exit (loop, exit, niter, false))
1763 if (dump_file && (dump_flags & TDF_DETAILS))
1764 fprintf (dump_file, " FAILED: number of iterations not known\n");
1765 return false;
1768 return true;
1771 /* Try to initialize REDUCTION_LIST for code generation part.
1772 REDUCTION_LIST describes the reductions. */
1774 static bool
1775 try_create_reduction_list (loop_p loop, htab_t reduction_list)
1777 edge exit = single_dom_exit (loop);
1778 gimple_stmt_iterator gsi;
1780 gcc_assert (exit);
1782 gather_scalar_reductions (loop, reduction_list);
/* Every value live across the exit must be a detected reduction;
   otherwise the loop cannot be parallelized.  */
1785 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1787 gimple phi = gsi_stmt (gsi);
1788 struct reduction_info *red;
1789 imm_use_iterator imm_iter;
1790 use_operand_p use_p;
1791 gimple reduc_phi;
1792 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
/* Virtual operands (non-registers) are no obstacle; only gimple
   registers escaping the loop matter.  */
1794 if (is_gimple_reg (val))
1796 if (dump_file && (dump_flags & TDF_DETAILS))
1798 fprintf (dump_file, "phi is ");
1799 print_gimple_stmt (dump_file, phi, 0, 0);
1800 fprintf (dump_file, "arg of phi to exit: value ");
1801 print_generic_expr (dump_file, val, 0);
1802 fprintf (dump_file, " used outside loop\n");
1803 fprintf (dump_file,
1804 " checking if it a part of reduction pattern: \n");
1806 if (htab_elements (reduction_list) == 0)
1808 if (dump_file && (dump_flags & TDF_DETAILS))
1809 fprintf (dump_file,
1810 " FAILED: it is not a part of reduction.\n");
1811 return false;
/* Find the in-loop use of VAL: for a reduction it is the header phi
   recorded as reduc_phi.  */
1813 reduc_phi = NULL;
1814 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
1816 if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
1818 reduc_phi = USE_STMT (use_p);
1819 break;
1822 red = reduction_phi (reduction_list, reduc_phi);
1823 if (red == NULL)
1825 if (dump_file && (dump_flags & TDF_DETAILS))
1826 fprintf (dump_file,
1827 " FAILED: it is not a part of reduction.\n");
1828 return false;
1830 if (dump_file && (dump_flags & TDF_DETAILS))
1832 fprintf (dump_file, "reduction phi is ");
1833 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
1834 fprintf (dump_file, "reduction stmt is ");
1835 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
1840 /* The iterations of the loop may communicate only through bivs whose
1841 iteration space can be distributed efficiently. */
1842 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1844 gimple phi = gsi_stmt (gsi);
1845 tree def = PHI_RESULT (phi);
1846 affine_iv iv;
/* A header phi that is neither a simple iv nor a known reduction is a
   scalar dependency between iterations -- give up.  */
1848 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
1850 struct reduction_info *red;
1852 red = reduction_phi (reduction_list, phi);
1853 if (red == NULL)
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1856 fprintf (dump_file,
1857 " FAILED: scalar dependency between iterations\n");
1858 return false;
1864 return true;
1867 /* Detect parallel loops and generate parallel code using libgomp
1868 primitives. Returns true if some loop was parallelized, false
1869 otherwise. */
1871 bool
1872 parallelize_loops (void)
1874 unsigned n_threads = flag_tree_parallelize_loops;
1875 bool changed = false;
1876 struct loop *loop;
1877 struct tree_niter_desc niter_desc;
1878 loop_iterator li;
1879 htab_t reduction_list;
1881 /* Do not parallelize loops in the functions created by parallelization. */
1882 if (parallelized_function_p (cfun->decl))
1883 return false;
/* One table reused across loops; emptied at the top of each iteration.  */
1885 reduction_list = htab_create (10, reduction_info_hash,
1886 reduction_info_eq, free);
1887 init_stmt_vec_info_vec ();
1889 FOR_EACH_LOOP (li, loop, 0)
1891 htab_empty (reduction_list);
1893 /* If we use autopar in graphite pass, we use it's marked dependency
1894 checking results. */
1895 if (flag_loop_parallelize_all && !loop->can_be_parallel)
1896 continue;
1898 /* FIXME: Only consider innermost loops with just one exit. */
1899 if (loop->inner || !single_dom_exit (loop))
1900 continue;
1902 if (/* And of course, the loop must be parallelizable. */
1903 !can_duplicate_loop_p (loop)
1904 || loop_has_blocks_with_irreducible_flag (loop)
1905 /* FIXME: the check for vector phi nodes could be removed. */
1906 || loop_has_vector_phi_nodes (loop))
1907 continue;
1909 /* FIXME: Bypass this check as graphite doesn't update the
1910 count and frequency correctly now. */
/* Profitability: skip loops that iterate too few times per thread or
   sit in cold code.  */
1911 if (!flag_loop_parallelize_all
1912 && (expected_loop_iterations (loop) <= n_threads
1913 /* Do not bother with loops in cold areas. */
1914 || optimize_loop_nest_for_size_p (loop)))
1915 continue;
1917 if (!try_get_loop_niter (loop, &niter_desc))
1918 continue;
1920 if (!try_create_reduction_list (loop, reduction_list))
1921 continue;
/* With -floop-parallelize-all the dependence test was already done
   by graphite (can_be_parallel above).  */
1923 if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
1924 continue;
1926 changed = true;
1927 gen_parallel_loop (loop, reduction_list,
1928 n_threads, &niter_desc);
/* Transformation sanity checks after each parallelized loop.  */
1929 verify_flow_info ();
1930 verify_dominators (CDI_DOMINATORS);
1931 verify_loop_structure ();
1932 verify_loop_closed_ssa ();
1935 free_stmt_vec_info_vec ();
1936 htab_delete (reduction_list);
1938 /* Parallelization will cause new function calls to be inserted through
1939 which local variables will escape. Reset the points-to solutions
1940 for ESCAPED and CALLUSED. */
1941 if (changed)
1943 pt_solution_reset (&cfun->gimple_df->escaped);
1944 pt_solution_reset (&cfun->gimple_df->callused);
1947 return changed;
1950 #include "gt-tree-parloops.h"