2010-10-26 Tobias Burnus <burnus@net-b.de>
[official-gcc.git] / gcc / tree-parloops.c
blobde9faefe77804b8b9b878b8aa33fccd5ea288df6
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5 Zdenek Dvorak <dvorakz@suse.cz>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "tree-flow.h"
29 #include "cfgloop.h"
30 #include "tree-data-ref.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-pass.h"
34 #include "tree-scalar-evolution.h"
35 #include "hashtab.h"
36 #include "langhooks.h"
37 #include "tree-vectorizer.h"
39 /* This pass tries to distribute iterations of loops into several threads.
40 The implementation is straightforward -- for each loop we test whether its
41 iterations are independent, and if it is the case (and some additional
42 conditions regarding profitability and correctness are satisfied), we
43 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
44 machinery do its job.
46 The most of the complexity is in bringing the code into shape expected
47 by the omp expanders:
48 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
49 variable and that the exit test is at the start of the loop body
50 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
51 variables by accesses through pointers, and breaking up ssa chains
52 by storing the values incoming to the parallelized loop to a structure
53 passed to the new function as an argument (something similar is done
54 in omp gimplification, unfortunately only a small part of the code
55 can be shared).
57 TODO:
58 -- if there are several parallelizable loops in a function, it may be
59 possible to generate the threads just once (using synchronization to
60 ensure that cross-loop dependences are obeyed).
61 -- handling of common scalar dependence patterns (accumulation, ...)
62 -- handling of non-innermost loops */
65 Reduction handling:
66 currently we use vect_force_simple_reduction() to detect reduction patterns.
67 The code transformation will be introduced by an example.
70 parloop
72 int sum=1;
74 for (i = 0; i < N; i++)
76 x[i] = i + 3;
77 sum+=x[i];
81 gimple-like code:
82 header_bb:
84 # sum_29 = PHI <sum_11(5), 1(3)>
85 # i_28 = PHI <i_12(5), 0(3)>
86 D.1795_8 = i_28 + 3;
87 x[i_28] = D.1795_8;
88 sum_11 = D.1795_8 + sum_29;
89 i_12 = i_28 + 1;
90 if (N_6(D) > i_12)
91 goto header_bb;
94 exit_bb:
96 # sum_21 = PHI <sum_11(4)>
97 printf (&"%d"[0], sum_21);
100 after reduction transformation (only relevant parts):
102 parloop
105 ....
108 # Storing the initial value given by the user. #
110 .paral_data_store.32.sum.27 = 1;
112 #pragma omp parallel num_threads(4)
114 #pragma omp for schedule(static)
116 # The neutral element corresponding to the particular
117 reduction's operation, e.g. 0 for PLUS_EXPR,
118 1 for MULT_EXPR, etc. replaces the user's initial value. #
120 # sum.27_29 = PHI <sum.27_11, 0>
122 sum.27_11 = D.1827_8 + sum.27_29;
124 GIMPLE_OMP_CONTINUE
126 # Adding this reduction phi is done at create_phi_for_local_result() #
127 # sum.27_56 = PHI <sum.27_11, 0>
128 GIMPLE_OMP_RETURN
130 # Creating the atomic operation is done at
131 create_call_for_reduction_1() #
133 #pragma omp atomic_load
134 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
135 D.1840_60 = sum.27_56 + D.1839_59;
136 #pragma omp atomic_store (D.1840_60);
138 GIMPLE_OMP_RETURN
140 # collecting the result after the join of the threads is done at
141 create_loads_for_reductions().
142 The value computed by the threads is loaded from the
143 shared struct. #
146 .paral_data_load.33_52 = &.paral_data_store.32;
147 sum_37 = .paral_data_load.33_52->sum.27;
148 sum_43 = D.1795_41 + sum_37;
150 exit bb:
151 # sum_21 = PHI <sum_43, sum_26>
152 printf (&"%d"[0], sum_21);
160 /* Minimal number of iterations of a loop that should be executed in each
161 thread. */
162 #define MIN_PER_THREAD 100
164 /* Element of the hashtable, representing a
165 reduction in the current loop. */
166 struct reduction_info
168 gimple reduc_stmt; /* reduction statement. */
169 gimple reduc_phi; /* The phi node defining the reduction. */
170 enum tree_code reduction_code;/* code for the reduction operation. */
171 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
172 of the reduction variable when existing the loop. */
173 tree initial_value; /* The initial value of the reduction var before entering the loop. */
174 tree field; /* the name of the field in the parloop data structure intended for reduction. */
175 tree init; /* reduction initialization value. */
176 gimple new_phi; /* (helper field) Newly created phi node whose result
177 will be passed to the atomic operation. Represents
178 the local result each thread computed for the reduction
179 operation. */
182 /* Equality and hash functions for hashtab code. */
184 static int
185 reduction_info_eq (const void *aa, const void *bb)
187 const struct reduction_info *a = (const struct reduction_info *) aa;
188 const struct reduction_info *b = (const struct reduction_info *) bb;
190 return (a->reduc_phi == b->reduc_phi);
193 static hashval_t
194 reduction_info_hash (const void *aa)
196 const struct reduction_info *a = (const struct reduction_info *) aa;
198 return htab_hash_pointer (a->reduc_phi);
201 static struct reduction_info *
202 reduction_phi (htab_t reduction_list, gimple phi)
204 struct reduction_info tmpred, *red;
206 if (htab_elements (reduction_list) == 0)
207 return NULL;
209 tmpred.reduc_phi = phi;
210 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
212 return red;
215 /* Element of hashtable of names to copy. */
217 struct name_to_copy_elt
219 unsigned version; /* The version of the name to copy. */
220 tree new_name; /* The new name used in the copy. */
221 tree field; /* The field of the structure used to pass the
222 value. */
225 /* Equality and hash functions for hashtab code. */
227 static int
228 name_to_copy_elt_eq (const void *aa, const void *bb)
230 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
231 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
233 return a->version == b->version;
236 static hashval_t
237 name_to_copy_elt_hash (const void *aa)
239 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
241 return (hashval_t) a->version;
245 /* Data dependency analysis. Returns true if the iterations of LOOP
246 are independent on each other (that is, if we can execute them
247 in parallel). */
249 static bool
250 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
252 VEC (ddr_p, heap) * dependence_relations;
253 VEC (data_reference_p, heap) *datarefs;
254 lambda_trans_matrix trans;
255 bool ret = false;
257 if (dump_file && (dump_flags & TDF_DETAILS))
259 fprintf (dump_file, "Considering loop %d\n", loop->num);
260 if (!loop->inner)
261 fprintf (dump_file, "loop is innermost\n");
262 else
263 fprintf (dump_file, "loop NOT innermost\n");
266 /* Check for problems with dependences. If the loop can be reversed,
267 the iterations are independent. */
268 datarefs = VEC_alloc (data_reference_p, heap, 10);
269 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
270 compute_data_dependences_for_loop (loop, true, &datarefs,
271 &dependence_relations);
272 if (dump_file && (dump_flags & TDF_DETAILS))
273 dump_data_dependence_relations (dump_file, dependence_relations);
275 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
276 LTM_MATRIX (trans)[0][0] = -1;
278 if (lambda_transform_legal_p (trans, 1, dependence_relations))
280 ret = true;
281 if (dump_file && (dump_flags & TDF_DETAILS))
282 fprintf (dump_file, " SUCCESS: may be parallelized\n");
284 else if (dump_file && (dump_flags & TDF_DETAILS))
285 fprintf (dump_file,
286 " FAILED: data dependencies exist across iterations\n");
288 free_dependence_relations (dependence_relations);
289 free_data_refs (datarefs);
291 return ret;
294 /* Return true when LOOP contains basic blocks marked with the
295 BB_IRREDUCIBLE_LOOP flag. */
297 static inline bool
298 loop_has_blocks_with_irreducible_flag (struct loop *loop)
300 unsigned i;
301 basic_block *bbs = get_loop_body_in_dom_order (loop);
302 bool res = true;
304 for (i = 0; i < loop->num_nodes; i++)
305 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
306 goto end;
308 res = false;
309 end:
310 free (bbs);
311 return res;
314 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
315 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
316 to their addresses that can be reused. The address of OBJ is known to
317 be invariant in the whole function. */
319 static tree
320 take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
322 int uid;
323 void **dslot;
324 struct int_tree_map ielt, *nielt;
325 tree *var_p, name, bvar, addr;
326 gimple stmt;
327 gimple_seq stmts;
329 /* Since the address of OBJ is invariant, the trees may be shared.
330 Avoid rewriting unrelated parts of the code. */
331 obj = unshare_expr (obj);
332 for (var_p = &obj;
333 handled_component_p (*var_p);
334 var_p = &TREE_OPERAND (*var_p, 0))
335 continue;
337 /* Canonicalize the access to base on a MEM_REF. */
338 if (DECL_P (*var_p))
339 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
341 /* Assign a canonical SSA name to the address of the base decl used
342 in the address and share it for all accesses and addresses based
343 on it. */
344 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
345 ielt.uid = uid;
346 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
347 if (!*dslot)
349 addr = TREE_OPERAND (*var_p, 0);
350 bvar = create_tmp_var (TREE_TYPE (addr),
351 get_name (TREE_OPERAND
352 (TREE_OPERAND (*var_p, 0), 0)));
353 add_referenced_var (bvar);
354 stmt = gimple_build_assign (bvar, addr);
355 name = make_ssa_name (bvar, stmt);
356 gimple_assign_set_lhs (stmt, name);
357 gsi_insert_on_edge_immediate (entry, stmt);
359 nielt = XNEW (struct int_tree_map);
360 nielt->uid = uid;
361 nielt->to = name;
362 *dslot = nielt;
364 else
365 name = ((struct int_tree_map *) *dslot)->to;
367 /* Express the address in terms of the canonical SSA name. */
368 TREE_OPERAND (*var_p, 0) = name;
369 name = force_gimple_operand (build_addr (obj, current_function_decl),
370 &stmts, true, NULL_TREE);
371 if (!gimple_seq_empty_p (stmts))
372 gsi_insert_seq_on_edge_immediate (entry, stmts);
374 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
376 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
377 NULL_TREE);
378 if (!gimple_seq_empty_p (stmts))
379 gsi_insert_seq_on_edge_immediate (entry, stmts);
382 return name;
385 /* Callback for htab_traverse. Create the initialization statement
386 for reduction described in SLOT, and place it at the preheader of
387 the loop described in DATA. */
389 static int
390 initialize_reductions (void **slot, void *data)
392 tree init, c;
393 tree bvar, type, arg;
394 edge e;
396 struct reduction_info *const reduc = (struct reduction_info *) *slot;
397 struct loop *loop = (struct loop *) data;
399 /* Create initialization in preheader:
400 reduction_variable = initialization value of reduction. */
402 /* In the phi node at the header, replace the argument coming
403 from the preheader with the reduction initialization value. */
405 /* Create a new variable to initialize the reduction. */
406 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
407 bvar = create_tmp_var (type, "reduction");
408 add_referenced_var (bvar);
410 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
411 OMP_CLAUSE_REDUCTION);
412 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
413 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
415 init = omp_reduction_init (c, TREE_TYPE (bvar));
416 reduc->init = init;
418 /* Replace the argument representing the initialization value
419 with the initialization value for the reduction (neutral
420 element for the particular operation, e.g. 0 for PLUS_EXPR,
421 1 for MULT_EXPR, etc).
422 Keep the old value in a new variable "reduction_initial",
423 that will be taken in consideration after the parallel
424 computing is done. */
426 e = loop_preheader_edge (loop);
427 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
428 /* Create new variable to hold the initial value. */
430 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
431 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
432 reduc->initial_value = arg;
433 return 1;
436 struct elv_data
438 struct walk_stmt_info info;
439 edge entry;
440 htab_t decl_address;
441 bool changed;
444 /* Eliminates references to local variables in *TP out of the single
445 entry single exit region starting at DTA->ENTRY.
446 DECL_ADDRESS contains addresses of the references that had their
447 address taken already. If the expression is changed, CHANGED is
448 set to true. Callback for walk_tree. */
450 static tree
451 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
453 struct elv_data *const dta = (struct elv_data *) data;
454 tree t = *tp, var, addr, addr_type, type, obj;
456 if (DECL_P (t))
458 *walk_subtrees = 0;
460 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
461 return NULL_TREE;
463 type = TREE_TYPE (t);
464 addr_type = build_pointer_type (type);
465 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
466 *tp = build_simple_mem_ref (addr);
468 dta->changed = true;
469 return NULL_TREE;
472 if (TREE_CODE (t) == ADDR_EXPR)
474 /* ADDR_EXPR may appear in two contexts:
475 -- as a gimple operand, when the address taken is a function invariant
476 -- as gimple rhs, when the resulting address in not a function
477 invariant
478 We do not need to do anything special in the latter case (the base of
479 the memory reference whose address is taken may be replaced in the
480 DECL_P case). The former case is more complicated, as we need to
481 ensure that the new address is still a gimple operand. Thus, it
482 is not sufficient to replace just the base of the memory reference --
483 we need to move the whole computation of the address out of the
484 loop. */
485 if (!is_gimple_val (t))
486 return NULL_TREE;
488 *walk_subtrees = 0;
489 obj = TREE_OPERAND (t, 0);
490 var = get_base_address (obj);
491 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
492 return NULL_TREE;
494 addr_type = TREE_TYPE (t);
495 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
496 *tp = addr;
498 dta->changed = true;
499 return NULL_TREE;
502 if (!EXPR_P (t))
503 *walk_subtrees = 0;
505 return NULL_TREE;
508 /* Moves the references to local variables in STMT out of the single
509 entry single exit region starting at ENTRY. DECL_ADDRESS contains
510 addresses of the references that had their address taken
511 already. */
513 static void
514 eliminate_local_variables_stmt (edge entry, gimple stmt,
515 htab_t decl_address)
517 struct elv_data dta;
519 memset (&dta.info, '\0', sizeof (dta.info));
520 dta.entry = entry;
521 dta.decl_address = decl_address;
522 dta.changed = false;
524 if (gimple_debug_bind_p (stmt))
525 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
526 eliminate_local_variables_1, &dta.info, NULL);
527 else
528 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
530 if (dta.changed)
531 update_stmt (stmt);
534 /* Eliminates the references to local variables from the single entry
535 single exit region between the ENTRY and EXIT edges.
537 This includes:
538 1) Taking address of a local variable -- these are moved out of the
539 region (and temporary variable is created to hold the address if
540 necessary).
542 2) Dereferencing a local variable -- these are replaced with indirect
543 references. */
545 static void
546 eliminate_local_variables (edge entry, edge exit)
548 basic_block bb;
549 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
550 unsigned i;
551 gimple_stmt_iterator gsi;
552 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
553 free);
554 basic_block entry_bb = entry->src;
555 basic_block exit_bb = exit->dest;
557 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
559 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
560 if (bb != entry_bb && bb != exit_bb)
561 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
562 eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
563 decl_address);
565 htab_delete (decl_address);
566 VEC_free (basic_block, heap, body);
569 /* Returns true if expression EXPR is not defined between ENTRY and
570 EXIT, i.e. if all its operands are defined outside of the region. */
572 static bool
573 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
575 basic_block entry_bb = entry->src;
576 basic_block exit_bb = exit->dest;
577 basic_block def_bb;
579 if (is_gimple_min_invariant (expr))
580 return true;
582 if (TREE_CODE (expr) == SSA_NAME)
584 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
585 if (def_bb
586 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
587 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
588 return false;
590 return true;
593 return false;
596 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
597 The copies are stored to NAME_COPIES, if NAME was already duplicated,
598 its duplicate stored in NAME_COPIES is returned.
600 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
601 duplicated, storing the copies in DECL_COPIES. */
603 static tree
604 separate_decls_in_region_name (tree name,
605 htab_t name_copies, htab_t decl_copies,
606 bool copy_name_p)
608 tree copy, var, var_copy;
609 unsigned idx, uid, nuid;
610 struct int_tree_map ielt, *nielt;
611 struct name_to_copy_elt elt, *nelt;
612 void **slot, **dslot;
614 if (TREE_CODE (name) != SSA_NAME)
615 return name;
617 idx = SSA_NAME_VERSION (name);
618 elt.version = idx;
619 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
620 copy_name_p ? INSERT : NO_INSERT);
621 if (slot && *slot)
622 return ((struct name_to_copy_elt *) *slot)->new_name;
624 var = SSA_NAME_VAR (name);
625 uid = DECL_UID (var);
626 ielt.uid = uid;
627 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
628 if (!*dslot)
630 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
631 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
632 add_referenced_var (var_copy);
633 nielt = XNEW (struct int_tree_map);
634 nielt->uid = uid;
635 nielt->to = var_copy;
636 *dslot = nielt;
638 /* Ensure that when we meet this decl next time, we won't duplicate
639 it again. */
640 nuid = DECL_UID (var_copy);
641 ielt.uid = nuid;
642 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
643 gcc_assert (!*dslot);
644 nielt = XNEW (struct int_tree_map);
645 nielt->uid = nuid;
646 nielt->to = var_copy;
647 *dslot = nielt;
649 else
650 var_copy = ((struct int_tree_map *) *dslot)->to;
652 if (copy_name_p)
654 copy = duplicate_ssa_name (name, NULL);
655 nelt = XNEW (struct name_to_copy_elt);
656 nelt->version = idx;
657 nelt->new_name = copy;
658 nelt->field = NULL_TREE;
659 *slot = nelt;
661 else
663 gcc_assert (!slot);
664 copy = name;
667 SSA_NAME_VAR (copy) = var_copy;
668 return copy;
671 /* Finds the ssa names used in STMT that are defined outside the
672 region between ENTRY and EXIT and replaces such ssa names with
673 their duplicates. The duplicates are stored to NAME_COPIES. Base
674 decls of all ssa names used in STMT (including those defined in
675 LOOP) are replaced with the new temporary variables; the
676 replacement decls are stored in DECL_COPIES. */
678 static void
679 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
680 htab_t name_copies, htab_t decl_copies)
682 use_operand_p use;
683 def_operand_p def;
684 ssa_op_iter oi;
685 tree name, copy;
686 bool copy_name_p;
688 mark_virtual_ops_for_renaming (stmt);
690 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
692 name = DEF_FROM_PTR (def);
693 gcc_assert (TREE_CODE (name) == SSA_NAME);
694 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
695 false);
696 gcc_assert (copy == name);
699 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
701 name = USE_FROM_PTR (use);
702 if (TREE_CODE (name) != SSA_NAME)
703 continue;
705 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
706 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
707 copy_name_p);
708 SET_USE (use, copy);
712 /* Finds the ssa names used in STMT that are defined outside the
713 region between ENTRY and EXIT and replaces such ssa names with
714 their duplicates. The duplicates are stored to NAME_COPIES. Base
715 decls of all ssa names used in STMT (including those defined in
716 LOOP) are replaced with the new temporary variables; the
717 replacement decls are stored in DECL_COPIES. */
719 static bool
720 separate_decls_in_region_debug_bind (gimple stmt,
721 htab_t name_copies, htab_t decl_copies)
723 use_operand_p use;
724 ssa_op_iter oi;
725 tree var, name;
726 struct int_tree_map ielt;
727 struct name_to_copy_elt elt;
728 void **slot, **dslot;
730 var = gimple_debug_bind_get_var (stmt);
731 if (TREE_CODE (var) == DEBUG_EXPR_DECL)
732 return true;
733 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
734 ielt.uid = DECL_UID (var);
735 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
736 if (!dslot)
737 return true;
738 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
740 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
742 name = USE_FROM_PTR (use);
743 if (TREE_CODE (name) != SSA_NAME)
744 continue;
746 elt.version = SSA_NAME_VERSION (name);
747 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
748 if (!slot)
750 gimple_debug_bind_reset_value (stmt);
751 update_stmt (stmt);
752 break;
755 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
758 return false;
761 /* Callback for htab_traverse. Adds a field corresponding to the reduction
762 specified in SLOT. The type is passed in DATA. */
764 static int
765 add_field_for_reduction (void **slot, void *data)
768 struct reduction_info *const red = (struct reduction_info *) *slot;
769 tree const type = (tree) data;
770 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
771 tree field = build_decl (gimple_location (red->reduc_stmt),
772 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
774 insert_field_into_struct (type, field);
776 red->field = field;
778 return 1;
781 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
782 described in SLOT. The type is passed in DATA. */
784 static int
785 add_field_for_name (void **slot, void *data)
787 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
788 tree type = (tree) data;
789 tree name = ssa_name (elt->version);
790 tree var = SSA_NAME_VAR (name);
791 tree field = build_decl (DECL_SOURCE_LOCATION (var),
792 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
794 insert_field_into_struct (type, field);
795 elt->field = field;
797 return 1;
800 /* Callback for htab_traverse. A local result is the intermediate result
801 computed by a single
802 thread, or the initial value in case no iteration was executed.
803 This function creates a phi node reflecting these values.
804 The phi's result will be stored in NEW_PHI field of the
805 reduction's data structure. */
807 static int
808 create_phi_for_local_result (void **slot, void *data)
810 struct reduction_info *const reduc = (struct reduction_info *) *slot;
811 const struct loop *const loop = (const struct loop *) data;
812 edge e;
813 gimple new_phi;
814 basic_block store_bb;
815 tree local_res;
816 source_location locus;
818 /* STORE_BB is the block where the phi
819 should be stored. It is the destination of the loop exit.
820 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
821 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
823 /* STORE_BB has two predecessors. One coming from the loop
824 (the reduction's result is computed at the loop),
825 and another coming from a block preceding the loop,
826 when no iterations
827 are executed (the initial value should be taken). */
828 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
829 e = EDGE_PRED (store_bb, 1);
830 else
831 e = EDGE_PRED (store_bb, 0);
832 local_res
833 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
834 NULL);
835 locus = gimple_location (reduc->reduc_stmt);
836 new_phi = create_phi_node (local_res, store_bb);
837 SSA_NAME_DEF_STMT (local_res) = new_phi;
838 add_phi_arg (new_phi, reduc->init, e, locus);
839 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
840 FALLTHRU_EDGE (loop->latch), locus);
841 reduc->new_phi = new_phi;
843 return 1;
846 struct clsn_data
848 tree store;
849 tree load;
851 basic_block store_bb;
852 basic_block load_bb;
855 /* Callback for htab_traverse. Create an atomic instruction for the
856 reduction described in SLOT.
857 DATA annotates the place in memory the atomic operation relates to,
858 and the basic block it needs to be generated in. */
860 static int
861 create_call_for_reduction_1 (void **slot, void *data)
863 struct reduction_info *const reduc = (struct reduction_info *) *slot;
864 struct clsn_data *const clsn_data = (struct clsn_data *) data;
865 gimple_stmt_iterator gsi;
866 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
867 tree load_struct;
868 basic_block bb;
869 basic_block new_bb;
870 edge e;
871 tree t, addr, ref, x;
872 tree tmp_load, name;
873 gimple load;
875 load_struct = build_simple_mem_ref (clsn_data->load);
876 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
878 addr = build_addr (t, current_function_decl);
880 /* Create phi node. */
881 bb = clsn_data->load_bb;
883 e = split_block (bb, t);
884 new_bb = e->dest;
886 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
887 add_referenced_var (tmp_load);
888 tmp_load = make_ssa_name (tmp_load, NULL);
889 load = gimple_build_omp_atomic_load (tmp_load, addr);
890 SSA_NAME_DEF_STMT (tmp_load) = load;
891 gsi = gsi_start_bb (new_bb);
892 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
894 e = split_block (new_bb, load);
895 new_bb = e->dest;
896 gsi = gsi_start_bb (new_bb);
897 ref = tmp_load;
898 x = fold_build2 (reduc->reduction_code,
899 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
900 PHI_RESULT (reduc->new_phi));
902 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
903 GSI_CONTINUE_LINKING);
905 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
906 return 1;
909 /* Create the atomic operation at the join point of the threads.
910 REDUCTION_LIST describes the reductions in the LOOP.
911 LD_ST_DATA describes the shared data structure where
912 shared data is stored in and loaded from. */
913 static void
914 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
915 struct clsn_data *ld_st_data)
917 htab_traverse (reduction_list, create_phi_for_local_result, loop);
918 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
919 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
920 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
923 /* Callback for htab_traverse. Loads the final reduction value at the
924 join point of all threads, and inserts it in the right place. */
926 static int
927 create_loads_for_reductions (void **slot, void *data)
929 struct reduction_info *const red = (struct reduction_info *) *slot;
930 struct clsn_data *const clsn_data = (struct clsn_data *) data;
931 gimple stmt;
932 gimple_stmt_iterator gsi;
933 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
934 tree load_struct;
935 tree name;
936 tree x;
938 gsi = gsi_after_labels (clsn_data->load_bb);
939 load_struct = build_simple_mem_ref (clsn_data->load);
940 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
941 NULL_TREE);
943 x = load_struct;
944 name = PHI_RESULT (red->keep_res);
945 stmt = gimple_build_assign (name, x);
946 SSA_NAME_DEF_STMT (name) = stmt;
948 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
950 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
951 !gsi_end_p (gsi); gsi_next (&gsi))
952 if (gsi_stmt (gsi) == red->keep_res)
954 remove_phi_node (&gsi, false);
955 return 1;
957 gcc_unreachable ();
960 /* Load the reduction result that was stored in LD_ST_DATA.
961 REDUCTION_LIST describes the list of reductions that the
962 loads should be generated for. */
963 static void
964 create_final_loads_for_reduction (htab_t reduction_list,
965 struct clsn_data *ld_st_data)
967 gimple_stmt_iterator gsi;
968 tree t;
969 gimple stmt;
971 gsi = gsi_after_labels (ld_st_data->load_bb);
972 t = build_fold_addr_expr (ld_st_data->store);
973 stmt = gimple_build_assign (ld_st_data->load, t);
975 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
976 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
978 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
982 /* Callback for htab_traverse. Store the neutral value for the
983 particular reduction's operation, e.g. 0 for PLUS_EXPR,
984 1 for MULT_EXPR, etc. into the reduction field.
985 The reduction is specified in SLOT. The store information is
986 passed in DATA. */
988 static int
989 create_stores_for_reduction (void **slot, void *data)
991 struct reduction_info *const red = (struct reduction_info *) *slot;
992 struct clsn_data *const clsn_data = (struct clsn_data *) data;
993 tree t;
994 gimple stmt;
995 gimple_stmt_iterator gsi;
996 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
998 gsi = gsi_last_bb (clsn_data->store_bb);
999 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1000 stmt = gimple_build_assign (t, red->initial_value);
1001 mark_virtual_ops_for_renaming (stmt);
1002 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1004 return 1;
1007 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1008 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1009 specified in SLOT. */
1011 static int
1012 create_loads_and_stores_for_name (void **slot, void *data)
1014 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1015 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1016 tree t;
1017 gimple stmt;
1018 gimple_stmt_iterator gsi;
1019 tree type = TREE_TYPE (elt->new_name);
1020 tree load_struct;
1022 gsi = gsi_last_bb (clsn_data->store_bb);
1023 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1024 stmt = gimple_build_assign (t, ssa_name (elt->version));
1025 mark_virtual_ops_for_renaming (stmt);
1026 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1028 gsi = gsi_last_bb (clsn_data->load_bb);
1029 load_struct = build_simple_mem_ref (clsn_data->load);
1030 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1031 stmt = gimple_build_assign (elt->new_name, t);
1032 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1033 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1035 return 1;
1038 /* Moves all the variables used in LOOP and defined outside of it (including
1039 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1040 name) to a structure created for this purpose. The code
1042 while (1)
1044 use (a);
1045 use (b);
1048 is transformed this way:
1050 bb0:
1051 old.a = a;
1052 old.b = b;
1054 bb1:
1055 a' = new->a;
1056 b' = new->b;
1057 while (1)
1059 use (a');
1060 use (b');
1063 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1064 pointer `new' is intentionally not initialized (the loop will be split to a
1065 separate function later, and `new' will be initialized from its arguments).
1066 LD_ST_DATA holds information about the shared data structure used to pass
1067 information among the threads. It is initialized here, and
1068 gen_parallel_loop will pass it to create_call_for_reduction that
1069 needs this information. REDUCTION_LIST describes the reductions
1070 in LOOP. */
1072 static void
1073 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1074 tree *arg_struct, tree *new_arg_struct,
1075 struct clsn_data *ld_st_data)
1078 basic_block bb1 = split_edge (entry);
1079 basic_block bb0 = single_pred (bb1);
1080 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1081 name_to_copy_elt_eq, free);
1082 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1083 free);
1084 unsigned i;
1085 tree type, type_name, nvar;
1086 gimple_stmt_iterator gsi;
1087 struct clsn_data clsn_data;
1088 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1089 basic_block bb;
1090 basic_block entry_bb = bb1;
1091 basic_block exit_bb = exit->dest;
1092 bool has_debug_stmt = false;
1094 entry = single_succ_edge (entry_bb);
1095 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1097 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1099 if (bb != entry_bb && bb != exit_bb)
1101 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1102 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1103 name_copies, decl_copies);
1105 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1107 gimple stmt = gsi_stmt (gsi);
1109 if (is_gimple_debug (stmt))
1110 has_debug_stmt = true;
1111 else
1112 separate_decls_in_region_stmt (entry, exit, stmt,
1113 name_copies, decl_copies);
1118 /* Now process debug bind stmts. We must not create decls while
1119 processing debug stmts, so we defer their processing so as to
1120 make sure we will have debug info for as many variables as
1121 possible (all of those that were dealt with in the loop above),
1122 and discard those for which we know there's nothing we can
1123 do. */
1124 if (has_debug_stmt)
1125 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1126 if (bb != entry_bb && bb != exit_bb)
1128 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1130 gimple stmt = gsi_stmt (gsi);
1132 if (gimple_debug_bind_p (stmt))
1134 if (separate_decls_in_region_debug_bind (stmt,
1135 name_copies,
1136 decl_copies))
1138 gsi_remove (&gsi, true);
1139 continue;
1143 gsi_next (&gsi);
1147 VEC_free (basic_block, heap, body);
1149 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1151 /* It may happen that there is nothing to copy (if there are only
1152 loop carried and external variables in the loop). */
1153 *arg_struct = NULL;
1154 *new_arg_struct = NULL;
1156 else
1158 /* Create the type for the structure to store the ssa names to. */
1159 type = lang_hooks.types.make_type (RECORD_TYPE);
1160 type_name = build_decl (BUILTINS_LOCATION,
1161 TYPE_DECL, create_tmp_var_name (".paral_data"),
1162 type);
1163 TYPE_NAME (type) = type_name;
1165 htab_traverse (name_copies, add_field_for_name, type);
1166 if (reduction_list && htab_elements (reduction_list) > 0)
1168 /* Create the fields for reductions. */
1169 htab_traverse (reduction_list, add_field_for_reduction,
1170 type);
1172 layout_type (type);
1174 /* Create the loads and stores. */
1175 *arg_struct = create_tmp_var (type, ".paral_data_store");
1176 add_referenced_var (*arg_struct);
1177 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1178 add_referenced_var (nvar);
1179 *new_arg_struct = make_ssa_name (nvar, NULL);
1181 ld_st_data->store = *arg_struct;
1182 ld_st_data->load = *new_arg_struct;
1183 ld_st_data->store_bb = bb0;
1184 ld_st_data->load_bb = bb1;
1186 htab_traverse (name_copies, create_loads_and_stores_for_name,
1187 ld_st_data);
1189 /* Load the calculation from memory (after the join of the threads). */
1191 if (reduction_list && htab_elements (reduction_list) > 0)
1193 htab_traverse (reduction_list, create_stores_for_reduction,
1194 ld_st_data);
1195 clsn_data.load = make_ssa_name (nvar, NULL);
1196 clsn_data.load_bb = exit->dest;
1197 clsn_data.store = ld_st_data->store;
1198 create_final_loads_for_reduction (reduction_list, &clsn_data);
1202 htab_delete (decl_copies);
1203 htab_delete (name_copies);
1206 /* Bitmap containing uids of functions created by parallelization. We cannot
1207 allocate it from the default obstack, as it must live across compilation
1208 of several functions; we make it gc allocated instead. */
1210 static GTY(()) bitmap parallelized_functions;
1212 /* Returns true if FN was created by create_loop_fn. */
1214 static bool
1215 parallelized_function_p (tree fn)
1217 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1218 return false;
1220 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1223 /* Creates and returns an empty function that will receive the body of
1224 a parallelized loop. */
1226 static tree
1227 create_loop_fn (void)
1229 char buf[100];
1230 char *tname;
1231 tree decl, type, name, t;
1232 struct function *act_cfun = cfun;
1233 static unsigned loopfn_num;
1235 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1236 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1237 clean_symbol_name (tname);
1238 name = get_identifier (tname);
1239 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1241 decl = build_decl (BUILTINS_LOCATION,
1242 FUNCTION_DECL, name, type);
1243 if (!parallelized_functions)
1244 parallelized_functions = BITMAP_GGC_ALLOC ();
1245 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1247 TREE_STATIC (decl) = 1;
1248 TREE_USED (decl) = 1;
1249 DECL_ARTIFICIAL (decl) = 1;
1250 DECL_IGNORED_P (decl) = 0;
1251 TREE_PUBLIC (decl) = 0;
1252 DECL_UNINLINABLE (decl) = 1;
1253 DECL_EXTERNAL (decl) = 0;
1254 DECL_CONTEXT (decl) = NULL_TREE;
1255 DECL_INITIAL (decl) = make_node (BLOCK);
1257 t = build_decl (BUILTINS_LOCATION,
1258 RESULT_DECL, NULL_TREE, void_type_node);
1259 DECL_ARTIFICIAL (t) = 1;
1260 DECL_IGNORED_P (t) = 1;
1261 DECL_RESULT (decl) = t;
1263 t = build_decl (BUILTINS_LOCATION,
1264 PARM_DECL, get_identifier (".paral_data_param"),
1265 ptr_type_node);
1266 DECL_ARTIFICIAL (t) = 1;
1267 DECL_ARG_TYPE (t) = ptr_type_node;
1268 DECL_CONTEXT (t) = decl;
1269 TREE_USED (t) = 1;
1270 DECL_ARGUMENTS (decl) = t;
1272 allocate_struct_function (decl, false);
1274 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1275 it. */
1276 set_cfun (act_cfun);
1278 return decl;
1281 /* Moves the exit condition of LOOP to the beginning of its header, and
1282 duplicates the part of the last iteration that gets disabled to the
1283 exit of the loop. NIT is the number of iterations of the loop
1284 (used to initialize the variables in the duplicated part).
1286 TODO: the common case is that latch of the loop is empty and immediately
1287 follows the loop exit. In this case, it would be better not to copy the
1288 body of the loop, but only move the entry of the loop directly before the
1289 exit check and increase the number of iterations of the loop by one.
1290 This may need some additional preconditioning in case NIT = ~0.
1291 REDUCTION_LIST describes the reductions in LOOP. */
1293 static void
1294 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1296 basic_block *bbs, *nbbs, ex_bb, orig_header;
1297 unsigned n;
1298 bool ok;
1299 edge exit = single_dom_exit (loop), hpred;
1300 tree control, control_name, res, t;
1301 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1302 gimple_stmt_iterator gsi;
1303 tree nit_1;
1305 split_block_after_labels (loop->header);
1306 orig_header = single_succ (loop->header);
1307 hpred = single_succ_edge (loop->header);
1309 cond_stmt = last_stmt (exit->src);
1310 control = gimple_cond_lhs (cond_stmt);
1311 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1313 /* Make sure that we have phi nodes on exit for all loop header phis
1314 (create_parallel_loop requires that). */
1315 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1317 phi = gsi_stmt (gsi);
1318 res = PHI_RESULT (phi);
1319 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1320 SET_PHI_RESULT (phi, t);
1321 nphi = create_phi_node (res, orig_header);
1322 SSA_NAME_DEF_STMT (res) = nphi;
1323 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1325 if (res == control)
1327 gimple_cond_set_lhs (cond_stmt, t);
1328 update_stmt (cond_stmt);
1329 control = t;
1332 bbs = get_loop_body_in_dom_order (loop);
1334 for (n = 0; bbs[n] != loop->latch; n++)
1335 continue;
1336 nbbs = XNEWVEC (basic_block, n);
1337 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1338 bbs + 1, n, nbbs);
1339 gcc_assert (ok);
1340 free (bbs);
1341 ex_bb = nbbs[0];
1342 free (nbbs);
1344 /* Other than reductions, the only gimple reg that should be copied
1345 out of the loop is the control variable. */
1347 control_name = NULL_TREE;
1348 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1350 phi = gsi_stmt (gsi);
1351 res = PHI_RESULT (phi);
1352 if (!is_gimple_reg (res))
1354 gsi_next (&gsi);
1355 continue;
1358 /* Check if it is a part of reduction. If it is,
1359 keep the phi at the reduction's keep_res field. The
1360 PHI_RESULT of this phi is the resulting value of the reduction
1361 variable when exiting the loop. */
1363 exit = single_dom_exit (loop);
1365 if (htab_elements (reduction_list) > 0)
1367 struct reduction_info *red;
1369 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1370 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1371 if (red)
1373 red->keep_res = phi;
1374 gsi_next (&gsi);
1375 continue;
1378 gcc_assert (control_name == NULL_TREE
1379 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1380 control_name = res;
1381 remove_phi_node (&gsi, false);
1383 gcc_assert (control_name != NULL_TREE);
1385 /* Initialize the control variable to number of iterations
1386 according to the rhs of the exit condition. */
1387 gsi = gsi_after_labels (ex_bb);
1388 cond_nit = last_stmt (exit->src);
1389 nit_1 = gimple_cond_rhs (cond_nit);
1390 nit_1 = force_gimple_operand_gsi (&gsi,
1391 fold_convert (TREE_TYPE (control_name), nit_1),
1392 false, NULL_TREE, false, GSI_SAME_STMT);
1393 stmt = gimple_build_assign (control_name, nit_1);
1394 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1395 SSA_NAME_DEF_STMT (control_name) = stmt;
1398 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1399 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1400 NEW_DATA is the variable that should be initialized from the argument
1401 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1402 basic block containing GIMPLE_OMP_PARALLEL tree. */
1404 static basic_block
1405 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1406 tree new_data, unsigned n_threads)
1408 gimple_stmt_iterator gsi;
1409 basic_block bb, paral_bb, for_bb, ex_bb;
1410 tree t, param;
1411 gimple stmt, for_stmt, phi, cond_stmt;
1412 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1413 edge exit, nexit, guard, end, e;
1415 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1416 bb = loop_preheader_edge (loop)->src;
1417 paral_bb = single_pred (bb);
1418 gsi = gsi_last_bb (paral_bb);
1420 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
1421 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1422 = build_int_cst (integer_type_node, n_threads);
1423 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1425 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1427 /* Initialize NEW_DATA. */
1428 if (data)
1430 gsi = gsi_after_labels (bb);
1432 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1433 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1434 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1435 SSA_NAME_DEF_STMT (param) = stmt;
1437 stmt = gimple_build_assign (new_data,
1438 fold_convert (TREE_TYPE (new_data), param));
1439 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1440 SSA_NAME_DEF_STMT (new_data) = stmt;
1443 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1444 bb = split_loop_exit_edge (single_dom_exit (loop));
1445 gsi = gsi_last_bb (bb);
1446 gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);
1448 /* Extract data for GIMPLE_OMP_FOR. */
1449 gcc_assert (loop->header == single_dom_exit (loop)->src);
1450 cond_stmt = last_stmt (loop->header);
1452 cvar = gimple_cond_lhs (cond_stmt);
1453 cvar_base = SSA_NAME_VAR (cvar);
1454 phi = SSA_NAME_DEF_STMT (cvar);
1455 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1456 initvar = make_ssa_name (cvar_base, NULL);
1457 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1458 initvar);
1459 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1461 gsi = gsi_last_nondebug_bb (loop->latch);
1462 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1463 gsi_remove (&gsi, true);
1465 /* Prepare cfg. */
1466 for_bb = split_edge (loop_preheader_edge (loop));
1467 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1468 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1469 gcc_assert (exit == single_dom_exit (loop));
1471 guard = make_edge (for_bb, ex_bb, 0);
1472 single_succ_edge (loop->latch)->flags = 0;
1473 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1474 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1476 source_location locus;
1477 tree def;
1478 phi = gsi_stmt (gsi);
1479 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1481 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1482 locus = gimple_phi_arg_location_from_edge (stmt,
1483 loop_preheader_edge (loop));
1484 add_phi_arg (phi, def, guard, locus);
1486 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1487 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1488 add_phi_arg (phi, def, end, locus);
1490 e = redirect_edge_and_branch (exit, nexit->dest);
1491 PENDING_STMT (e) = NULL;
1493 /* Emit GIMPLE_OMP_FOR. */
1494 gimple_cond_set_lhs (cond_stmt, cvar_base);
1495 type = TREE_TYPE (cvar);
1496 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
1497 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1499 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1500 gimple_omp_for_set_index (for_stmt, 0, initvar);
1501 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1502 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1503 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1504 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1505 cvar_base,
1506 build_int_cst (type, 1)));
1508 gsi = gsi_last_bb (for_bb);
1509 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1510 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1512 /* Emit GIMPLE_OMP_CONTINUE. */
1513 gsi = gsi_last_bb (loop->latch);
1514 stmt = gimple_build_omp_continue (cvar_next, cvar);
1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1516 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1518 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1519 gsi = gsi_last_bb (ex_bb);
1520 gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);
1522 return paral_bb;
1525 /* Generates code to execute the iterations of LOOP in N_THREADS
1526 threads in parallel.
1528 NITER describes number of iterations of LOOP.
1529 REDUCTION_LIST describes the reductions existent in the LOOP. */
1531 static void
1532 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1533 unsigned n_threads, struct tree_niter_desc *niter)
1535 loop_iterator li;
1536 tree many_iterations_cond, type, nit;
1537 tree arg_struct, new_arg_struct;
1538 gimple_seq stmts;
1539 basic_block parallel_head;
1540 edge entry, exit;
1541 struct clsn_data clsn_data;
1542 unsigned prob;
1544 /* From
1546 ---------------------------------------------------------------------
1547 loop
1549 IV = phi (INIT, IV + STEP)
1550 BODY1;
1551 if (COND)
1552 break;
1553 BODY2;
1555 ---------------------------------------------------------------------
1557 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1558 we generate the following code:
1560 ---------------------------------------------------------------------
1562 if (MAY_BE_ZERO
1563 || NITER < MIN_PER_THREAD * N_THREADS)
1564 goto original;
1566 BODY1;
1567 store all local loop-invariant variables used in body of the loop to DATA.
1568 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1569 load the variables from DATA.
1570 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1571 BODY2;
1572 BODY1;
1573 GIMPLE_OMP_CONTINUE;
1574 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1575 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1576 goto end;
1578 original:
1579 loop
1581 IV = phi (INIT, IV + STEP)
1582 BODY1;
1583 if (COND)
1584 break;
1585 BODY2;
1588 end:
1592 /* Create two versions of the loop -- in the old one, we know that the
1593 number of iterations is large enough, and we will transform it into the
1594 loop that will be split to loop_fn, the new one will be used for the
1595 remaining iterations. */
1597 type = TREE_TYPE (niter->niter);
1598 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1599 NULL_TREE);
1600 if (stmts)
1601 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1603 many_iterations_cond =
1604 fold_build2 (GE_EXPR, boolean_type_node,
1605 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1606 many_iterations_cond
1607 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1608 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1609 many_iterations_cond);
1610 many_iterations_cond
1611 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1612 if (stmts)
1613 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1614 if (!is_gimple_condexpr (many_iterations_cond))
1616 many_iterations_cond
1617 = force_gimple_operand (many_iterations_cond, &stmts,
1618 true, NULL_TREE);
1619 if (stmts)
1620 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1623 initialize_original_copy_tables ();
1625 /* We assume that the loop usually iterates a lot. */
1626 prob = 4 * REG_BR_PROB_BASE / 5;
1627 loop_version (loop, many_iterations_cond, NULL,
1628 prob, prob, REG_BR_PROB_BASE - prob, true);
1629 update_ssa (TODO_update_ssa);
1630 free_original_copy_tables ();
1632 /* Base all the induction variables in LOOP on a single control one. */
1633 canonicalize_loop_ivs (loop, &nit, true);
1635 /* Ensure that the exit condition is the first statement in the loop. */
1636 transform_to_exit_first_loop (loop, reduction_list, nit);
1638 /* Generate initializations for reductions. */
1639 if (htab_elements (reduction_list) > 0)
1640 htab_traverse (reduction_list, initialize_reductions, loop);
1642 /* Eliminate the references to local variables from the loop. */
1643 gcc_assert (single_exit (loop));
1644 entry = loop_preheader_edge (loop);
1645 exit = single_dom_exit (loop);
1647 eliminate_local_variables (entry, exit);
1648 /* In the old loop, move all variables non-local to the loop to a structure
1649 and back, and create separate decls for the variables used in loop. */
1650 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1651 &new_arg_struct, &clsn_data);
1653 /* Create the parallel constructs. */
1654 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1655 new_arg_struct, n_threads);
1656 if (htab_elements (reduction_list) > 0)
1657 create_call_for_reduction (loop, reduction_list, &clsn_data);
1659 scev_reset ();
1661 /* Cancel the loop (it is simpler to do it here rather than to teach the
1662 expander to do it). */
1663 cancel_loop_tree (loop);
1665 /* Free loop bound estimations that could contain references to
1666 removed statements. */
1667 FOR_EACH_LOOP (li, loop, 0)
1668 free_numbers_of_iterations_estimates_loop (loop);
1670 /* Expand the parallel constructs. We do it directly here instead of running
1671 a separate expand_omp pass, since it is more efficient, and less likely to
1672 cause troubles with further analyses not being able to deal with the
1673 OMP trees. */
1675 omp_expand_local (parallel_head);
1678 /* Returns true when LOOP contains vector phi nodes. */
1680 static bool
1681 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1683 unsigned i;
1684 basic_block *bbs = get_loop_body_in_dom_order (loop);
1685 gimple_stmt_iterator gsi;
1686 bool res = true;
1688 for (i = 0; i < loop->num_nodes; i++)
1689 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1690 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1691 goto end;
1693 res = false;
1694 end:
1695 free (bbs);
1696 return res;
1699 /* Create a reduction_info struct, initialize it with REDUC_STMT
1700 and PHI, insert it to the REDUCTION_LIST. */
1702 static void
1703 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1705 PTR *slot;
1706 struct reduction_info *new_reduction;
1708 gcc_assert (reduc_stmt);
1710 if (dump_file && (dump_flags & TDF_DETAILS))
1712 fprintf (dump_file,
1713 "Detected reduction. reduction stmt is: \n");
1714 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1715 fprintf (dump_file, "\n");
1718 new_reduction = XCNEW (struct reduction_info);
1720 new_reduction->reduc_stmt = reduc_stmt;
1721 new_reduction->reduc_phi = phi;
1722 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1723 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1724 *slot = new_reduction;
1727 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1729 static void
1730 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1732 gimple_stmt_iterator gsi;
1733 loop_vec_info simple_loop_info;
1735 vect_dump = NULL;
1736 simple_loop_info = vect_analyze_loop_form (loop);
1738 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1740 gimple phi = gsi_stmt (gsi);
1741 affine_iv iv;
1742 tree res = PHI_RESULT (phi);
1743 bool double_reduc;
1745 if (!is_gimple_reg (res))
1746 continue;
1748 if (!simple_iv (loop, loop, res, &iv, true)
1749 && simple_loop_info)
1751 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1752 phi, true,
1753 &double_reduc);
1754 if (reduc_stmt && !double_reduc)
1755 build_new_reduction (reduction_list, reduc_stmt, phi);
1758 destroy_loop_vec_info (simple_loop_info, true);
1761 /* Try to initialize NITER for code generation part. */
1763 static bool
1764 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1766 edge exit = single_dom_exit (loop);
1768 gcc_assert (exit);
1770 /* We need to know # of iterations, and there should be no uses of values
1771 defined inside loop outside of it, unless the values are invariants of
1772 the loop. */
1773 if (!number_of_iterations_exit (loop, exit, niter, false))
1775 if (dump_file && (dump_flags & TDF_DETAILS))
1776 fprintf (dump_file, " FAILED: number of iterations not known\n");
1777 return false;
1780 return true;
1783 /* Try to initialize REDUCTION_LIST for code generation part.
1784 REDUCTION_LIST describes the reductions. */
1786 static bool
1787 try_create_reduction_list (loop_p loop, htab_t reduction_list)
1789 edge exit = single_dom_exit (loop);
1790 gimple_stmt_iterator gsi;
1792 gcc_assert (exit);
1794 gather_scalar_reductions (loop, reduction_list);
1797 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1799 gimple phi = gsi_stmt (gsi);
1800 struct reduction_info *red;
1801 imm_use_iterator imm_iter;
1802 use_operand_p use_p;
1803 gimple reduc_phi;
1804 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1806 if (is_gimple_reg (val))
1808 if (dump_file && (dump_flags & TDF_DETAILS))
1810 fprintf (dump_file, "phi is ");
1811 print_gimple_stmt (dump_file, phi, 0, 0);
1812 fprintf (dump_file, "arg of phi to exit: value ");
1813 print_generic_expr (dump_file, val, 0);
1814 fprintf (dump_file, " used outside loop\n");
1815 fprintf (dump_file,
1816 " checking if it a part of reduction pattern: \n");
1818 if (htab_elements (reduction_list) == 0)
1820 if (dump_file && (dump_flags & TDF_DETAILS))
1821 fprintf (dump_file,
1822 " FAILED: it is not a part of reduction.\n");
1823 return false;
1825 reduc_phi = NULL;
1826 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
1828 if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
1830 reduc_phi = USE_STMT (use_p);
1831 break;
1834 red = reduction_phi (reduction_list, reduc_phi);
1835 if (red == NULL)
1837 if (dump_file && (dump_flags & TDF_DETAILS))
1838 fprintf (dump_file,
1839 " FAILED: it is not a part of reduction.\n");
1840 return false;
1842 if (dump_file && (dump_flags & TDF_DETAILS))
1844 fprintf (dump_file, "reduction phi is ");
1845 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
1846 fprintf (dump_file, "reduction stmt is ");
1847 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
1852 /* The iterations of the loop may communicate only through bivs whose
1853 iteration space can be distributed efficiently. */
1854 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1856 gimple phi = gsi_stmt (gsi);
1857 tree def = PHI_RESULT (phi);
1858 affine_iv iv;
1860 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
1862 struct reduction_info *red;
1864 red = reduction_phi (reduction_list, phi);
1865 if (red == NULL)
1867 if (dump_file && (dump_flags & TDF_DETAILS))
1868 fprintf (dump_file,
1869 " FAILED: scalar dependency between iterations\n");
1870 return false;
1876 return true;
1879 /* Detect parallel loops and generate parallel code using libgomp
1880 primitives. Returns true if some loop was parallelized, false
1881 otherwise. */
1883 bool
1884 parallelize_loops (void)
1886 unsigned n_threads = flag_tree_parallelize_loops;
1887 bool changed = false;
1888 struct loop *loop;
1889 struct tree_niter_desc niter_desc;
1890 loop_iterator li;
1891 htab_t reduction_list;
1892 struct obstack parloop_obstack;
1893 HOST_WIDE_INT estimated;
1894 LOC loop_loc;
1896 /* Do not parallelize loops in the functions created by parallelization. */
1897 if (parallelized_function_p (cfun->decl))
1898 return false;
1899 if (cfun->has_nonlocal_label)
1900 return false;
1902 gcc_obstack_init (&parloop_obstack);
1903 reduction_list = htab_create (10, reduction_info_hash,
1904 reduction_info_eq, free);
1905 init_stmt_vec_info_vec ();
1907 FOR_EACH_LOOP (li, loop, 0)
1909 htab_empty (reduction_list);
1910 if (dump_file && (dump_flags & TDF_DETAILS))
1912 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
1913 if (loop->inner)
1914 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
1915 else
1916 fprintf (dump_file, "loop %d is innermost\n",loop->num);
1919 /* If we use autopar in graphite pass, we use its marked dependency
1920 checking results. */
1921 if (flag_loop_parallelize_all && !loop->can_be_parallel)
1923 if (dump_file && (dump_flags & TDF_DETAILS))
1924 fprintf (dump_file, "loop is not parallel according to graphite\n");
1925 continue;
1928 if (!single_dom_exit (loop))
1931 if (dump_file && (dump_flags & TDF_DETAILS))
1932 fprintf (dump_file, "loop is !single_dom_exit\n");
1934 continue;
1937 if (/* And of course, the loop must be parallelizable. */
1938 !can_duplicate_loop_p (loop)
1939 || loop_has_blocks_with_irreducible_flag (loop)
1940 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
1941 /* FIXME: the check for vector phi nodes could be removed. */
1942 || loop_has_vector_phi_nodes (loop))
1943 continue;
1944 estimated = estimated_loop_iterations_int (loop, false);
1945 /* FIXME: Bypass this check as graphite doesn't update the
1946 count and frequency correctly now. */
1947 if (!flag_loop_parallelize_all
1948 && ((estimated !=-1
1949 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
1950 /* Do not bother with loops in cold areas. */
1951 || optimize_loop_nest_for_size_p (loop)))
1952 continue;
1954 if (!try_get_loop_niter (loop, &niter_desc))
1955 continue;
1957 if (!try_create_reduction_list (loop, reduction_list))
1958 continue;
1960 if (!flag_loop_parallelize_all
1961 && !loop_parallel_p (loop, &parloop_obstack))
1962 continue;
1964 changed = true;
1965 if (dump_file && (dump_flags & TDF_DETAILS))
1967 if (loop->inner)
1968 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
1969 else
1970 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
1971 loop_loc = find_loop_location (loop);
1972 if (loop_loc != UNKNOWN_LOC)
1973 fprintf (dump_file, "\nloop at %s:%d: ",
1974 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1976 gen_parallel_loop (loop, reduction_list,
1977 n_threads, &niter_desc);
1978 verify_flow_info ();
1979 verify_dominators (CDI_DOMINATORS);
1980 verify_loop_structure ();
1981 verify_loop_closed_ssa (true);
1984 free_stmt_vec_info_vec ();
1985 htab_delete (reduction_list);
1986 obstack_free (&parloop_obstack, NULL);
1988 /* Parallelization will cause new function calls to be inserted through
1989 which local variables will escape. Reset the points-to solution
1990 for ESCAPED. */
1991 if (changed)
1992 pt_solution_reset (&cfun->gimple_df->escaped);
1994 return changed;
1997 #include "gt-tree-parloops.h"