2009-08-05 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-parloops.c
blob9acf0ff75f0db8f798e042837d5e53e075d6e388
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "tree-flow.h"
29 #include "cfgloop.h"
30 #include "ggc.h"
31 #include "tree-data-ref.h"
32 #include "diagnostic.h"
33 #include "tree-pass.h"
34 #include "tree-scalar-evolution.h"
35 #include "hashtab.h"
36 #include "langhooks.h"
37 #include "tree-vectorizer.h"
39 /* This pass tries to distribute iterations of loops into several threads.
40 The implementation is straightforward -- for each loop we test whether its
41 iterations are independent, and if it is the case (and some additional
42 conditions regarding profitability and correctness are satisfied), we
43 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
44 machinery do its job.
46 Most of the complexity is in bringing the code into shape expected
47 by the omp expanders:
48 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
49 variable and that the exit test is at the start of the loop body
50 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
51 variables by accesses through pointers, and breaking up ssa chains
52 by storing the values incoming to the parallelized loop to a structure
53 passed to the new function as an argument (something similar is done
54 in omp gimplification, unfortunately only a small part of the code
55 can be shared).
57 TODO:
58 -- if there are several parallelizable loops in a function, it may be
59 possible to generate the threads just once (using synchronization to
60 ensure that cross-loop dependences are obeyed).
61 -- handling of common scalar dependence patterns (accumulation, ...)
62 -- handling of non-innermost loops */
64 /*
65 Reduction handling:
66 currently we use vect_is_simple_reduction() to detect reduction patterns.
67 The code transformation will be introduced by an example.
70 parloop
72 int sum=1;
74 for (i = 0; i < N; i++)
76 x[i] = i + 3;
77 sum+=x[i];
81 gimple-like code:
82 header_bb:
84 # sum_29 = PHI <sum_11(5), 1(3)>
85 # i_28 = PHI <i_12(5), 0(3)>
86 D.1795_8 = i_28 + 3;
87 x[i_28] = D.1795_8;
88 sum_11 = D.1795_8 + sum_29;
89 i_12 = i_28 + 1;
90 if (N_6(D) > i_12)
91 goto header_bb;
94 exit_bb:
96 # sum_21 = PHI <sum_11(4)>
97 printf (&"%d"[0], sum_21);
100 after reduction transformation (only relevant parts):
102 parloop
105 ....
108 # Storing the initial value given by the user. #
110 .paral_data_store.32.sum.27 = 1;
112 #pragma omp parallel num_threads(4)
114 #pragma omp for schedule(static)
116 # The neutral element corresponding to the particular
117 reduction's operation, e.g. 0 for PLUS_EXPR,
118 1 for MULT_EXPR, etc. replaces the user's initial value. #
120 # sum.27_29 = PHI <sum.27_11, 0>
122 sum.27_11 = D.1827_8 + sum.27_29;
124 GIMPLE_OMP_CONTINUE
126 # Adding this reduction phi is done at create_phi_for_local_result() #
127 # sum.27_56 = PHI <sum.27_11, 0>
128 GIMPLE_OMP_RETURN
130 # Creating the atomic operation is done at
131 create_call_for_reduction_1() #
133 #pragma omp atomic_load
134 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
135 D.1840_60 = sum.27_56 + D.1839_59;
136 #pragma omp atomic_store (D.1840_60);
138 GIMPLE_OMP_RETURN
140 # collecting the result after the join of the threads is done at
141 create_loads_for_reductions().
142 The value computed by the threads is loaded from the
143 shared struct. #
146 .paral_data_load.33_52 = &.paral_data_store.32;
147 sum_37 = .paral_data_load.33_52->sum.27;
148 sum_43 = D.1795_41 + sum_37;
150 exit bb:
151 # sum_21 = PHI <sum_43, sum_26>
152 printf (&"%d"[0], sum_21);
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
164 /* Element of the hashtable, representing a
165 reduction in the current loop. */
166 struct reduction_info
168 gimple reduc_stmt; /* reduction statement. */
169 gimple reduc_phi; /* The phi node defining the reduction. */
170 enum tree_code reduction_code;/* code for the reduction operation. */
171 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
172 of the reduction variable when existing the loop. */
173 tree initial_value; /* The initial value of the reduction var before entering the loop. */
174 tree field; /* the name of the field in the parloop data structure intended for reduction. */
175 tree init; /* reduction initialization value. */
176 gimple new_phi; /* (helper field) Newly created phi node whose result
177 will be passed to the atomic operation. Represents
178 the local result each thread computed for the reduction
179 operation. */
182 /* Equality and hash functions for hashtab code. */
184 static int
185 reduction_info_eq (const void *aa, const void *bb)
187 const struct reduction_info *a = (const struct reduction_info *) aa;
188 const struct reduction_info *b = (const struct reduction_info *) bb;
190 return (a->reduc_phi == b->reduc_phi);
193 static hashval_t
194 reduction_info_hash (const void *aa)
196 const struct reduction_info *a = (const struct reduction_info *) aa;
198 return htab_hash_pointer (a->reduc_phi);
201 static struct reduction_info *
202 reduction_phi (htab_t reduction_list, gimple phi)
204 struct reduction_info tmpred, *red;
206 if (htab_elements (reduction_list) == 0)
207 return NULL;
209 tmpred.reduc_phi = phi;
210 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
212 return red;
215 /* Element of hashtable of names to copy. */
217 struct name_to_copy_elt
219 unsigned version; /* The version of the name to copy. */
220 tree new_name; /* The new name used in the copy. */
221 tree field; /* The field of the structure used to pass the
222 value. */
225 /* Equality and hash functions for hashtab code. */
227 static int
228 name_to_copy_elt_eq (const void *aa, const void *bb)
230 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
231 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
233 return a->version == b->version;
236 static hashval_t
237 name_to_copy_elt_hash (const void *aa)
239 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
241 return (hashval_t) a->version;
245 /* Data dependency analysis. Returns true if the iterations of LOOP
246 are independent on each other (that is, if we can execute them
247 in parallel). */
249 static bool
250 loop_parallel_p (struct loop *loop)
252 VEC (ddr_p, heap) * dependence_relations;
253 VEC (data_reference_p, heap) *datarefs;
254 lambda_trans_matrix trans;
255 bool ret = false;
257 if (dump_file && (dump_flags & TDF_DETAILS))
258 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
260 /* Check for problems with dependences. If the loop can be reversed,
261 the iterations are independent. */
262 datarefs = VEC_alloc (data_reference_p, heap, 10);
263 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
264 compute_data_dependences_for_loop (loop, true, &datarefs,
265 &dependence_relations);
266 if (dump_file && (dump_flags & TDF_DETAILS))
267 dump_data_dependence_relations (dump_file, dependence_relations);
269 trans = lambda_trans_matrix_new (1, 1);
270 LTM_MATRIX (trans)[0][0] = -1;
272 if (lambda_transform_legal_p (trans, 1, dependence_relations))
274 ret = true;
275 if (dump_file && (dump_flags & TDF_DETAILS))
276 fprintf (dump_file, " SUCCESS: may be parallelized\n");
278 else if (dump_file && (dump_flags & TDF_DETAILS))
279 fprintf (dump_file,
280 " FAILED: data dependencies exist across iterations\n");
282 free_dependence_relations (dependence_relations);
283 free_data_refs (datarefs);
285 return ret;
288 /* Return true when LOOP contains basic blocks marked with the
289 BB_IRREDUCIBLE_LOOP flag. */
291 static inline bool
292 loop_has_blocks_with_irreducible_flag (struct loop *loop)
294 unsigned i;
295 basic_block *bbs = get_loop_body_in_dom_order (loop);
296 bool res = true;
298 for (i = 0; i < loop->num_nodes; i++)
299 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
300 goto end;
302 res = false;
303 end:
304 free (bbs);
305 return res;
308 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
309 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
310 to their addresses that can be reused. The address of OBJ is known to
311 be invariant in the whole function. */
313 static tree
314 take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
316 int uid;
317 void **dslot;
318 struct int_tree_map ielt, *nielt;
319 tree *var_p, name, bvar, addr;
320 gimple stmt;
321 gimple_seq stmts;
323 /* Since the address of OBJ is invariant, the trees may be shared.
324 Avoid rewriting unrelated parts of the code. */
325 obj = unshare_expr (obj);
326 for (var_p = &obj;
327 handled_component_p (*var_p);
328 var_p = &TREE_OPERAND (*var_p, 0))
329 continue;
330 uid = DECL_UID (*var_p);
332 ielt.uid = uid;
333 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
334 if (!*dslot)
336 addr = build_addr (*var_p, current_function_decl);
337 bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
338 add_referenced_var (bvar);
339 stmt = gimple_build_assign (bvar, addr);
340 name = make_ssa_name (bvar, stmt);
341 gimple_assign_set_lhs (stmt, name);
342 gsi_insert_on_edge_immediate (entry, stmt);
344 nielt = XNEW (struct int_tree_map);
345 nielt->uid = uid;
346 nielt->to = name;
347 *dslot = nielt;
349 else
350 name = ((struct int_tree_map *) *dslot)->to;
352 if (var_p != &obj)
354 *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
355 name = force_gimple_operand (build_addr (obj, current_function_decl),
356 &stmts, true, NULL_TREE);
357 if (!gimple_seq_empty_p (stmts))
358 gsi_insert_seq_on_edge_immediate (entry, stmts);
361 if (TREE_TYPE (name) != type)
363 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
364 NULL_TREE);
365 if (!gimple_seq_empty_p (stmts))
366 gsi_insert_seq_on_edge_immediate (entry, stmts);
369 return name;
372 /* Callback for htab_traverse. Create the initialization statement
373 for reduction described in SLOT, and place it at the preheader of
374 the loop described in DATA. */
376 static int
377 initialize_reductions (void **slot, void *data)
379 tree init, c;
380 tree bvar, type, arg;
381 edge e;
383 struct reduction_info *const reduc = (struct reduction_info *) *slot;
384 struct loop *loop = (struct loop *) data;
386 /* Create initialization in preheader:
387 reduction_variable = initialization value of reduction. */
389 /* In the phi node at the header, replace the argument coming
390 from the preheader with the reduction initialization value. */
392 /* Create a new variable to initialize the reduction. */
393 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
394 bvar = create_tmp_var (type, "reduction");
395 add_referenced_var (bvar);
397 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
398 OMP_CLAUSE_REDUCTION);
399 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
400 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
402 init = omp_reduction_init (c, TREE_TYPE (bvar));
403 reduc->init = init;
405 /* Replace the argument representing the initialization value
406 with the initialization value for the reduction (neutral
407 element for the particular operation, e.g. 0 for PLUS_EXPR,
408 1 for MULT_EXPR, etc).
409 Keep the old value in a new variable "reduction_initial",
410 that will be taken in consideration after the parallel
411 computing is done. */
413 e = loop_preheader_edge (loop);
414 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
415 /* Create new variable to hold the initial value. */
417 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
418 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
419 reduc->initial_value = arg;
420 return 1;
423 struct elv_data
425 struct walk_stmt_info info;
426 edge entry;
427 htab_t decl_address;
428 bool changed;
431 /* Eliminates references to local variables in *TP out of the single
432 entry single exit region starting at DTA->ENTRY.
433 DECL_ADDRESS contains addresses of the references that had their
434 address taken already. If the expression is changed, CHANGED is
435 set to true. Callback for walk_tree. */
437 static tree
438 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
440 struct elv_data *const dta = (struct elv_data *) data;
441 tree t = *tp, var, addr, addr_type, type, obj;
443 if (DECL_P (t))
445 *walk_subtrees = 0;
447 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
448 return NULL_TREE;
450 type = TREE_TYPE (t);
451 addr_type = build_pointer_type (type);
452 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
453 *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
455 dta->changed = true;
456 return NULL_TREE;
459 if (TREE_CODE (t) == ADDR_EXPR)
461 /* ADDR_EXPR may appear in two contexts:
462 -- as a gimple operand, when the address taken is a function invariant
463 -- as gimple rhs, when the resulting address in not a function
464 invariant
465 We do not need to do anything special in the latter case (the base of
466 the memory reference whose address is taken may be replaced in the
467 DECL_P case). The former case is more complicated, as we need to
468 ensure that the new address is still a gimple operand. Thus, it
469 is not sufficient to replace just the base of the memory reference --
470 we need to move the whole computation of the address out of the
471 loop. */
472 if (!is_gimple_val (t))
473 return NULL_TREE;
475 *walk_subtrees = 0;
476 obj = TREE_OPERAND (t, 0);
477 var = get_base_address (obj);
478 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
479 return NULL_TREE;
481 addr_type = TREE_TYPE (t);
482 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
483 *tp = addr;
485 dta->changed = true;
486 return NULL_TREE;
489 if (!EXPR_P (t))
490 *walk_subtrees = 0;
492 return NULL_TREE;
495 /* Moves the references to local variables in STMT out of the single
496 entry single exit region starting at ENTRY. DECL_ADDRESS contains
497 addresses of the references that had their address taken
498 already. */
500 static void
501 eliminate_local_variables_stmt (edge entry, gimple stmt,
502 htab_t decl_address)
504 struct elv_data dta;
506 memset (&dta.info, '\0', sizeof (dta.info));
507 dta.entry = entry;
508 dta.decl_address = decl_address;
509 dta.changed = false;
511 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
513 if (dta.changed)
514 update_stmt (stmt);
517 /* Eliminates the references to local variables from the single entry
518 single exit region between the ENTRY and EXIT edges.
520 This includes:
521 1) Taking address of a local variable -- these are moved out of the
522 region (and temporary variable is created to hold the address if
523 necessary).
525 2) Dereferencing a local variable -- these are replaced with indirect
526 references. */
528 static void
529 eliminate_local_variables (edge entry, edge exit)
531 basic_block bb;
532 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
533 unsigned i;
534 gimple_stmt_iterator gsi;
535 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
536 free);
537 basic_block entry_bb = entry->src;
538 basic_block exit_bb = exit->dest;
540 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
542 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
543 if (bb != entry_bb && bb != exit_bb)
544 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
545 eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
546 decl_address);
548 htab_delete (decl_address);
549 VEC_free (basic_block, heap, body);
552 /* Returns true if expression EXPR is not defined between ENTRY and
553 EXIT, i.e. if all its operands are defined outside of the region. */
555 static bool
556 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
558 basic_block entry_bb = entry->src;
559 basic_block exit_bb = exit->dest;
560 basic_block def_bb;
562 if (is_gimple_min_invariant (expr))
563 return true;
565 if (TREE_CODE (expr) == SSA_NAME)
567 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
568 if (def_bb
569 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
570 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
571 return false;
573 return true;
576 return false;
579 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
580 The copies are stored to NAME_COPIES, if NAME was already duplicated,
581 its duplicate stored in NAME_COPIES is returned.
583 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
584 duplicated, storing the copies in DECL_COPIES. */
586 static tree
587 separate_decls_in_region_name (tree name,
588 htab_t name_copies, htab_t decl_copies,
589 bool copy_name_p)
591 tree copy, var, var_copy;
592 unsigned idx, uid, nuid;
593 struct int_tree_map ielt, *nielt;
594 struct name_to_copy_elt elt, *nelt;
595 void **slot, **dslot;
597 if (TREE_CODE (name) != SSA_NAME)
598 return name;
600 idx = SSA_NAME_VERSION (name);
601 elt.version = idx;
602 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
603 copy_name_p ? INSERT : NO_INSERT);
604 if (slot && *slot)
605 return ((struct name_to_copy_elt *) *slot)->new_name;
607 var = SSA_NAME_VAR (name);
608 uid = DECL_UID (var);
609 ielt.uid = uid;
610 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
611 if (!*dslot)
613 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
614 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
615 add_referenced_var (var_copy);
616 nielt = XNEW (struct int_tree_map);
617 nielt->uid = uid;
618 nielt->to = var_copy;
619 *dslot = nielt;
621 /* Ensure that when we meet this decl next time, we won't duplicate
622 it again. */
623 nuid = DECL_UID (var_copy);
624 ielt.uid = nuid;
625 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
626 gcc_assert (!*dslot);
627 nielt = XNEW (struct int_tree_map);
628 nielt->uid = nuid;
629 nielt->to = var_copy;
630 *dslot = nielt;
632 else
633 var_copy = ((struct int_tree_map *) *dslot)->to;
635 if (copy_name_p)
637 copy = duplicate_ssa_name (name, NULL);
638 nelt = XNEW (struct name_to_copy_elt);
639 nelt->version = idx;
640 nelt->new_name = copy;
641 nelt->field = NULL_TREE;
642 *slot = nelt;
644 else
646 gcc_assert (!slot);
647 copy = name;
650 SSA_NAME_VAR (copy) = var_copy;
651 return copy;
654 /* Finds the ssa names used in STMT that are defined outside the
655 region between ENTRY and EXIT and replaces such ssa names with
656 their duplicates. The duplicates are stored to NAME_COPIES. Base
657 decls of all ssa names used in STMT (including those defined in
658 LOOP) are replaced with the new temporary variables; the
659 replacement decls are stored in DECL_COPIES. */
661 static void
662 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
663 htab_t name_copies, htab_t decl_copies)
665 use_operand_p use;
666 def_operand_p def;
667 ssa_op_iter oi;
668 tree name, copy;
669 bool copy_name_p;
671 mark_virtual_ops_for_renaming (stmt);
673 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
675 name = DEF_FROM_PTR (def);
676 gcc_assert (TREE_CODE (name) == SSA_NAME);
677 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
678 false);
679 gcc_assert (copy == name);
682 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
684 name = USE_FROM_PTR (use);
685 if (TREE_CODE (name) != SSA_NAME)
686 continue;
688 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
689 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
690 copy_name_p);
691 SET_USE (use, copy);
695 /* Callback for htab_traverse. Adds a field corresponding to the reduction
696 specified in SLOT. The type is passed in DATA. */
698 static int
699 add_field_for_reduction (void **slot, void *data)
702 struct reduction_info *const red = (struct reduction_info *) *slot;
703 tree const type = (tree) data;
704 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
705 tree field = build_decl (gimple_location (red->reduc_stmt),
706 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
708 insert_field_into_struct (type, field);
710 red->field = field;
712 return 1;
715 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
716 described in SLOT. The type is passed in DATA. */
718 static int
719 add_field_for_name (void **slot, void *data)
721 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
722 tree type = (tree) data;
723 tree name = ssa_name (elt->version);
724 tree var = SSA_NAME_VAR (name);
725 tree field = build_decl (DECL_SOURCE_LOCATION (var),
726 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
728 insert_field_into_struct (type, field);
729 elt->field = field;
731 return 1;
734 /* Callback for htab_traverse. A local result is the intermediate result
735 computed by a single
736 thread, or the initial value in case no iteration was executed.
737 This function creates a phi node reflecting these values.
738 The phi's result will be stored in NEW_PHI field of the
739 reduction's data structure. */
741 static int
742 create_phi_for_local_result (void **slot, void *data)
744 struct reduction_info *const reduc = (struct reduction_info *) *slot;
745 const struct loop *const loop = (const struct loop *) data;
746 edge e;
747 gimple new_phi;
748 basic_block store_bb;
749 tree local_res;
750 source_location locus;
752 /* STORE_BB is the block where the phi
753 should be stored. It is the destination of the loop exit.
754 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
755 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
757 /* STORE_BB has two predecessors. One coming from the loop
758 (the reduction's result is computed at the loop),
759 and another coming from a block preceding the loop,
760 when no iterations
761 are executed (the initial value should be taken). */
762 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
763 e = EDGE_PRED (store_bb, 1);
764 else
765 e = EDGE_PRED (store_bb, 0);
766 local_res
767 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
768 NULL);
769 locus = gimple_location (reduc->reduc_stmt);
770 new_phi = create_phi_node (local_res, store_bb);
771 SSA_NAME_DEF_STMT (local_res) = new_phi;
772 add_phi_arg (new_phi, reduc->init, e, locus);
773 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
774 FALLTHRU_EDGE (loop->latch), locus);
775 reduc->new_phi = new_phi;
777 return 1;
780 struct clsn_data
782 tree store;
783 tree load;
785 basic_block store_bb;
786 basic_block load_bb;
789 /* Callback for htab_traverse. Create an atomic instruction for the
790 reduction described in SLOT.
791 DATA annotates the place in memory the atomic operation relates to,
792 and the basic block it needs to be generated in. */
794 static int
795 create_call_for_reduction_1 (void **slot, void *data)
797 struct reduction_info *const reduc = (struct reduction_info *) *slot;
798 struct clsn_data *const clsn_data = (struct clsn_data *) data;
799 gimple_stmt_iterator gsi;
800 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
801 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
802 tree load_struct;
803 basic_block bb;
804 basic_block new_bb;
805 edge e;
806 tree t, addr, addr_type, ref, x;
807 tree tmp_load, name;
808 gimple load;
810 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
811 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
812 addr_type = build_pointer_type (type);
814 addr = build_addr (t, current_function_decl);
816 /* Create phi node. */
817 bb = clsn_data->load_bb;
819 e = split_block (bb, t);
820 new_bb = e->dest;
822 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
823 add_referenced_var (tmp_load);
824 tmp_load = make_ssa_name (tmp_load, NULL);
825 load = gimple_build_omp_atomic_load (tmp_load, addr);
826 SSA_NAME_DEF_STMT (tmp_load) = load;
827 gsi = gsi_start_bb (new_bb);
828 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
830 e = split_block (new_bb, load);
831 new_bb = e->dest;
832 gsi = gsi_start_bb (new_bb);
833 ref = tmp_load;
834 x = fold_build2 (reduc->reduction_code,
835 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
836 PHI_RESULT (reduc->new_phi));
838 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
839 GSI_CONTINUE_LINKING);
841 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
842 return 1;
845 /* Create the atomic operation at the join point of the threads.
846 REDUCTION_LIST describes the reductions in the LOOP.
847 LD_ST_DATA describes the shared data structure where
848 shared data is stored in and loaded from. */
849 static void
850 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
851 struct clsn_data *ld_st_data)
853 htab_traverse (reduction_list, create_phi_for_local_result, loop);
854 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
855 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
856 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
859 /* Callback for htab_traverse. Loads the final reduction value at the
860 join point of all threads, and inserts it in the right place. */
862 static int
863 create_loads_for_reductions (void **slot, void *data)
865 struct reduction_info *const red = (struct reduction_info *) *slot;
866 struct clsn_data *const clsn_data = (struct clsn_data *) data;
867 gimple stmt;
868 gimple_stmt_iterator gsi;
869 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
870 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
871 tree load_struct;
872 tree name;
873 tree x;
875 gsi = gsi_after_labels (clsn_data->load_bb);
876 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
877 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
878 NULL_TREE);
880 x = load_struct;
881 name = PHI_RESULT (red->keep_res);
882 stmt = gimple_build_assign (name, x);
883 SSA_NAME_DEF_STMT (name) = stmt;
885 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
887 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
888 !gsi_end_p (gsi); gsi_next (&gsi))
889 if (gsi_stmt (gsi) == red->keep_res)
891 remove_phi_node (&gsi, false);
892 return 1;
894 gcc_unreachable ();
897 /* Load the reduction result that was stored in LD_ST_DATA.
898 REDUCTION_LIST describes the list of reductions that the
899 loads should be generated for. */
900 static void
901 create_final_loads_for_reduction (htab_t reduction_list,
902 struct clsn_data *ld_st_data)
904 gimple_stmt_iterator gsi;
905 tree t;
906 gimple stmt;
908 gsi = gsi_after_labels (ld_st_data->load_bb);
909 t = build_fold_addr_expr (ld_st_data->store);
910 stmt = gimple_build_assign (ld_st_data->load, t);
912 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
913 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
915 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
919 /* Callback for htab_traverse. Store the neutral value for the
920 particular reduction's operation, e.g. 0 for PLUS_EXPR,
921 1 for MULT_EXPR, etc. into the reduction field.
922 The reduction is specified in SLOT. The store information is
923 passed in DATA. */
925 static int
926 create_stores_for_reduction (void **slot, void *data)
928 struct reduction_info *const red = (struct reduction_info *) *slot;
929 struct clsn_data *const clsn_data = (struct clsn_data *) data;
930 tree t;
931 gimple stmt;
932 gimple_stmt_iterator gsi;
933 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
935 gsi = gsi_last_bb (clsn_data->store_bb);
936 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
937 stmt = gimple_build_assign (t, red->initial_value);
938 mark_virtual_ops_for_renaming (stmt);
939 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
941 return 1;
944 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
945 store to a field of STORE in STORE_BB for the ssa name and its duplicate
946 specified in SLOT. */
948 static int
949 create_loads_and_stores_for_name (void **slot, void *data)
951 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
952 struct clsn_data *const clsn_data = (struct clsn_data *) data;
953 tree t;
954 gimple stmt;
955 gimple_stmt_iterator gsi;
956 tree type = TREE_TYPE (elt->new_name);
957 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
958 tree load_struct;
960 gsi = gsi_last_bb (clsn_data->store_bb);
961 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
962 stmt = gimple_build_assign (t, ssa_name (elt->version));
963 mark_virtual_ops_for_renaming (stmt);
964 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
966 gsi = gsi_last_bb (clsn_data->load_bb);
967 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
968 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
969 stmt = gimple_build_assign (elt->new_name, t);
970 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
971 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
973 return 1;
976 /* Moves all the variables used in LOOP and defined outside of it (including
977 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
978 name) to a structure created for this purpose. The code
980 while (1)
982 use (a);
983 use (b);
986 is transformed this way:
988 bb0:
989 old.a = a;
990 old.b = b;
992 bb1:
993 a' = new->a;
994 b' = new->b;
995 while (1)
997 use (a');
998 use (b');
1001 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1002 pointer `new' is intentionally not initialized (the loop will be split to a
1003 separate function later, and `new' will be initialized from its arguments).
1004 LD_ST_DATA holds information about the shared data structure used to pass
1005 information among the threads. It is initialized here, and
1006 gen_parallel_loop will pass it to create_call_for_reduction that
1007 needs this information. REDUCTION_LIST describes the reductions
1008 in LOOP. */
1010 static void
1011 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1012 tree *arg_struct, tree *new_arg_struct,
1013 struct clsn_data *ld_st_data)
1016 basic_block bb1 = split_edge (entry);
1017 basic_block bb0 = single_pred (bb1);
1018 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1019 name_to_copy_elt_eq, free);
1020 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1021 free);
1022 unsigned i;
1023 tree type, type_name, nvar;
1024 gimple_stmt_iterator gsi;
1025 struct clsn_data clsn_data;
1026 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1027 basic_block bb;
1028 basic_block entry_bb = bb1;
1029 basic_block exit_bb = exit->dest;
1031 entry = single_succ_edge (entry_bb);
1032 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1034 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1036 if (bb != entry_bb && bb != exit_bb)
1038 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1039 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1040 name_copies, decl_copies);
1042 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1043 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1044 name_copies, decl_copies);
1048 VEC_free (basic_block, heap, body);
1050 if (htab_elements (name_copies) == 0 && reduction_list == 0)
1052 /* It may happen that there is nothing to copy (if there are only
1053 loop carried and external variables in the loop). */
1054 *arg_struct = NULL;
1055 *new_arg_struct = NULL;
1057 else
1059 /* Create the type for the structure to store the ssa names to. */
1060 type = lang_hooks.types.make_type (RECORD_TYPE);
1061 type_name = build_decl (BUILTINS_LOCATION,
1062 TYPE_DECL, create_tmp_var_name (".paral_data"),
1063 type);
1064 TYPE_NAME (type) = type_name;
1066 htab_traverse (name_copies, add_field_for_name, type);
1067 if (reduction_list && htab_elements (reduction_list) > 0)
1069 /* Create the fields for reductions. */
1070 htab_traverse (reduction_list, add_field_for_reduction,
1071 type);
1073 layout_type (type);
1075 /* Create the loads and stores. */
1076 *arg_struct = create_tmp_var (type, ".paral_data_store");
1077 add_referenced_var (*arg_struct);
1078 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1079 add_referenced_var (nvar);
1080 *new_arg_struct = make_ssa_name (nvar, NULL);
1082 ld_st_data->store = *arg_struct;
1083 ld_st_data->load = *new_arg_struct;
1084 ld_st_data->store_bb = bb0;
1085 ld_st_data->load_bb = bb1;
1087 htab_traverse (name_copies, create_loads_and_stores_for_name,
1088 ld_st_data);
1090 /* Load the calculation from memory (after the join of the threads). */
1092 if (reduction_list && htab_elements (reduction_list) > 0)
1094 htab_traverse (reduction_list, create_stores_for_reduction,
1095 ld_st_data);
1096 clsn_data.load = make_ssa_name (nvar, NULL);
1097 clsn_data.load_bb = exit->dest;
1098 clsn_data.store = ld_st_data->store;
1099 create_final_loads_for_reduction (reduction_list, &clsn_data);
1103 htab_delete (decl_copies);
1104 htab_delete (name_copies);
1107 /* Bitmap containing uids of functions created by parallelization. We cannot
1108 allocate it from the default obstack, as it must live across compilation
1109 of several functions; we make it gc allocated instead. */
1111 static GTY(()) bitmap parallelized_functions;
1113 /* Returns true if FN was created by create_loop_fn. */
1115 static bool
1116 parallelized_function_p (tree fn)
1118 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1119 return false;
1121 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1124 /* Creates and returns an empty function that will receive the body of
1125 a parallelized loop. */
1127 static tree
1128 create_loop_fn (void)
1130 char buf[100];
1131 char *tname;
1132 tree decl, type, name, t;
1133 struct function *act_cfun = cfun;
1134 static unsigned loopfn_num;
1136 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1137 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1138 clean_symbol_name (tname);
1139 name = get_identifier (tname);
1140 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1142 decl = build_decl (BUILTINS_LOCATION,
1143 FUNCTION_DECL, name, type);
1144 if (!parallelized_functions)
1145 parallelized_functions = BITMAP_GGC_ALLOC ();
1146 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1148 TREE_STATIC (decl) = 1;
1149 TREE_USED (decl) = 1;
1150 DECL_ARTIFICIAL (decl) = 1;
1151 DECL_IGNORED_P (decl) = 0;
1152 TREE_PUBLIC (decl) = 0;
1153 DECL_UNINLINABLE (decl) = 1;
1154 DECL_EXTERNAL (decl) = 0;
1155 DECL_CONTEXT (decl) = NULL_TREE;
1156 DECL_INITIAL (decl) = make_node (BLOCK);
1158 t = build_decl (BUILTINS_LOCATION,
1159 RESULT_DECL, NULL_TREE, void_type_node);
1160 DECL_ARTIFICIAL (t) = 1;
1161 DECL_IGNORED_P (t) = 1;
1162 DECL_RESULT (decl) = t;
1164 t = build_decl (BUILTINS_LOCATION,
1165 PARM_DECL, get_identifier (".paral_data_param"),
1166 ptr_type_node);
1167 DECL_ARTIFICIAL (t) = 1;
1168 DECL_ARG_TYPE (t) = ptr_type_node;
1169 DECL_CONTEXT (t) = decl;
1170 TREE_USED (t) = 1;
1171 DECL_ARGUMENTS (decl) = t;
1173 allocate_struct_function (decl, false);
1175 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1176 it. */
1177 set_cfun (act_cfun);
1179 return decl;
1182 /* Moves the exit condition of LOOP to the beginning of its header, and
1183 duplicates the part of the last iteration that gets disabled to the
1184 exit of the loop. NIT is the number of iterations of the loop
1185 (used to initialize the variables in the duplicated part).
1187 TODO: the common case is that latch of the loop is empty and immediately
1188 follows the loop exit. In this case, it would be better not to copy the
1189 body of the loop, but only move the entry of the loop directly before the
1190 exit check and increase the number of iterations of the loop by one.
1191 This may need some additional preconditioning in case NIT = ~0.
1192 REDUCTION_LIST describes the reductions in LOOP. */
1194 static void
1195 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1197 basic_block *bbs, *nbbs, ex_bb, orig_header;
1198 unsigned n;
1199 bool ok;
1200 edge exit = single_dom_exit (loop), hpred;
1201 tree control, control_name, res, t;
1202 gimple phi, nphi, cond_stmt, stmt;
1203 gimple_stmt_iterator gsi;
1205 split_block_after_labels (loop->header);
1206 orig_header = single_succ (loop->header);
1207 hpred = single_succ_edge (loop->header);
1209 cond_stmt = last_stmt (exit->src);
1210 control = gimple_cond_lhs (cond_stmt);
1211 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1213 /* Make sure that we have phi nodes on exit for all loop header phis
1214 (create_parallel_loop requires that). */
1215 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1217 phi = gsi_stmt (gsi);
1218 res = PHI_RESULT (phi);
1219 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1220 SET_PHI_RESULT (phi, t);
1222 nphi = create_phi_node (res, orig_header);
1223 SSA_NAME_DEF_STMT (res) = nphi;
1224 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1226 if (res == control)
1228 gimple_cond_set_lhs (cond_stmt, t);
1229 update_stmt (cond_stmt);
1230 control = t;
1234 bbs = get_loop_body_in_dom_order (loop);
1235 for (n = 0; bbs[n] != exit->src; n++)
1236 continue;
1237 nbbs = XNEWVEC (basic_block, n);
1238 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1239 bbs + 1, n, nbbs);
1240 gcc_assert (ok);
1241 free (bbs);
1242 ex_bb = nbbs[0];
1243 free (nbbs);
1245 /* Other than reductions, the only gimple reg that should be copied
1246 out of the loop is the control variable. */
1248 control_name = NULL_TREE;
1249 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1251 phi = gsi_stmt (gsi);
1252 res = PHI_RESULT (phi);
1253 if (!is_gimple_reg (res))
1255 gsi_next (&gsi);
1256 continue;
1259 /* Check if it is a part of reduction. If it is,
1260 keep the phi at the reduction's keep_res field. The
1261 PHI_RESULT of this phi is the resulting value of the reduction
1262 variable when exiting the loop. */
1264 exit = single_dom_exit (loop);
1266 if (htab_elements (reduction_list) > 0)
1268 struct reduction_info *red;
1270 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1272 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1273 if (red)
1275 red->keep_res = phi;
1276 gsi_next (&gsi);
1277 continue;
1280 gcc_assert (control_name == NULL_TREE
1281 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1282 control_name = res;
1283 remove_phi_node (&gsi, false);
1285 gcc_assert (control_name != NULL_TREE);
1287 /* Initialize the control variable to NIT. */
1288 gsi = gsi_after_labels (ex_bb);
1289 nit = force_gimple_operand_gsi (&gsi,
1290 fold_convert (TREE_TYPE (control_name), nit),
1291 false, NULL_TREE, false, GSI_SAME_STMT);
1292 stmt = gimple_build_assign (control_name, nit);
1293 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1294 SSA_NAME_DEF_STMT (control_name) = stmt;
1297 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1298 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1299 NEW_DATA is the variable that should be initialized from the argument
1300 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1301 basic block containing GIMPLE_OMP_PARALLEL tree. */
1303 static basic_block
1304 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1305 tree new_data, unsigned n_threads)
1307 gimple_stmt_iterator gsi;
1308 basic_block bb, paral_bb, for_bb, ex_bb;
1309 tree t, param, res;
1310 gimple stmt, for_stmt, phi, cond_stmt;
1311 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1312 edge exit, nexit, guard, end, e;
1314 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1315 bb = loop_preheader_edge (loop)->src;
1316 paral_bb = single_pred (bb);
1317 gsi = gsi_last_bb (paral_bb);
1319 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
1320 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1321 = build_int_cst (integer_type_node, n_threads);
1322 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1324 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1326 /* Initialize NEW_DATA. */
1327 if (data)
1329 gsi = gsi_after_labels (bb);
1331 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1332 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1333 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1334 SSA_NAME_DEF_STMT (param) = stmt;
1336 stmt = gimple_build_assign (new_data,
1337 fold_convert (TREE_TYPE (new_data), param));
1338 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1339 SSA_NAME_DEF_STMT (new_data) = stmt;
1342 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1343 bb = split_loop_exit_edge (single_dom_exit (loop));
1344 gsi = gsi_last_bb (bb);
1345 gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);
1347 /* Extract data for GIMPLE_OMP_FOR. */
1348 gcc_assert (loop->header == single_dom_exit (loop)->src);
1349 cond_stmt = last_stmt (loop->header);
1351 cvar = gimple_cond_lhs (cond_stmt);
1352 cvar_base = SSA_NAME_VAR (cvar);
1353 phi = SSA_NAME_DEF_STMT (cvar);
1354 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1355 initvar = make_ssa_name (cvar_base, NULL);
1356 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1357 initvar);
1358 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1360 gsi = gsi_last_bb (loop->latch);
1361 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1362 gsi_remove (&gsi, true);
1364 /* Prepare cfg. */
1365 for_bb = split_edge (loop_preheader_edge (loop));
1366 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1367 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1368 gcc_assert (exit == single_dom_exit (loop));
1370 guard = make_edge (for_bb, ex_bb, 0);
1371 single_succ_edge (loop->latch)->flags = 0;
1372 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1373 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1375 source_location locus;
1376 tree def;
1377 phi = gsi_stmt (gsi);
1378 res = PHI_RESULT (phi);
1379 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1381 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1382 locus = gimple_phi_arg_location_from_edge (stmt,
1383 loop_preheader_edge (loop));
1384 add_phi_arg (phi, def, guard, locus);
1386 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1387 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1388 add_phi_arg (phi, def, end, locus);
1390 e = redirect_edge_and_branch (exit, nexit->dest);
1391 PENDING_STMT (e) = NULL;
1393 /* Emit GIMPLE_OMP_FOR. */
1394 gimple_cond_set_lhs (cond_stmt, cvar_base);
1395 type = TREE_TYPE (cvar);
1396 t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
1397 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1399 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1400 gimple_omp_for_set_index (for_stmt, 0, initvar);
1401 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1402 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1403 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1404 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1405 cvar_base,
1406 build_int_cst (type, 1)));
1408 gsi = gsi_last_bb (for_bb);
1409 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1410 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1412 /* Emit GIMPLE_OMP_CONTINUE. */
1413 gsi = gsi_last_bb (loop->latch);
1414 stmt = gimple_build_omp_continue (cvar_next, cvar);
1415 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1416 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1418 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1419 gsi = gsi_last_bb (ex_bb);
1420 gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);
1422 return paral_bb;
1425 /* Generates code to execute the iterations of LOOP in N_THREADS
1426 threads in parallel.
1428 NITER describes number of iterations of LOOP.
1429 REDUCTION_LIST describes the reductions existent in the LOOP. */
1431 static void
1432 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1433 unsigned n_threads, struct tree_niter_desc *niter)
1435 struct loop *nloop;
1436 loop_iterator li;
1437 tree many_iterations_cond, type, nit;
1438 tree arg_struct, new_arg_struct;
1439 gimple_seq stmts;
1440 basic_block parallel_head;
1441 edge entry, exit;
1442 struct clsn_data clsn_data;
1443 unsigned prob;
1445 /* From
1447 ---------------------------------------------------------------------
1448 loop
1450 IV = phi (INIT, IV + STEP)
1451 BODY1;
1452 if (COND)
1453 break;
1454 BODY2;
1456 ---------------------------------------------------------------------
1458 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1459 we generate the following code:
1461 ---------------------------------------------------------------------
1463 if (MAY_BE_ZERO
1464 || NITER < MIN_PER_THREAD * N_THREADS)
1465 goto original;
1467 BODY1;
1468 store all local loop-invariant variables used in body of the loop to DATA.
1469 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1470 load the variables from DATA.
1471 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1472 BODY2;
1473 BODY1;
1474 GIMPLE_OMP_CONTINUE;
1475 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1476 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1477 goto end;
1479 original:
1480 loop
1482 IV = phi (INIT, IV + STEP)
1483 BODY1;
1484 if (COND)
1485 break;
1486 BODY2;
1489 end:
1493 /* Create two versions of the loop -- in the old one, we know that the
1494 number of iterations is large enough, and we will transform it into the
1495 loop that will be split to loop_fn, the new one will be used for the
1496 remaining iterations. */
1498 type = TREE_TYPE (niter->niter);
1499 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1500 NULL_TREE);
1501 if (stmts)
1502 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1504 many_iterations_cond =
1505 fold_build2 (GE_EXPR, boolean_type_node,
1506 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1507 many_iterations_cond
1508 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1509 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1510 many_iterations_cond);
1511 many_iterations_cond
1512 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1513 if (stmts)
1514 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1515 if (!is_gimple_condexpr (many_iterations_cond))
1517 many_iterations_cond
1518 = force_gimple_operand (many_iterations_cond, &stmts,
1519 true, NULL_TREE);
1520 if (stmts)
1521 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1524 initialize_original_copy_tables ();
1526 /* We assume that the loop usually iterates a lot. */
1527 prob = 4 * REG_BR_PROB_BASE / 5;
1528 nloop = loop_version (loop, many_iterations_cond, NULL,
1529 prob, prob, REG_BR_PROB_BASE - prob, true);
1530 update_ssa (TODO_update_ssa);
1531 free_original_copy_tables ();
1533 /* Base all the induction variables in LOOP on a single control one. */
1534 canonicalize_loop_ivs (loop, &nit);
1536 /* Ensure that the exit condition is the first statement in the loop. */
1537 transform_to_exit_first_loop (loop, reduction_list, nit);
1539 /* Generate initializations for reductions. */
1540 if (htab_elements (reduction_list) > 0)
1541 htab_traverse (reduction_list, initialize_reductions, loop);
1543 /* Eliminate the references to local variables from the loop. */
1544 gcc_assert (single_exit (loop));
1545 entry = loop_preheader_edge (loop);
1546 exit = single_dom_exit (loop);
1548 eliminate_local_variables (entry, exit);
1549 /* In the old loop, move all variables non-local to the loop to a structure
1550 and back, and create separate decls for the variables used in loop. */
1551 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1552 &new_arg_struct, &clsn_data);
1554 /* Create the parallel constructs. */
1555 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1556 new_arg_struct, n_threads);
1557 if (htab_elements (reduction_list) > 0)
1558 create_call_for_reduction (loop, reduction_list, &clsn_data);
1560 scev_reset ();
1562 /* Cancel the loop (it is simpler to do it here rather than to teach the
1563 expander to do it). */
1564 cancel_loop_tree (loop);
1566 /* Free loop bound estimations that could contain references to
1567 removed statements. */
1568 FOR_EACH_LOOP (li, loop, 0)
1569 free_numbers_of_iterations_estimates_loop (loop);
1571 /* Expand the parallel constructs. We do it directly here instead of running
1572 a separate expand_omp pass, since it is more efficient, and less likely to
1573 cause troubles with further analyses not being able to deal with the
1574 OMP trees. */
1576 omp_expand_local (parallel_head);
1579 /* Returns true when LOOP contains vector phi nodes. */
1581 static bool
1582 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1584 unsigned i;
1585 basic_block *bbs = get_loop_body_in_dom_order (loop);
1586 gimple_stmt_iterator gsi;
1587 bool res = true;
1589 for (i = 0; i < loop->num_nodes; i++)
1590 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1591 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1592 goto end;
1594 res = false;
1595 end:
1596 free (bbs);
1597 return res;
1600 /* Create a reduction_info struct, initialize it with REDUC_STMT
1601 and PHI, insert it to the REDUCTION_LIST. */
1603 static void
1604 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1606 PTR *slot;
1607 struct reduction_info *new_reduction;
1609 gcc_assert (reduc_stmt);
1611 if (dump_file && (dump_flags & TDF_DETAILS))
1613 fprintf (dump_file,
1614 "Detected reduction. reduction stmt is: \n");
1615 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1616 fprintf (dump_file, "\n");
1619 new_reduction = XCNEW (struct reduction_info);
1621 new_reduction->reduc_stmt = reduc_stmt;
1622 new_reduction->reduc_phi = phi;
1623 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1624 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1625 *slot = new_reduction;
1628 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1630 static void
1631 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1633 gimple_stmt_iterator gsi;
1634 loop_vec_info simple_loop_info;
1636 vect_dump = NULL;
1637 simple_loop_info = vect_analyze_loop_form (loop);
1639 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1641 gimple phi = gsi_stmt (gsi);
1642 affine_iv iv;
1643 tree res = PHI_RESULT (phi);
1644 bool double_reduc;
1646 if (!is_gimple_reg (res))
1647 continue;
1649 if (!simple_iv (loop, loop, res, &iv, true)
1650 && simple_loop_info)
1652 gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
1653 if (reduc_stmt)
1654 build_new_reduction (reduction_list, reduc_stmt, phi);
1657 destroy_loop_vec_info (simple_loop_info, true);
1660 /* Try to initialize NITER for code generation part. */
1662 static bool
1663 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1665 edge exit = single_dom_exit (loop);
1667 gcc_assert (exit);
1669 /* We need to know # of iterations, and there should be no uses of values
1670 defined inside loop outside of it, unless the values are invariants of
1671 the loop. */
1672 if (!number_of_iterations_exit (loop, exit, niter, false))
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, " FAILED: number of iterations not known\n");
1676 return false;
1679 return true;
1682 /* Try to initialize REDUCTION_LIST for code generation part.
1683 REDUCTION_LIST describes the reductions. */
1685 static bool
1686 try_create_reduction_list (loop_p loop, htab_t reduction_list)
1688 edge exit = single_dom_exit (loop);
1689 gimple_stmt_iterator gsi;
1691 gcc_assert (exit);
1693 gather_scalar_reductions (loop, reduction_list);
1696 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1698 gimple phi = gsi_stmt (gsi);
1699 struct reduction_info *red;
1700 imm_use_iterator imm_iter;
1701 use_operand_p use_p;
1702 gimple reduc_phi;
1703 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1705 if (is_gimple_reg (val))
1707 if (dump_file && (dump_flags & TDF_DETAILS))
1709 fprintf (dump_file, "phi is ");
1710 print_gimple_stmt (dump_file, phi, 0, 0);
1711 fprintf (dump_file, "arg of phi to exit: value ");
1712 print_generic_expr (dump_file, val, 0);
1713 fprintf (dump_file, " used outside loop\n");
1714 fprintf (dump_file,
1715 " checking if it a part of reduction pattern: \n");
1717 if (htab_elements (reduction_list) == 0)
1719 if (dump_file && (dump_flags & TDF_DETAILS))
1720 fprintf (dump_file,
1721 " FAILED: it is not a part of reduction.\n");
1722 return false;
1724 reduc_phi = NULL;
1725 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
1727 if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
1729 reduc_phi = USE_STMT (use_p);
1730 break;
1733 red = reduction_phi (reduction_list, reduc_phi);
1734 if (red == NULL)
1736 if (dump_file && (dump_flags & TDF_DETAILS))
1737 fprintf (dump_file,
1738 " FAILED: it is not a part of reduction.\n");
1739 return false;
1741 if (dump_file && (dump_flags & TDF_DETAILS))
1743 fprintf (dump_file, "reduction phi is ");
1744 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
1745 fprintf (dump_file, "reduction stmt is ");
1746 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
1751 /* The iterations of the loop may communicate only through bivs whose
1752 iteration space can be distributed efficiently. */
1753 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1755 gimple phi = gsi_stmt (gsi);
1756 tree def = PHI_RESULT (phi);
1757 affine_iv iv;
1759 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
1761 struct reduction_info *red;
1763 red = reduction_phi (reduction_list, phi);
1764 if (red == NULL)
1766 if (dump_file && (dump_flags & TDF_DETAILS))
1767 fprintf (dump_file,
1768 " FAILED: scalar dependency between iterations\n");
1769 return false;
1775 return true;
1778 /* Detect parallel loops and generate parallel code using libgomp
1779 primitives. Returns true if some loop was parallelized, false
1780 otherwise. */
1782 bool
1783 parallelize_loops (void)
1785 unsigned n_threads = flag_tree_parallelize_loops;
1786 bool changed = false;
1787 struct loop *loop;
1788 struct tree_niter_desc niter_desc;
1789 loop_iterator li;
1790 htab_t reduction_list;
1792 /* Do not parallelize loops in the functions created by parallelization. */
1793 if (parallelized_function_p (cfun->decl))
1794 return false;
1796 reduction_list = htab_create (10, reduction_info_hash,
1797 reduction_info_eq, free);
1798 init_stmt_vec_info_vec ();
1800 FOR_EACH_LOOP (li, loop, 0)
1802 htab_empty (reduction_list);
1804 /* FIXME: Only consider innermost loops with just one exit. */
1805 if (loop->inner || !single_dom_exit (loop))
1806 continue;
1808 if (/* And of course, the loop must be parallelizable. */
1809 !can_duplicate_loop_p (loop)
1810 || loop_has_blocks_with_irreducible_flag (loop)
1811 /* FIXME: the check for vector phi nodes could be removed. */
1812 || loop_has_vector_phi_nodes (loop))
1813 continue;
1815 if (/* Do not bother with loops in cold areas. */
1816 optimize_loop_nest_for_size_p (loop)
1817 /* Or loops that roll too little. */
1818 || expected_loop_iterations (loop) <= n_threads)
1819 continue;
1820 if (!try_get_loop_niter (loop, &niter_desc))
1821 continue;
1823 if (!try_create_reduction_list (loop, reduction_list))
1824 continue;
1826 if (!loop_parallel_p (loop))
1827 continue;
1829 changed = true;
1830 gen_parallel_loop (loop, reduction_list,
1831 n_threads, &niter_desc);
1832 verify_flow_info ();
1833 verify_dominators (CDI_DOMINATORS);
1834 verify_loop_structure ();
1835 verify_loop_closed_ssa ();
1838 free_stmt_vec_info_vec ();
1839 htab_delete (reduction_list);
1841 /* Parallelization will cause new function calls to be inserted through
1842 which local variables will escape. Reset the points-to solutions
1843 for ESCAPED and CALLUSED. */
1844 if (changed)
1846 pt_solution_reset (&cfun->gimple_df->escaped);
1847 pt_solution_reset (&cfun->gimple_df->callused);
1850 return changed;
1853 #include "gt-tree-parloops.h"