2013-09-12 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / tree-parloops.c
blob94843cae52ba49c35a495cbcc977a0d4f610fbc7
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree-ssa.h"
26 #include "cfgloop.h"
27 #include "tree-data-ref.h"
28 #include "tree-scalar-evolution.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-pass.h"
31 #include "langhooks.h"
32 #include "tree-vectorizer.h"
33 #include "tree-hasher.h"
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
42 The most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common reduction patterns for outer loops.
59 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
61 Reduction handling:
62 currently we use vect_force_simple_reduction() to detect reduction patterns.
63 The code transformation will be introduced by an example.
66 parloop
68 int sum=1;
70 for (i = 0; i < N; i++)
72 x[i] = i + 3;
73 sum+=x[i];
77 gimple-like code:
78 header_bb:
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
82 D.1795_8 = i_28 + 3;
83 x[i_28] = D.1795_8;
84 sum_11 = D.1795_8 + sum_29;
85 i_12 = i_28 + 1;
86 if (N_6(D) > i_12)
87 goto header_bb;
90 exit_bb:
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
96 after reduction transformation (only relevant parts):
98 parloop
101 ....
104 # Storing the initial value given by the user. #
106 .paral_data_store.32.sum.27 = 1;
108 #pragma omp parallel num_threads(4)
110 #pragma omp for schedule(static)
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
116 # sum.27_29 = PHI <sum.27_11, 0>
118 sum.27_11 = D.1827_8 + sum.27_29;
120 GIMPLE_OMP_CONTINUE
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
124 GIMPLE_OMP_RETURN
126 # Creating the atomic operation is done at
127 create_call_for_reduction_1() #
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
134 GIMPLE_OMP_RETURN
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
138 The value computed by the threads is loaded from the
139 shared struct. #
142 .paral_data_load.33_52 = &.paral_data_store.32;
143 sum_37 = .paral_data_load.33_52->sum.27;
144 sum_43 = D.1795_41 + sum_37;
146 exit bb:
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
156 /* Minimal number of iterations of a loop that should be executed in each
157 thread. */
158 #define MIN_PER_THREAD 100
160 /* Element of the hashtable, representing a
161 reduction in the current loop. */
162 struct reduction_info
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
170 of the reduction variable when existing the loop. */
171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
173 tree init; /* reduction initialization value. */
174 gimple new_phi; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
180 /* Reduction info hashtable helpers. */
182 struct reduction_hasher : typed_free_remove <reduction_info>
184 typedef reduction_info value_type;
185 typedef reduction_info compare_type;
186 static inline hashval_t hash (const value_type *);
187 static inline bool equal (const value_type *, const compare_type *);
190 /* Equality and hash functions for hashtab code. */
192 inline bool
193 reduction_hasher::equal (const value_type *a, const compare_type *b)
195 return (a->reduc_phi == b->reduc_phi);
198 inline hashval_t
199 reduction_hasher::hash (const value_type *a)
201 return a->reduc_version;
204 typedef hash_table <reduction_hasher> reduction_info_table_type;
207 static struct reduction_info *
208 reduction_phi (reduction_info_table_type reduction_list, gimple phi)
210 struct reduction_info tmpred, *red;
212 if (reduction_list.elements () == 0 || phi == NULL)
213 return NULL;
215 tmpred.reduc_phi = phi;
216 tmpred.reduc_version = gimple_uid (phi);
217 red = reduction_list.find (&tmpred);
219 return red;
222 /* Element of hashtable of names to copy. */
224 struct name_to_copy_elt
226 unsigned version; /* The version of the name to copy. */
227 tree new_name; /* The new name used in the copy. */
228 tree field; /* The field of the structure used to pass the
229 value. */
232 /* Name copies hashtable helpers. */
234 struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
236 typedef name_to_copy_elt value_type;
237 typedef name_to_copy_elt compare_type;
238 static inline hashval_t hash (const value_type *);
239 static inline bool equal (const value_type *, const compare_type *);
242 /* Equality and hash functions for hashtab code. */
244 inline bool
245 name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
247 return a->version == b->version;
250 inline hashval_t
251 name_to_copy_hasher::hash (const value_type *a)
253 return (hashval_t) a->version;
256 typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
258 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
259 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
260 represents the denominator for every element in the matrix. */
261 typedef struct lambda_trans_matrix_s
263 lambda_matrix matrix;
264 int rowsize;
265 int colsize;
266 int denominator;
267 } *lambda_trans_matrix;
268 #define LTM_MATRIX(T) ((T)->matrix)
269 #define LTM_ROWSIZE(T) ((T)->rowsize)
270 #define LTM_COLSIZE(T) ((T)->colsize)
271 #define LTM_DENOMINATOR(T) ((T)->denominator)
273 /* Allocate a new transformation matrix. */
275 static lambda_trans_matrix
276 lambda_trans_matrix_new (int colsize, int rowsize,
277 struct obstack * lambda_obstack)
279 lambda_trans_matrix ret;
281 ret = (lambda_trans_matrix)
282 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
283 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
284 LTM_ROWSIZE (ret) = rowsize;
285 LTM_COLSIZE (ret) = colsize;
286 LTM_DENOMINATOR (ret) = 1;
287 return ret;
290 /* Multiply a vector VEC by a matrix MAT.
291 MAT is an M*N matrix, and VEC is a vector with length N. The result
292 is stored in DEST which must be a vector of length M. */
294 static void
295 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
296 lambda_vector vec, lambda_vector dest)
298 int i, j;
300 lambda_vector_clear (dest, m);
301 for (i = 0; i < m; i++)
302 for (j = 0; j < n; j++)
303 dest[i] += matrix[i][j] * vec[j];
306 /* Return true if TRANS is a legal transformation matrix that respects
307 the dependence vectors in DISTS and DIRS. The conservative answer
308 is false.
310 "Wolfe proves that a unimodular transformation represented by the
311 matrix T is legal when applied to a loop nest with a set of
312 lexicographically non-negative distance vectors RDG if and only if
313 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
314 i.e.: if and only if it transforms the lexicographically positive
315 distance vectors to lexicographically positive vectors. Note that
316 a unimodular matrix must transform the zero vector (and only it) to
317 the zero vector." S.Muchnick. */
319 static bool
320 lambda_transform_legal_p (lambda_trans_matrix trans,
321 int nb_loops,
322 vec<ddr_p> dependence_relations)
324 unsigned int i, j;
325 lambda_vector distres;
326 struct data_dependence_relation *ddr;
328 gcc_assert (LTM_COLSIZE (trans) == nb_loops
329 && LTM_ROWSIZE (trans) == nb_loops);
331 /* When there are no dependences, the transformation is correct. */
332 if (dependence_relations.length () == 0)
333 return true;
335 ddr = dependence_relations[0];
336 if (ddr == NULL)
337 return true;
339 /* When there is an unknown relation in the dependence_relations, we
340 know that it is no worth looking at this loop nest: give up. */
341 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
342 return false;
344 distres = lambda_vector_new (nb_loops);
346 /* For each distance vector in the dependence graph. */
347 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
349 /* Don't care about relations for which we know that there is no
350 dependence, nor about read-read (aka. output-dependences):
351 these data accesses can happen in any order. */
352 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
353 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
354 continue;
356 /* Conservatively answer: "this transformation is not valid". */
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 return false;
360 /* If the dependence could not be captured by a distance vector,
361 conservatively answer that the transform is not valid. */
362 if (DDR_NUM_DIST_VECTS (ddr) == 0)
363 return false;
365 /* Compute trans.dist_vect */
366 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
368 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
369 DDR_DIST_VECT (ddr, j), distres);
371 if (!lambda_vector_lexico_pos (distres, nb_loops))
372 return false;
375 return true;
378 /* Data dependency analysis. Returns true if the iterations of LOOP
379 are independent on each other (that is, if we can execute them
380 in parallel). */
382 static bool
383 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
385 vec<loop_p> loop_nest;
386 vec<ddr_p> dependence_relations;
387 vec<data_reference_p> datarefs;
388 lambda_trans_matrix trans;
389 bool ret = false;
391 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, "Considering loop %d\n", loop->num);
394 if (!loop->inner)
395 fprintf (dump_file, "loop is innermost\n");
396 else
397 fprintf (dump_file, "loop NOT innermost\n");
400 /* Check for problems with dependences. If the loop can be reversed,
401 the iterations are independent. */
402 datarefs.create (10);
403 dependence_relations.create (10 * 10);
404 loop_nest.create (3);
405 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
406 &dependence_relations))
408 if (dump_file && (dump_flags & TDF_DETAILS))
409 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
410 ret = false;
411 goto end;
413 if (dump_file && (dump_flags & TDF_DETAILS))
414 dump_data_dependence_relations (dump_file, dependence_relations);
416 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
417 LTM_MATRIX (trans)[0][0] = -1;
419 if (lambda_transform_legal_p (trans, 1, dependence_relations))
421 ret = true;
422 if (dump_file && (dump_flags & TDF_DETAILS))
423 fprintf (dump_file, " SUCCESS: may be parallelized\n");
425 else if (dump_file && (dump_flags & TDF_DETAILS))
426 fprintf (dump_file,
427 " FAILED: data dependencies exist across iterations\n");
429 end:
430 loop_nest.release ();
431 free_dependence_relations (dependence_relations);
432 free_data_refs (datarefs);
434 return ret;
437 /* Return true when LOOP contains basic blocks marked with the
438 BB_IRREDUCIBLE_LOOP flag. */
440 static inline bool
441 loop_has_blocks_with_irreducible_flag (struct loop *loop)
443 unsigned i;
444 basic_block *bbs = get_loop_body_in_dom_order (loop);
445 bool res = true;
447 for (i = 0; i < loop->num_nodes; i++)
448 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
449 goto end;
451 res = false;
452 end:
453 free (bbs);
454 return res;
457 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
458 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
459 to their addresses that can be reused. The address of OBJ is known to
460 be invariant in the whole function. Other needed statements are placed
461 right before GSI. */
463 static tree
464 take_address_of (tree obj, tree type, edge entry,
465 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
467 int uid;
468 int_tree_map **dslot;
469 struct int_tree_map ielt, *nielt;
470 tree *var_p, name, addr;
471 gimple stmt;
472 gimple_seq stmts;
474 /* Since the address of OBJ is invariant, the trees may be shared.
475 Avoid rewriting unrelated parts of the code. */
476 obj = unshare_expr (obj);
477 for (var_p = &obj;
478 handled_component_p (*var_p);
479 var_p = &TREE_OPERAND (*var_p, 0))
480 continue;
482 /* Canonicalize the access to base on a MEM_REF. */
483 if (DECL_P (*var_p))
484 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
486 /* Assign a canonical SSA name to the address of the base decl used
487 in the address and share it for all accesses and addresses based
488 on it. */
489 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
490 ielt.uid = uid;
491 dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
492 if (!*dslot)
494 if (gsi == NULL)
495 return NULL;
496 addr = TREE_OPERAND (*var_p, 0);
497 const char *obj_name
498 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
499 if (obj_name)
500 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
501 else
502 name = make_ssa_name (TREE_TYPE (addr), NULL);
503 stmt = gimple_build_assign (name, addr);
504 gsi_insert_on_edge_immediate (entry, stmt);
506 nielt = XNEW (struct int_tree_map);
507 nielt->uid = uid;
508 nielt->to = name;
509 *dslot = nielt;
511 else
512 name = (*dslot)->to;
514 /* Express the address in terms of the canonical SSA name. */
515 TREE_OPERAND (*var_p, 0) = name;
516 if (gsi == NULL)
517 return build_fold_addr_expr_with_type (obj, type);
519 name = force_gimple_operand (build_addr (obj, current_function_decl),
520 &stmts, true, NULL_TREE);
521 if (!gimple_seq_empty_p (stmts))
522 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
524 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
526 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
527 NULL_TREE);
528 if (!gimple_seq_empty_p (stmts))
529 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
532 return name;
535 /* Callback for htab_traverse. Create the initialization statement
536 for reduction described in SLOT, and place it at the preheader of
537 the loop described in DATA. */
540 initialize_reductions (reduction_info **slot, struct loop *loop)
542 tree init, c;
543 tree bvar, type, arg;
544 edge e;
546 struct reduction_info *const reduc = *slot;
548 /* Create initialization in preheader:
549 reduction_variable = initialization value of reduction. */
551 /* In the phi node at the header, replace the argument coming
552 from the preheader with the reduction initialization value. */
554 /* Create a new variable to initialize the reduction. */
555 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
556 bvar = create_tmp_var (type, "reduction");
558 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
559 OMP_CLAUSE_REDUCTION);
560 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
561 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
563 init = omp_reduction_init (c, TREE_TYPE (bvar));
564 reduc->init = init;
566 /* Replace the argument representing the initialization value
567 with the initialization value for the reduction (neutral
568 element for the particular operation, e.g. 0 for PLUS_EXPR,
569 1 for MULT_EXPR, etc).
570 Keep the old value in a new variable "reduction_initial",
571 that will be taken in consideration after the parallel
572 computing is done. */
574 e = loop_preheader_edge (loop);
575 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
576 /* Create new variable to hold the initial value. */
578 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
579 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
580 reduc->initial_value = arg;
581 return 1;
584 struct elv_data
586 struct walk_stmt_info info;
587 edge entry;
588 int_tree_htab_type decl_address;
589 gimple_stmt_iterator *gsi;
590 bool changed;
591 bool reset;
594 /* Eliminates references to local variables in *TP out of the single
595 entry single exit region starting at DTA->ENTRY.
596 DECL_ADDRESS contains addresses of the references that had their
597 address taken already. If the expression is changed, CHANGED is
598 set to true. Callback for walk_tree. */
600 static tree
601 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
603 struct elv_data *const dta = (struct elv_data *) data;
604 tree t = *tp, var, addr, addr_type, type, obj;
606 if (DECL_P (t))
608 *walk_subtrees = 0;
610 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
611 return NULL_TREE;
613 type = TREE_TYPE (t);
614 addr_type = build_pointer_type (type);
615 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
616 dta->gsi);
617 if (dta->gsi == NULL && addr == NULL_TREE)
619 dta->reset = true;
620 return NULL_TREE;
623 *tp = build_simple_mem_ref (addr);
625 dta->changed = true;
626 return NULL_TREE;
629 if (TREE_CODE (t) == ADDR_EXPR)
631 /* ADDR_EXPR may appear in two contexts:
632 -- as a gimple operand, when the address taken is a function invariant
633 -- as gimple rhs, when the resulting address in not a function
634 invariant
635 We do not need to do anything special in the latter case (the base of
636 the memory reference whose address is taken may be replaced in the
637 DECL_P case). The former case is more complicated, as we need to
638 ensure that the new address is still a gimple operand. Thus, it
639 is not sufficient to replace just the base of the memory reference --
640 we need to move the whole computation of the address out of the
641 loop. */
642 if (!is_gimple_val (t))
643 return NULL_TREE;
645 *walk_subtrees = 0;
646 obj = TREE_OPERAND (t, 0);
647 var = get_base_address (obj);
648 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
649 return NULL_TREE;
651 addr_type = TREE_TYPE (t);
652 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
653 dta->gsi);
654 if (dta->gsi == NULL && addr == NULL_TREE)
656 dta->reset = true;
657 return NULL_TREE;
659 *tp = addr;
661 dta->changed = true;
662 return NULL_TREE;
665 if (!EXPR_P (t))
666 *walk_subtrees = 0;
668 return NULL_TREE;
671 /* Moves the references to local variables in STMT at *GSI out of the single
672 entry single exit region starting at ENTRY. DECL_ADDRESS contains
673 addresses of the references that had their address taken
674 already. */
676 static void
677 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
678 int_tree_htab_type decl_address)
680 struct elv_data dta;
681 gimple stmt = gsi_stmt (*gsi);
683 memset (&dta.info, '\0', sizeof (dta.info));
684 dta.entry = entry;
685 dta.decl_address = decl_address;
686 dta.changed = false;
687 dta.reset = false;
689 if (gimple_debug_bind_p (stmt))
691 dta.gsi = NULL;
692 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
693 eliminate_local_variables_1, &dta.info, NULL);
694 if (dta.reset)
696 gimple_debug_bind_reset_value (stmt);
697 dta.changed = true;
700 else if (gimple_clobber_p (stmt))
702 stmt = gimple_build_nop ();
703 gsi_replace (gsi, stmt, false);
704 dta.changed = true;
706 else
708 dta.gsi = gsi;
709 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
712 if (dta.changed)
713 update_stmt (stmt);
716 /* Eliminates the references to local variables from the single entry
717 single exit region between the ENTRY and EXIT edges.
719 This includes:
720 1) Taking address of a local variable -- these are moved out of the
721 region (and temporary variable is created to hold the address if
722 necessary).
724 2) Dereferencing a local variable -- these are replaced with indirect
725 references. */
727 static void
728 eliminate_local_variables (edge entry, edge exit)
730 basic_block bb;
731 vec<basic_block> body;
732 body.create (3);
733 unsigned i;
734 gimple_stmt_iterator gsi;
735 bool has_debug_stmt = false;
736 int_tree_htab_type decl_address;
737 decl_address.create (10);
738 basic_block entry_bb = entry->src;
739 basic_block exit_bb = exit->dest;
741 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
743 FOR_EACH_VEC_ELT (body, i, bb)
744 if (bb != entry_bb && bb != exit_bb)
745 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
746 if (is_gimple_debug (gsi_stmt (gsi)))
748 if (gimple_debug_bind_p (gsi_stmt (gsi)))
749 has_debug_stmt = true;
751 else
752 eliminate_local_variables_stmt (entry, &gsi, decl_address);
754 if (has_debug_stmt)
755 FOR_EACH_VEC_ELT (body, i, bb)
756 if (bb != entry_bb && bb != exit_bb)
757 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
758 if (gimple_debug_bind_p (gsi_stmt (gsi)))
759 eliminate_local_variables_stmt (entry, &gsi, decl_address);
761 decl_address.dispose ();
762 body.release ();
765 /* Returns true if expression EXPR is not defined between ENTRY and
766 EXIT, i.e. if all its operands are defined outside of the region. */
768 static bool
769 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
771 basic_block entry_bb = entry->src;
772 basic_block exit_bb = exit->dest;
773 basic_block def_bb;
775 if (is_gimple_min_invariant (expr))
776 return true;
778 if (TREE_CODE (expr) == SSA_NAME)
780 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
781 if (def_bb
782 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
783 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
784 return false;
786 return true;
789 return false;
792 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
793 The copies are stored to NAME_COPIES, if NAME was already duplicated,
794 its duplicate stored in NAME_COPIES is returned.
796 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
797 duplicated, storing the copies in DECL_COPIES. */
799 static tree
800 separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
801 int_tree_htab_type decl_copies, bool copy_name_p)
803 tree copy, var, var_copy;
804 unsigned idx, uid, nuid;
805 struct int_tree_map ielt, *nielt;
806 struct name_to_copy_elt elt, *nelt;
807 name_to_copy_elt **slot;
808 int_tree_map **dslot;
810 if (TREE_CODE (name) != SSA_NAME)
811 return name;
813 idx = SSA_NAME_VERSION (name);
814 elt.version = idx;
815 slot = name_copies.find_slot_with_hash (&elt, idx,
816 copy_name_p ? INSERT : NO_INSERT);
817 if (slot && *slot)
818 return (*slot)->new_name;
820 if (copy_name_p)
822 copy = duplicate_ssa_name (name, NULL);
823 nelt = XNEW (struct name_to_copy_elt);
824 nelt->version = idx;
825 nelt->new_name = copy;
826 nelt->field = NULL_TREE;
827 *slot = nelt;
829 else
831 gcc_assert (!slot);
832 copy = name;
835 var = SSA_NAME_VAR (name);
836 if (!var)
837 return copy;
839 uid = DECL_UID (var);
840 ielt.uid = uid;
841 dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
842 if (!*dslot)
844 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
845 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
846 nielt = XNEW (struct int_tree_map);
847 nielt->uid = uid;
848 nielt->to = var_copy;
849 *dslot = nielt;
851 /* Ensure that when we meet this decl next time, we won't duplicate
852 it again. */
853 nuid = DECL_UID (var_copy);
854 ielt.uid = nuid;
855 dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
856 gcc_assert (!*dslot);
857 nielt = XNEW (struct int_tree_map);
858 nielt->uid = nuid;
859 nielt->to = var_copy;
860 *dslot = nielt;
862 else
863 var_copy = ((struct int_tree_map *) *dslot)->to;
865 replace_ssa_name_symbol (copy, var_copy);
866 return copy;
869 /* Finds the ssa names used in STMT that are defined outside the
870 region between ENTRY and EXIT and replaces such ssa names with
871 their duplicates. The duplicates are stored to NAME_COPIES. Base
872 decls of all ssa names used in STMT (including those defined in
873 LOOP) are replaced with the new temporary variables; the
874 replacement decls are stored in DECL_COPIES. */
876 static void
877 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
878 name_to_copy_table_type name_copies,
879 int_tree_htab_type decl_copies)
881 use_operand_p use;
882 def_operand_p def;
883 ssa_op_iter oi;
884 tree name, copy;
885 bool copy_name_p;
887 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
889 name = DEF_FROM_PTR (def);
890 gcc_assert (TREE_CODE (name) == SSA_NAME);
891 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
892 false);
893 gcc_assert (copy == name);
896 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
898 name = USE_FROM_PTR (use);
899 if (TREE_CODE (name) != SSA_NAME)
900 continue;
902 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
903 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
904 copy_name_p);
905 SET_USE (use, copy);
909 /* Finds the ssa names used in STMT that are defined outside the
910 region between ENTRY and EXIT and replaces such ssa names with
911 their duplicates. The duplicates are stored to NAME_COPIES. Base
912 decls of all ssa names used in STMT (including those defined in
913 LOOP) are replaced with the new temporary variables; the
914 replacement decls are stored in DECL_COPIES. */
916 static bool
917 separate_decls_in_region_debug (gimple stmt,
918 name_to_copy_table_type name_copies,
919 int_tree_htab_type decl_copies)
921 use_operand_p use;
922 ssa_op_iter oi;
923 tree var, name;
924 struct int_tree_map ielt;
925 struct name_to_copy_elt elt;
926 name_to_copy_elt **slot;
927 int_tree_map **dslot;
929 if (gimple_debug_bind_p (stmt))
930 var = gimple_debug_bind_get_var (stmt);
931 else if (gimple_debug_source_bind_p (stmt))
932 var = gimple_debug_source_bind_get_var (stmt);
933 else
934 return true;
935 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
936 return true;
937 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
938 ielt.uid = DECL_UID (var);
939 dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
940 if (!dslot)
941 return true;
942 if (gimple_debug_bind_p (stmt))
943 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
944 else if (gimple_debug_source_bind_p (stmt))
945 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
947 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
949 name = USE_FROM_PTR (use);
950 if (TREE_CODE (name) != SSA_NAME)
951 continue;
953 elt.version = SSA_NAME_VERSION (name);
954 slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
955 if (!slot)
957 gimple_debug_bind_reset_value (stmt);
958 update_stmt (stmt);
959 break;
962 SET_USE (use, (*slot)->new_name);
965 return false;
968 /* Callback for htab_traverse. Adds a field corresponding to the reduction
969 specified in SLOT. The type is passed in DATA. */
972 add_field_for_reduction (reduction_info **slot, tree type)
975 struct reduction_info *const red = *slot;
976 tree var = gimple_assign_lhs (red->reduc_stmt);
977 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
978 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
980 insert_field_into_struct (type, field);
982 red->field = field;
984 return 1;
987 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
988 described in SLOT. The type is passed in DATA. */
991 add_field_for_name (name_to_copy_elt **slot, tree type)
993 struct name_to_copy_elt *const elt = *slot;
994 tree name = ssa_name (elt->version);
995 tree field = build_decl (UNKNOWN_LOCATION,
996 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
997 TREE_TYPE (name));
999 insert_field_into_struct (type, field);
1000 elt->field = field;
1002 return 1;
1005 /* Callback for htab_traverse. A local result is the intermediate result
1006 computed by a single
1007 thread, or the initial value in case no iteration was executed.
1008 This function creates a phi node reflecting these values.
1009 The phi's result will be stored in NEW_PHI field of the
1010 reduction's data structure. */
1013 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1015 struct reduction_info *const reduc = *slot;
1016 edge e;
1017 gimple new_phi;
1018 basic_block store_bb;
1019 tree local_res;
1020 source_location locus;
1022 /* STORE_BB is the block where the phi
1023 should be stored. It is the destination of the loop exit.
1024 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1025 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1027 /* STORE_BB has two predecessors. One coming from the loop
1028 (the reduction's result is computed at the loop),
1029 and another coming from a block preceding the loop,
1030 when no iterations
1031 are executed (the initial value should be taken). */
1032 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1033 e = EDGE_PRED (store_bb, 1);
1034 else
1035 e = EDGE_PRED (store_bb, 0);
1036 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
1037 locus = gimple_location (reduc->reduc_stmt);
1038 new_phi = create_phi_node (local_res, store_bb);
1039 add_phi_arg (new_phi, reduc->init, e, locus);
1040 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1041 FALLTHRU_EDGE (loop->latch), locus);
1042 reduc->new_phi = new_phi;
1044 return 1;
1047 struct clsn_data
1049 tree store;
1050 tree load;
1052 basic_block store_bb;
1053 basic_block load_bb;
1056 /* Callback for htab_traverse. Create an atomic instruction for the
1057 reduction described in SLOT.
1058 DATA annotates the place in memory the atomic operation relates to,
1059 and the basic block it needs to be generated in. */
1062 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1064 struct reduction_info *const reduc = *slot;
1065 gimple_stmt_iterator gsi;
1066 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1067 tree load_struct;
1068 basic_block bb;
1069 basic_block new_bb;
1070 edge e;
1071 tree t, addr, ref, x;
1072 tree tmp_load, name;
1073 gimple load;
1075 load_struct = build_simple_mem_ref (clsn_data->load);
1076 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1078 addr = build_addr (t, current_function_decl);
1080 /* Create phi node. */
1081 bb = clsn_data->load_bb;
1083 e = split_block (bb, t);
1084 new_bb = e->dest;
1086 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1087 tmp_load = make_ssa_name (tmp_load, NULL);
1088 load = gimple_build_omp_atomic_load (tmp_load, addr);
1089 SSA_NAME_DEF_STMT (tmp_load) = load;
1090 gsi = gsi_start_bb (new_bb);
1091 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1093 e = split_block (new_bb, load);
1094 new_bb = e->dest;
1095 gsi = gsi_start_bb (new_bb);
1096 ref = tmp_load;
1097 x = fold_build2 (reduc->reduction_code,
1098 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1099 PHI_RESULT (reduc->new_phi));
1101 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1102 GSI_CONTINUE_LINKING);
1104 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1105 return 1;
1108 /* Create the atomic operation at the join point of the threads.
1109 REDUCTION_LIST describes the reductions in the LOOP.
1110 LD_ST_DATA describes the shared data structure where
1111 shared data is stored in and loaded from. */
1112 static void
1113 create_call_for_reduction (struct loop *loop,
1114 reduction_info_table_type reduction_list,
1115 struct clsn_data *ld_st_data)
1117 reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
1118 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1119 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1120 reduction_list
1121 .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1124 /* Callback for htab_traverse. Loads the final reduction value at the
1125 join point of all threads, and inserts it in the right place. */
1128 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1130 struct reduction_info *const red = *slot;
1131 gimple stmt;
1132 gimple_stmt_iterator gsi;
1133 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1134 tree load_struct;
1135 tree name;
1136 tree x;
1138 gsi = gsi_after_labels (clsn_data->load_bb);
1139 load_struct = build_simple_mem_ref (clsn_data->load);
1140 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1141 NULL_TREE);
1143 x = load_struct;
1144 name = PHI_RESULT (red->keep_res);
1145 stmt = gimple_build_assign (name, x);
1146 SSA_NAME_DEF_STMT (name) = stmt;
1148 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1150 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1151 !gsi_end_p (gsi); gsi_next (&gsi))
1152 if (gsi_stmt (gsi) == red->keep_res)
1154 remove_phi_node (&gsi, false);
1155 return 1;
1157 gcc_unreachable ();
1160 /* Load the reduction result that was stored in LD_ST_DATA.
1161 REDUCTION_LIST describes the list of reductions that the
1162 loads should be generated for. */
1163 static void
1164 create_final_loads_for_reduction (reduction_info_table_type reduction_list,
1165 struct clsn_data *ld_st_data)
1167 gimple_stmt_iterator gsi;
1168 tree t;
1169 gimple stmt;
1171 gsi = gsi_after_labels (ld_st_data->load_bb);
1172 t = build_fold_addr_expr (ld_st_data->store);
1173 stmt = gimple_build_assign (ld_st_data->load, t);
1175 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1176 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1178 reduction_list
1179 .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1183 /* Callback for htab_traverse. Store the neutral value for the
1184 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1185 1 for MULT_EXPR, etc. into the reduction field.
1186 The reduction is specified in SLOT. The store information is
1187 passed in DATA. */
1190 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1192 struct reduction_info *const red = *slot;
1193 tree t;
1194 gimple stmt;
1195 gimple_stmt_iterator gsi;
1196 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1198 gsi = gsi_last_bb (clsn_data->store_bb);
1199 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1200 stmt = gimple_build_assign (t, red->initial_value);
1201 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1203 return 1;
1206 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1207 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1208 specified in SLOT. */
1211 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1212 struct clsn_data *clsn_data)
1214 struct name_to_copy_elt *const elt = *slot;
1215 tree t;
1216 gimple stmt;
1217 gimple_stmt_iterator gsi;
1218 tree type = TREE_TYPE (elt->new_name);
1219 tree load_struct;
1221 gsi = gsi_last_bb (clsn_data->store_bb);
1222 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1223 stmt = gimple_build_assign (t, ssa_name (elt->version));
1224 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1226 gsi = gsi_last_bb (clsn_data->load_bb);
1227 load_struct = build_simple_mem_ref (clsn_data->load);
1228 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1229 stmt = gimple_build_assign (elt->new_name, t);
1230 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1231 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1233 return 1;
1236 /* Moves all the variables used in LOOP and defined outside of it (including
1237 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1238 name) to a structure created for this purpose. The code
1240 while (1)
1242 use (a);
1243 use (b);
1246 is transformed this way:
1248 bb0:
1249 old.a = a;
1250 old.b = b;
1252 bb1:
1253 a' = new->a;
1254 b' = new->b;
1255 while (1)
1257 use (a');
1258 use (b');
1261 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1262 pointer `new' is intentionally not initialized (the loop will be split to a
1263 separate function later, and `new' will be initialized from its arguments).
1264 LD_ST_DATA holds information about the shared data structure used to pass
1265 information among the threads. It is initialized here, and
1266 gen_parallel_loop will pass it to create_call_for_reduction that
1267 needs this information. REDUCTION_LIST describes the reductions
1268 in LOOP. */
1270 static void
1271 separate_decls_in_region (edge entry, edge exit,
1272 reduction_info_table_type reduction_list,
1273 tree *arg_struct, tree *new_arg_struct,
1274 struct clsn_data *ld_st_data)
1277 basic_block bb1 = split_edge (entry);
1278 basic_block bb0 = single_pred (bb1);
1279 name_to_copy_table_type name_copies;
1280 name_copies.create (10);
1281 int_tree_htab_type decl_copies;
1282 decl_copies.create (10);
1283 unsigned i;
1284 tree type, type_name, nvar;
1285 gimple_stmt_iterator gsi;
1286 struct clsn_data clsn_data;
1287 vec<basic_block> body;
1288 body.create (3);
1289 basic_block bb;
1290 basic_block entry_bb = bb1;
1291 basic_block exit_bb = exit->dest;
1292 bool has_debug_stmt = false;
1294 entry = single_succ_edge (entry_bb);
1295 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1297 FOR_EACH_VEC_ELT (body, i, bb)
1299 if (bb != entry_bb && bb != exit_bb)
1301 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1302 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1303 name_copies, decl_copies);
1305 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1307 gimple stmt = gsi_stmt (gsi);
1309 if (is_gimple_debug (stmt))
1310 has_debug_stmt = true;
1311 else
1312 separate_decls_in_region_stmt (entry, exit, stmt,
1313 name_copies, decl_copies);
1318 /* Now process debug bind stmts. We must not create decls while
1319 processing debug stmts, so we defer their processing so as to
1320 make sure we will have debug info for as many variables as
1321 possible (all of those that were dealt with in the loop above),
1322 and discard those for which we know there's nothing we can
1323 do. */
1324 if (has_debug_stmt)
1325 FOR_EACH_VEC_ELT (body, i, bb)
1326 if (bb != entry_bb && bb != exit_bb)
1328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1330 gimple stmt = gsi_stmt (gsi);
1332 if (is_gimple_debug (stmt))
1334 if (separate_decls_in_region_debug (stmt, name_copies,
1335 decl_copies))
1337 gsi_remove (&gsi, true);
1338 continue;
1342 gsi_next (&gsi);
1346 body.release ();
1348 if (name_copies.elements () == 0 && reduction_list.elements () == 0)
1350 /* It may happen that there is nothing to copy (if there are only
1351 loop carried and external variables in the loop). */
1352 *arg_struct = NULL;
1353 *new_arg_struct = NULL;
1355 else
1357 /* Create the type for the structure to store the ssa names to. */
1358 type = lang_hooks.types.make_type (RECORD_TYPE);
1359 type_name = build_decl (UNKNOWN_LOCATION,
1360 TYPE_DECL, create_tmp_var_name (".paral_data"),
1361 type);
1362 TYPE_NAME (type) = type_name;
1364 name_copies.traverse <tree, add_field_for_name> (type);
1365 if (reduction_list.is_created () && reduction_list.elements () > 0)
1367 /* Create the fields for reductions. */
1368 reduction_list.traverse <tree, add_field_for_reduction> (type);
1370 layout_type (type);
1372 /* Create the loads and stores. */
1373 *arg_struct = create_tmp_var (type, ".paral_data_store");
1374 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1375 *new_arg_struct = make_ssa_name (nvar, NULL);
1377 ld_st_data->store = *arg_struct;
1378 ld_st_data->load = *new_arg_struct;
1379 ld_st_data->store_bb = bb0;
1380 ld_st_data->load_bb = bb1;
1382 name_copies
1383 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1384 (ld_st_data);
1386 /* Load the calculation from memory (after the join of the threads). */
1388 if (reduction_list.is_created () && reduction_list.elements () > 0)
1390 reduction_list
1391 .traverse <struct clsn_data *, create_stores_for_reduction>
1392 (ld_st_data);
1393 clsn_data.load = make_ssa_name (nvar, NULL);
1394 clsn_data.load_bb = exit->dest;
1395 clsn_data.store = ld_st_data->store;
1396 create_final_loads_for_reduction (reduction_list, &clsn_data);
1400 decl_copies.dispose ();
1401 name_copies.dispose ();
1404 /* Bitmap containing uids of functions created by parallelization. We cannot
1405 allocate it from the default obstack, as it must live across compilation
1406 of several functions; we make it gc allocated instead. */
1408 static GTY(()) bitmap parallelized_functions;
1410 /* Returns true if FN was created by create_loop_fn. */
1412 bool
1413 parallelized_function_p (tree fn)
1415 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1416 return false;
1418 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1421 /* Creates and returns an empty function that will receive the body of
1422 a parallelized loop. */
1424 static tree
1425 create_loop_fn (location_t loc)
1427 char buf[100];
1428 char *tname;
1429 tree decl, type, name, t;
1430 struct function *act_cfun = cfun;
1431 static unsigned loopfn_num;
1433 loc = LOCATION_LOCUS (loc);
1434 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1435 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1436 clean_symbol_name (tname);
1437 name = get_identifier (tname);
1438 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1440 decl = build_decl (loc, FUNCTION_DECL, name, type);
1441 if (!parallelized_functions)
1442 parallelized_functions = BITMAP_GGC_ALLOC ();
1443 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1445 TREE_STATIC (decl) = 1;
1446 TREE_USED (decl) = 1;
1447 DECL_ARTIFICIAL (decl) = 1;
1448 DECL_IGNORED_P (decl) = 0;
1449 TREE_PUBLIC (decl) = 0;
1450 DECL_UNINLINABLE (decl) = 1;
1451 DECL_EXTERNAL (decl) = 0;
1452 DECL_CONTEXT (decl) = NULL_TREE;
1453 DECL_INITIAL (decl) = make_node (BLOCK);
1455 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1456 DECL_ARTIFICIAL (t) = 1;
1457 DECL_IGNORED_P (t) = 1;
1458 DECL_RESULT (decl) = t;
1460 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1461 ptr_type_node);
1462 DECL_ARTIFICIAL (t) = 1;
1463 DECL_ARG_TYPE (t) = ptr_type_node;
1464 DECL_CONTEXT (t) = decl;
1465 TREE_USED (t) = 1;
1466 DECL_ARGUMENTS (decl) = t;
1468 allocate_struct_function (decl, false);
1470 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1471 it. */
1472 set_cfun (act_cfun);
1474 return decl;
1477 /* Moves the exit condition of LOOP to the beginning of its header, and
1478 duplicates the part of the last iteration that gets disabled to the
1479 exit of the loop. NIT is the number of iterations of the loop
1480 (used to initialize the variables in the duplicated part).
1482 TODO: the common case is that latch of the loop is empty and immediately
1483 follows the loop exit. In this case, it would be better not to copy the
1484 body of the loop, but only move the entry of the loop directly before the
1485 exit check and increase the number of iterations of the loop by one.
1486 This may need some additional preconditioning in case NIT = ~0.
1487 REDUCTION_LIST describes the reductions in LOOP. */
1489 static void
1490 transform_to_exit_first_loop (struct loop *loop,
1491 reduction_info_table_type reduction_list,
1492 tree nit)
1494 basic_block *bbs, *nbbs, ex_bb, orig_header;
1495 unsigned n;
1496 bool ok;
1497 edge exit = single_dom_exit (loop), hpred;
1498 tree control, control_name, res, t;
1499 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1500 gimple_stmt_iterator gsi;
1501 tree nit_1;
1503 split_block_after_labels (loop->header);
1504 orig_header = single_succ (loop->header);
1505 hpred = single_succ_edge (loop->header);
1507 cond_stmt = last_stmt (exit->src);
1508 control = gimple_cond_lhs (cond_stmt);
1509 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1511 /* Make sure that we have phi nodes on exit for all loop header phis
1512 (create_parallel_loop requires that). */
1513 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1515 phi = gsi_stmt (gsi);
1516 res = PHI_RESULT (phi);
1517 t = copy_ssa_name (res, phi);
1518 SET_PHI_RESULT (phi, t);
1519 nphi = create_phi_node (res, orig_header);
1520 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1522 if (res == control)
1524 gimple_cond_set_lhs (cond_stmt, t);
1525 update_stmt (cond_stmt);
1526 control = t;
1530 bbs = get_loop_body_in_dom_order (loop);
1532 for (n = 0; bbs[n] != exit->src; n++)
1533 continue;
1534 nbbs = XNEWVEC (basic_block, n);
1535 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1536 bbs + 1, n, nbbs);
1537 gcc_assert (ok);
1538 free (bbs);
1539 ex_bb = nbbs[0];
1540 free (nbbs);
1542 /* Other than reductions, the only gimple reg that should be copied
1543 out of the loop is the control variable. */
1544 exit = single_dom_exit (loop);
1545 control_name = NULL_TREE;
1546 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1548 phi = gsi_stmt (gsi);
1549 res = PHI_RESULT (phi);
1550 if (virtual_operand_p (res))
1552 gsi_next (&gsi);
1553 continue;
1556 /* Check if it is a part of reduction. If it is,
1557 keep the phi at the reduction's keep_res field. The
1558 PHI_RESULT of this phi is the resulting value of the reduction
1559 variable when exiting the loop. */
1561 if (reduction_list.elements () > 0)
1563 struct reduction_info *red;
1565 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1566 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1567 if (red)
1569 red->keep_res = phi;
1570 gsi_next (&gsi);
1571 continue;
1574 gcc_assert (control_name == NULL_TREE
1575 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1576 control_name = res;
1577 remove_phi_node (&gsi, false);
1579 gcc_assert (control_name != NULL_TREE);
1581 /* Initialize the control variable to number of iterations
1582 according to the rhs of the exit condition. */
1583 gsi = gsi_after_labels (ex_bb);
1584 cond_nit = last_stmt (exit->src);
1585 nit_1 = gimple_cond_rhs (cond_nit);
1586 nit_1 = force_gimple_operand_gsi (&gsi,
1587 fold_convert (TREE_TYPE (control_name), nit_1),
1588 false, NULL_TREE, false, GSI_SAME_STMT);
1589 stmt = gimple_build_assign (control_name, nit_1);
1590 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1591 SSA_NAME_DEF_STMT (control_name) = stmt;
1594 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1595 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1596 NEW_DATA is the variable that should be initialized from the argument
1597 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1598 basic block containing GIMPLE_OMP_PARALLEL tree. */
1600 static basic_block
1601 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1602 tree new_data, unsigned n_threads, location_t loc)
1604 gimple_stmt_iterator gsi;
1605 basic_block bb, paral_bb, for_bb, ex_bb;
1606 tree t, param;
1607 gimple stmt, for_stmt, phi, cond_stmt;
1608 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1609 edge exit, nexit, guard, end, e;
1611 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1612 bb = loop_preheader_edge (loop)->src;
1613 paral_bb = single_pred (bb);
1614 gsi = gsi_last_bb (paral_bb);
1616 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1617 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1618 = build_int_cst (integer_type_node, n_threads);
1619 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1620 gimple_set_location (stmt, loc);
1622 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1624 /* Initialize NEW_DATA. */
1625 if (data)
1627 gsi = gsi_after_labels (bb);
1629 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1630 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1631 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1632 SSA_NAME_DEF_STMT (param) = stmt;
1634 stmt = gimple_build_assign (new_data,
1635 fold_convert (TREE_TYPE (new_data), param));
1636 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1637 SSA_NAME_DEF_STMT (new_data) = stmt;
1640 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1641 bb = split_loop_exit_edge (single_dom_exit (loop));
1642 gsi = gsi_last_bb (bb);
1643 stmt = gimple_build_omp_return (false);
1644 gimple_set_location (stmt, loc);
1645 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1647 /* Extract data for GIMPLE_OMP_FOR. */
1648 gcc_assert (loop->header == single_dom_exit (loop)->src);
1649 cond_stmt = last_stmt (loop->header);
1651 cvar = gimple_cond_lhs (cond_stmt);
1652 cvar_base = SSA_NAME_VAR (cvar);
1653 phi = SSA_NAME_DEF_STMT (cvar);
1654 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1655 initvar = copy_ssa_name (cvar, NULL);
1656 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1657 initvar);
1658 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1660 gsi = gsi_last_nondebug_bb (loop->latch);
1661 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1662 gsi_remove (&gsi, true);
1664 /* Prepare cfg. */
1665 for_bb = split_edge (loop_preheader_edge (loop));
1666 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1667 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1668 gcc_assert (exit == single_dom_exit (loop));
1670 guard = make_edge (for_bb, ex_bb, 0);
1671 single_succ_edge (loop->latch)->flags = 0;
1672 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1673 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1675 source_location locus;
1676 tree def;
1677 phi = gsi_stmt (gsi);
1678 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1680 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1681 locus = gimple_phi_arg_location_from_edge (stmt,
1682 loop_preheader_edge (loop));
1683 add_phi_arg (phi, def, guard, locus);
1685 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1686 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1687 add_phi_arg (phi, def, end, locus);
1689 e = redirect_edge_and_branch (exit, nexit->dest);
1690 PENDING_STMT (e) = NULL;
1692 /* Emit GIMPLE_OMP_FOR. */
1693 gimple_cond_set_lhs (cond_stmt, cvar_base);
1694 type = TREE_TYPE (cvar);
1695 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1696 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1698 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
1699 gimple_set_location (for_stmt, loc);
1700 gimple_omp_for_set_index (for_stmt, 0, initvar);
1701 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1702 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1703 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1704 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1705 cvar_base,
1706 build_int_cst (type, 1)));
1708 gsi = gsi_last_bb (for_bb);
1709 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1710 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1712 /* Emit GIMPLE_OMP_CONTINUE. */
1713 gsi = gsi_last_bb (loop->latch);
1714 stmt = gimple_build_omp_continue (cvar_next, cvar);
1715 gimple_set_location (stmt, loc);
1716 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1717 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1719 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1720 gsi = gsi_last_bb (ex_bb);
1721 stmt = gimple_build_omp_return (true);
1722 gimple_set_location (stmt, loc);
1723 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1725 /* After the above dom info is hosed. Re-compute it. */
1726 free_dominance_info (CDI_DOMINATORS);
1727 calculate_dominance_info (CDI_DOMINATORS);
1729 return paral_bb;
1732 /* Generates code to execute the iterations of LOOP in N_THREADS
1733 threads in parallel.
1735 NITER describes number of iterations of LOOP.
1736 REDUCTION_LIST describes the reductions existent in the LOOP. */
1738 static void
1739 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1740 unsigned n_threads, struct tree_niter_desc *niter)
1742 loop_iterator li;
1743 tree many_iterations_cond, type, nit;
1744 tree arg_struct, new_arg_struct;
1745 gimple_seq stmts;
1746 basic_block parallel_head;
1747 edge entry, exit;
1748 struct clsn_data clsn_data;
1749 unsigned prob;
1750 location_t loc;
1751 gimple cond_stmt;
1752 unsigned int m_p_thread=2;
1754 /* From
1756 ---------------------------------------------------------------------
1757 loop
1759 IV = phi (INIT, IV + STEP)
1760 BODY1;
1761 if (COND)
1762 break;
1763 BODY2;
1765 ---------------------------------------------------------------------
1767 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1768 we generate the following code:
1770 ---------------------------------------------------------------------
1772 if (MAY_BE_ZERO
1773 || NITER < MIN_PER_THREAD * N_THREADS)
1774 goto original;
1776 BODY1;
1777 store all local loop-invariant variables used in body of the loop to DATA.
1778 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1779 load the variables from DATA.
1780 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1781 BODY2;
1782 BODY1;
1783 GIMPLE_OMP_CONTINUE;
1784 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1785 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1786 goto end;
1788 original:
1789 loop
1791 IV = phi (INIT, IV + STEP)
1792 BODY1;
1793 if (COND)
1794 break;
1795 BODY2;
1798 end:
1802 /* Create two versions of the loop -- in the old one, we know that the
1803 number of iterations is large enough, and we will transform it into the
1804 loop that will be split to loop_fn, the new one will be used for the
1805 remaining iterations. */
1807 /* We should compute a better number-of-iterations value for outer loops.
1808 That is, if we have
1810 for (i = 0; i < n; ++i)
1811 for (j = 0; j < m; ++j)
1814 we should compute nit = n * m, not nit = n.
1815 Also may_be_zero handling would need to be adjusted. */
1817 type = TREE_TYPE (niter->niter);
1818 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1819 NULL_TREE);
1820 if (stmts)
1821 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1823 if (loop->inner)
1824 m_p_thread=2;
1825 else
1826 m_p_thread=MIN_PER_THREAD;
1828 many_iterations_cond =
1829 fold_build2 (GE_EXPR, boolean_type_node,
1830 nit, build_int_cst (type, m_p_thread * n_threads));
1832 many_iterations_cond
1833 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1834 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1835 many_iterations_cond);
1836 many_iterations_cond
1837 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1838 if (stmts)
1839 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1840 if (!is_gimple_condexpr (many_iterations_cond))
1842 many_iterations_cond
1843 = force_gimple_operand (many_iterations_cond, &stmts,
1844 true, NULL_TREE);
1845 if (stmts)
1846 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1849 initialize_original_copy_tables ();
1851 /* We assume that the loop usually iterates a lot. */
1852 prob = 4 * REG_BR_PROB_BASE / 5;
1853 loop_version (loop, many_iterations_cond, NULL,
1854 prob, prob, REG_BR_PROB_BASE - prob, true);
1855 update_ssa (TODO_update_ssa);
1856 free_original_copy_tables ();
1858 /* Base all the induction variables in LOOP on a single control one. */
1859 canonicalize_loop_ivs (loop, &nit, true);
1861 /* Ensure that the exit condition is the first statement in the loop. */
1862 transform_to_exit_first_loop (loop, reduction_list, nit);
1864 /* Generate initializations for reductions. */
1865 if (reduction_list.elements () > 0)
1866 reduction_list.traverse <struct loop *, initialize_reductions> (loop);
1868 /* Eliminate the references to local variables from the loop. */
1869 gcc_assert (single_exit (loop));
1870 entry = loop_preheader_edge (loop);
1871 exit = single_dom_exit (loop);
1873 eliminate_local_variables (entry, exit);
1874 /* In the old loop, move all variables non-local to the loop to a structure
1875 and back, and create separate decls for the variables used in loop. */
1876 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1877 &new_arg_struct, &clsn_data);
1879 /* Create the parallel constructs. */
1880 loc = UNKNOWN_LOCATION;
1881 cond_stmt = last_stmt (loop->header);
1882 if (cond_stmt)
1883 loc = gimple_location (cond_stmt);
1884 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1885 new_arg_struct, n_threads, loc);
1886 if (reduction_list.elements () > 0)
1887 create_call_for_reduction (loop, reduction_list, &clsn_data);
1889 scev_reset ();
1891 /* Cancel the loop (it is simpler to do it here rather than to teach the
1892 expander to do it). */
1893 cancel_loop_tree (loop);
1895 /* Free loop bound estimations that could contain references to
1896 removed statements. */
1897 FOR_EACH_LOOP (li, loop, 0)
1898 free_numbers_of_iterations_estimates_loop (loop);
1900 /* Expand the parallel constructs. We do it directly here instead of running
1901 a separate expand_omp pass, since it is more efficient, and less likely to
1902 cause troubles with further analyses not being able to deal with the
1903 OMP trees. */
1905 omp_expand_local (parallel_head);
1908 /* Returns true when LOOP contains vector phi nodes. */
1910 static bool
1911 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1913 unsigned i;
1914 basic_block *bbs = get_loop_body_in_dom_order (loop);
1915 gimple_stmt_iterator gsi;
1916 bool res = true;
1918 for (i = 0; i < loop->num_nodes; i++)
1919 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1920 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1921 goto end;
1923 res = false;
1924 end:
1925 free (bbs);
1926 return res;
1929 /* Create a reduction_info struct, initialize it with REDUC_STMT
1930 and PHI, insert it to the REDUCTION_LIST. */
1932 static void
1933 build_new_reduction (reduction_info_table_type reduction_list,
1934 gimple reduc_stmt, gimple phi)
1936 reduction_info **slot;
1937 struct reduction_info *new_reduction;
1939 gcc_assert (reduc_stmt);
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1943 fprintf (dump_file,
1944 "Detected reduction. reduction stmt is: \n");
1945 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1946 fprintf (dump_file, "\n");
1949 new_reduction = XCNEW (struct reduction_info);
1951 new_reduction->reduc_stmt = reduc_stmt;
1952 new_reduction->reduc_phi = phi;
1953 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1954 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1955 slot = reduction_list.find_slot (new_reduction, INSERT);
1956 *slot = new_reduction;
1959 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1962 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
1964 struct reduction_info *const red = *slot;
1965 gimple_set_uid (red->reduc_phi, red->reduc_version);
1966 return 1;
1969 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1971 static void
1972 gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
1974 gimple_stmt_iterator gsi;
1975 loop_vec_info simple_loop_info;
1977 simple_loop_info = vect_analyze_loop_form (loop);
1979 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1981 gimple phi = gsi_stmt (gsi);
1982 affine_iv iv;
1983 tree res = PHI_RESULT (phi);
1984 bool double_reduc;
1986 if (virtual_operand_p (res))
1987 continue;
1989 if (!simple_iv (loop, loop, res, &iv, true)
1990 && simple_loop_info)
1992 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1993 phi, true,
1994 &double_reduc);
1995 if (reduc_stmt && !double_reduc)
1996 build_new_reduction (reduction_list, reduc_stmt, phi);
1999 destroy_loop_vec_info (simple_loop_info, true);
2001 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2002 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2003 only now. */
2004 reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
2007 /* Try to initialize NITER for code generation part. */
2009 static bool
2010 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2012 edge exit = single_dom_exit (loop);
2014 gcc_assert (exit);
2016 /* We need to know # of iterations, and there should be no uses of values
2017 defined inside loop outside of it, unless the values are invariants of
2018 the loop. */
2019 if (!number_of_iterations_exit (loop, exit, niter, false))
2021 if (dump_file && (dump_flags & TDF_DETAILS))
2022 fprintf (dump_file, " FAILED: number of iterations not known\n");
2023 return false;
2026 return true;
2029 /* Try to initialize REDUCTION_LIST for code generation part.
2030 REDUCTION_LIST describes the reductions. */
2032 static bool
2033 try_create_reduction_list (loop_p loop,
2034 reduction_info_table_type reduction_list)
2036 edge exit = single_dom_exit (loop);
2037 gimple_stmt_iterator gsi;
2039 gcc_assert (exit);
2041 gather_scalar_reductions (loop, reduction_list);
2044 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2046 gimple phi = gsi_stmt (gsi);
2047 struct reduction_info *red;
2048 imm_use_iterator imm_iter;
2049 use_operand_p use_p;
2050 gimple reduc_phi;
2051 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2053 if (!virtual_operand_p (val))
2055 if (dump_file && (dump_flags & TDF_DETAILS))
2057 fprintf (dump_file, "phi is ");
2058 print_gimple_stmt (dump_file, phi, 0, 0);
2059 fprintf (dump_file, "arg of phi to exit: value ");
2060 print_generic_expr (dump_file, val, 0);
2061 fprintf (dump_file, " used outside loop\n");
2062 fprintf (dump_file,
2063 " checking if it a part of reduction pattern: \n");
2065 if (reduction_list.elements () == 0)
2067 if (dump_file && (dump_flags & TDF_DETAILS))
2068 fprintf (dump_file,
2069 " FAILED: it is not a part of reduction.\n");
2070 return false;
2072 reduc_phi = NULL;
2073 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2075 if (!gimple_debug_bind_p (USE_STMT (use_p))
2076 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2078 reduc_phi = USE_STMT (use_p);
2079 break;
2082 red = reduction_phi (reduction_list, reduc_phi);
2083 if (red == NULL)
2085 if (dump_file && (dump_flags & TDF_DETAILS))
2086 fprintf (dump_file,
2087 " FAILED: it is not a part of reduction.\n");
2088 return false;
2090 if (dump_file && (dump_flags & TDF_DETAILS))
2092 fprintf (dump_file, "reduction phi is ");
2093 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2094 fprintf (dump_file, "reduction stmt is ");
2095 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2100 /* The iterations of the loop may communicate only through bivs whose
2101 iteration space can be distributed efficiently. */
2102 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2104 gimple phi = gsi_stmt (gsi);
2105 tree def = PHI_RESULT (phi);
2106 affine_iv iv;
2108 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2110 struct reduction_info *red;
2112 red = reduction_phi (reduction_list, phi);
2113 if (red == NULL)
2115 if (dump_file && (dump_flags & TDF_DETAILS))
2116 fprintf (dump_file,
2117 " FAILED: scalar dependency between iterations\n");
2118 return false;
2124 return true;
2127 /* Detect parallel loops and generate parallel code using libgomp
2128 primitives. Returns true if some loop was parallelized, false
2129 otherwise. */
2131 bool
2132 parallelize_loops (void)
2134 unsigned n_threads = flag_tree_parallelize_loops;
2135 bool changed = false;
2136 struct loop *loop;
2137 struct tree_niter_desc niter_desc;
2138 loop_iterator li;
2139 reduction_info_table_type reduction_list;
2140 struct obstack parloop_obstack;
2141 HOST_WIDE_INT estimated;
2142 LOC loop_loc;
2144 /* Do not parallelize loops in the functions created by parallelization. */
2145 if (parallelized_function_p (cfun->decl))
2146 return false;
2147 if (cfun->has_nonlocal_label)
2148 return false;
2150 gcc_obstack_init (&parloop_obstack);
2151 reduction_list.create (10);
2152 init_stmt_vec_info_vec ();
2154 FOR_EACH_LOOP (li, loop, 0)
2156 reduction_list.empty ();
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2159 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2160 if (loop->inner)
2161 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2162 else
2163 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2166 /* If we use autopar in graphite pass, we use its marked dependency
2167 checking results. */
2168 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2170 if (dump_file && (dump_flags & TDF_DETAILS))
2171 fprintf (dump_file, "loop is not parallel according to graphite\n");
2172 continue;
2175 if (!single_dom_exit (loop))
2178 if (dump_file && (dump_flags & TDF_DETAILS))
2179 fprintf (dump_file, "loop is !single_dom_exit\n");
2181 continue;
2184 if (/* And of course, the loop must be parallelizable. */
2185 !can_duplicate_loop_p (loop)
2186 || loop_has_blocks_with_irreducible_flag (loop)
2187 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2188 /* FIXME: the check for vector phi nodes could be removed. */
2189 || loop_has_vector_phi_nodes (loop))
2190 continue;
2192 estimated = estimated_stmt_executions_int (loop);
2193 if (estimated == -1)
2194 estimated = max_stmt_executions_int (loop);
2195 /* FIXME: Bypass this check as graphite doesn't update the
2196 count and frequency correctly now. */
2197 if (!flag_loop_parallelize_all
2198 && ((estimated != -1
2199 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2200 /* Do not bother with loops in cold areas. */
2201 || optimize_loop_nest_for_size_p (loop)))
2202 continue;
2204 if (!try_get_loop_niter (loop, &niter_desc))
2205 continue;
2207 if (!try_create_reduction_list (loop, reduction_list))
2208 continue;
2210 if (!flag_loop_parallelize_all
2211 && !loop_parallel_p (loop, &parloop_obstack))
2212 continue;
2214 changed = true;
2215 if (dump_file && (dump_flags & TDF_DETAILS))
2217 if (loop->inner)
2218 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2219 else
2220 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2221 loop_loc = find_loop_location (loop);
2222 if (loop_loc != UNKNOWN_LOC)
2223 fprintf (dump_file, "\nloop at %s:%d: ",
2224 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2226 gen_parallel_loop (loop, reduction_list,
2227 n_threads, &niter_desc);
2230 free_stmt_vec_info_vec ();
2231 reduction_list.dispose ();
2232 obstack_free (&parloop_obstack, NULL);
2234 /* Parallelization will cause new function calls to be inserted through
2235 which local variables will escape. Reset the points-to solution
2236 for ESCAPED. */
2237 if (changed)
2238 pt_solution_reset (&cfun->gimple_df->escaped);
2240 return changed;
2243 #include "gt-tree-parloops.h"