Released GUPC 4.9.0.1 based on version 203902.
[official-gcc.git] / gcc / tree-parloops.c
blobb843fe5abec987defe32be6e6024b4da3b0de641
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "gimple-ssa.h"
28 #include "tree-cfg.h"
29 #include "tree-phinodes.h"
30 #include "ssa-iterators.h"
31 #include "tree-ssanames.h"
32 #include "tree-ssa-loop-ivopts.h"
33 #include "tree-ssa-loop-manip.h"
34 #include "tree-ssa-loop-niter.h"
35 #include "tree-ssa-loop.h"
36 #include "tree-into-ssa.h"
37 #include "cfgloop.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "gimple-pretty-print.h"
41 #include "tree-pass.h"
42 #include "langhooks.h"
43 #include "tree-vectorizer.h"
44 #include "tree-hasher.h"
45 #include "tree-parloops.h"
46 #include "omp-low.h"
48 /* This pass tries to distribute iterations of loops into several threads.
49 The implementation is straightforward -- for each loop we test whether its
50 iterations are independent, and if it is the case (and some additional
51 conditions regarding profitability and correctness are satisfied), we
52 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
53 machinery do its job.
55 Most of the complexity is in bringing the code into shape expected
56 by the omp expanders:
57 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
58 variable and that the exit test is at the start of the loop body
59 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
60 variables by accesses through pointers, and breaking up ssa chains
61 by storing the values incoming to the parallelized loop to a structure
62 passed to the new function as an argument (something similar is done
63 in omp gimplification, unfortunately only a small part of the code
64 can be shared).
66 TODO:
67 -- if there are several parallelizable loops in a function, it may be
68 possible to generate the threads just once (using synchronization to
69 ensure that cross-loop dependences are obeyed).
70 -- handling of common reduction patterns for outer loops.
72 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
73 /*
74 Reduction handling:
75 currently we use vect_force_simple_reduction() to detect reduction patterns.
76 The code transformation will be introduced by an example.
79 parloop
81 int sum=1;
83 for (i = 0; i < N; i++)
85 x[i] = i + 3;
86 sum+=x[i];
90 gimple-like code:
91 header_bb:
93 # sum_29 = PHI <sum_11(5), 1(3)>
94 # i_28 = PHI <i_12(5), 0(3)>
95 D.1795_8 = i_28 + 3;
96 x[i_28] = D.1795_8;
97 sum_11 = D.1795_8 + sum_29;
98 i_12 = i_28 + 1;
99 if (N_6(D) > i_12)
100 goto header_bb;
103 exit_bb:
105 # sum_21 = PHI <sum_11(4)>
106 printf (&"%d"[0], sum_21);
109 after reduction transformation (only relevant parts):
111 parloop
114 ....
117 # Storing the initial value given by the user. #
119 .paral_data_store.32.sum.27 = 1;
121 #pragma omp parallel num_threads(4)
123 #pragma omp for schedule(static)
125 # The neutral element corresponding to the particular
126 reduction's operation, e.g. 0 for PLUS_EXPR,
127 1 for MULT_EXPR, etc. replaces the user's initial value. #
129 # sum.27_29 = PHI <sum.27_11, 0>
131 sum.27_11 = D.1827_8 + sum.27_29;
133 GIMPLE_OMP_CONTINUE
135 # Adding this reduction phi is done at create_phi_for_local_result() #
136 # sum.27_56 = PHI <sum.27_11, 0>
137 GIMPLE_OMP_RETURN
139 # Creating the atomic operation is done at
140 create_call_for_reduction_1() #
142 #pragma omp atomic_load
143 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
144 D.1840_60 = sum.27_56 + D.1839_59;
145 #pragma omp atomic_store (D.1840_60);
147 GIMPLE_OMP_RETURN
149 # collecting the result after the join of the threads is done at
150 create_loads_for_reductions().
151 The value computed by the threads is loaded from the
152 shared struct. #
155 .paral_data_load.33_52 = &.paral_data_store.32;
156 sum_37 = .paral_data_load.33_52->sum.27;
157 sum_43 = D.1795_41 + sum_37;
159 exit bb:
160 # sum_21 = PHI <sum_43, sum_26>
161 printf (&"%d"[0], sum_21);
169 /* Minimal number of iterations of a loop that should be executed in each
170 thread. */
171 #define MIN_PER_THREAD 100
173 /* Element of the hashtable, representing a
174 reduction in the current loop. */
175 struct reduction_info
177 gimple reduc_stmt; /* reduction statement. */
178 gimple reduc_phi; /* The phi node defining the reduction. */
179 enum tree_code reduction_code;/* code for the reduction operation. */
180 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
181 result. */
182 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
183 of the reduction variable when existing the loop. */
184 tree initial_value; /* The initial value of the reduction var before entering the loop. */
185 tree field; /* the name of the field in the parloop data structure intended for reduction. */
186 tree init; /* reduction initialization value. */
187 gimple new_phi; /* (helper field) Newly created phi node whose result
188 will be passed to the atomic operation. Represents
189 the local result each thread computed for the reduction
190 operation. */
193 /* Reduction info hashtable helpers. */
195 struct reduction_hasher : typed_free_remove <reduction_info>
197 typedef reduction_info value_type;
198 typedef reduction_info compare_type;
199 static inline hashval_t hash (const value_type *);
200 static inline bool equal (const value_type *, const compare_type *);
203 /* Equality and hash functions for hashtab code. */
205 inline bool
206 reduction_hasher::equal (const value_type *a, const compare_type *b)
208 return (a->reduc_phi == b->reduc_phi);
211 inline hashval_t
212 reduction_hasher::hash (const value_type *a)
214 return a->reduc_version;
217 typedef hash_table <reduction_hasher> reduction_info_table_type;
220 static struct reduction_info *
221 reduction_phi (reduction_info_table_type reduction_list, gimple phi)
223 struct reduction_info tmpred, *red;
225 if (reduction_list.elements () == 0 || phi == NULL)
226 return NULL;
228 tmpred.reduc_phi = phi;
229 tmpred.reduc_version = gimple_uid (phi);
230 red = reduction_list.find (&tmpred);
232 return red;
235 /* Element of hashtable of names to copy. */
237 struct name_to_copy_elt
239 unsigned version; /* The version of the name to copy. */
240 tree new_name; /* The new name used in the copy. */
241 tree field; /* The field of the structure used to pass the
242 value. */
245 /* Name copies hashtable helpers. */
247 struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
249 typedef name_to_copy_elt value_type;
250 typedef name_to_copy_elt compare_type;
251 static inline hashval_t hash (const value_type *);
252 static inline bool equal (const value_type *, const compare_type *);
255 /* Equality and hash functions for hashtab code. */
257 inline bool
258 name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
260 return a->version == b->version;
263 inline hashval_t
264 name_to_copy_hasher::hash (const value_type *a)
266 return (hashval_t) a->version;
269 typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
271 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
272 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
273 represents the denominator for every element in the matrix. */
274 typedef struct lambda_trans_matrix_s
276 lambda_matrix matrix;
277 int rowsize;
278 int colsize;
279 int denominator;
280 } *lambda_trans_matrix;
281 #define LTM_MATRIX(T) ((T)->matrix)
282 #define LTM_ROWSIZE(T) ((T)->rowsize)
283 #define LTM_COLSIZE(T) ((T)->colsize)
284 #define LTM_DENOMINATOR(T) ((T)->denominator)
286 /* Allocate a new transformation matrix. */
288 static lambda_trans_matrix
289 lambda_trans_matrix_new (int colsize, int rowsize,
290 struct obstack * lambda_obstack)
292 lambda_trans_matrix ret;
294 ret = (lambda_trans_matrix)
295 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
296 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
297 LTM_ROWSIZE (ret) = rowsize;
298 LTM_COLSIZE (ret) = colsize;
299 LTM_DENOMINATOR (ret) = 1;
300 return ret;
303 /* Multiply a vector VEC by a matrix MAT.
304 MAT is an M*N matrix, and VEC is a vector with length N. The result
305 is stored in DEST which must be a vector of length M. */
307 static void
308 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
309 lambda_vector vec, lambda_vector dest)
311 int i, j;
313 lambda_vector_clear (dest, m);
314 for (i = 0; i < m; i++)
315 for (j = 0; j < n; j++)
316 dest[i] += matrix[i][j] * vec[j];
319 /* Return true if TRANS is a legal transformation matrix that respects
320 the dependence vectors in DISTS and DIRS. The conservative answer
321 is false.
323 "Wolfe proves that a unimodular transformation represented by the
324 matrix T is legal when applied to a loop nest with a set of
325 lexicographically non-negative distance vectors RDG if and only if
326 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
327 i.e.: if and only if it transforms the lexicographically positive
328 distance vectors to lexicographically positive vectors. Note that
329 a unimodular matrix must transform the zero vector (and only it) to
330 the zero vector." S.Muchnick. */
332 static bool
333 lambda_transform_legal_p (lambda_trans_matrix trans,
334 int nb_loops,
335 vec<ddr_p> dependence_relations)
337 unsigned int i, j;
338 lambda_vector distres;
339 struct data_dependence_relation *ddr;
341 gcc_assert (LTM_COLSIZE (trans) == nb_loops
342 && LTM_ROWSIZE (trans) == nb_loops);
344 /* When there are no dependences, the transformation is correct. */
345 if (dependence_relations.length () == 0)
346 return true;
348 ddr = dependence_relations[0];
349 if (ddr == NULL)
350 return true;
352 /* When there is an unknown relation in the dependence_relations, we
353 know that it is no worth looking at this loop nest: give up. */
354 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
355 return false;
357 distres = lambda_vector_new (nb_loops);
359 /* For each distance vector in the dependence graph. */
360 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
362 /* Don't care about relations for which we know that there is no
363 dependence, nor about read-read (aka. output-dependences):
364 these data accesses can happen in any order. */
365 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
366 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
367 continue;
369 /* Conservatively answer: "this transformation is not valid". */
370 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
371 return false;
373 /* If the dependence could not be captured by a distance vector,
374 conservatively answer that the transform is not valid. */
375 if (DDR_NUM_DIST_VECTS (ddr) == 0)
376 return false;
378 /* Compute trans.dist_vect */
379 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
381 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
382 DDR_DIST_VECT (ddr, j), distres);
384 if (!lambda_vector_lexico_pos (distres, nb_loops))
385 return false;
388 return true;
391 /* Data dependency analysis. Returns true if the iterations of LOOP
392 are independent on each other (that is, if we can execute them
393 in parallel). */
395 static bool
396 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
398 vec<loop_p> loop_nest;
399 vec<ddr_p> dependence_relations;
400 vec<data_reference_p> datarefs;
401 lambda_trans_matrix trans;
402 bool ret = false;
404 if (dump_file && (dump_flags & TDF_DETAILS))
406 fprintf (dump_file, "Considering loop %d\n", loop->num);
407 if (!loop->inner)
408 fprintf (dump_file, "loop is innermost\n");
409 else
410 fprintf (dump_file, "loop NOT innermost\n");
413 /* Check for problems with dependences. If the loop can be reversed,
414 the iterations are independent. */
415 datarefs.create (10);
416 dependence_relations.create (10 * 10);
417 loop_nest.create (3);
418 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
419 &dependence_relations))
421 if (dump_file && (dump_flags & TDF_DETAILS))
422 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
423 ret = false;
424 goto end;
426 if (dump_file && (dump_flags & TDF_DETAILS))
427 dump_data_dependence_relations (dump_file, dependence_relations);
429 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
430 LTM_MATRIX (trans)[0][0] = -1;
432 if (lambda_transform_legal_p (trans, 1, dependence_relations))
434 ret = true;
435 if (dump_file && (dump_flags & TDF_DETAILS))
436 fprintf (dump_file, " SUCCESS: may be parallelized\n");
438 else if (dump_file && (dump_flags & TDF_DETAILS))
439 fprintf (dump_file,
440 " FAILED: data dependencies exist across iterations\n");
442 end:
443 loop_nest.release ();
444 free_dependence_relations (dependence_relations);
445 free_data_refs (datarefs);
447 return ret;
450 /* Return true when LOOP contains basic blocks marked with the
451 BB_IRREDUCIBLE_LOOP flag. */
453 static inline bool
454 loop_has_blocks_with_irreducible_flag (struct loop *loop)
456 unsigned i;
457 basic_block *bbs = get_loop_body_in_dom_order (loop);
458 bool res = true;
460 for (i = 0; i < loop->num_nodes; i++)
461 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
462 goto end;
464 res = false;
465 end:
466 free (bbs);
467 return res;
470 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
471 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
472 to their addresses that can be reused. The address of OBJ is known to
473 be invariant in the whole function. Other needed statements are placed
474 right before GSI. */
476 static tree
477 take_address_of (tree obj, tree type, edge entry,
478 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
480 int uid;
481 int_tree_map **dslot;
482 struct int_tree_map ielt, *nielt;
483 tree *var_p, name, addr;
484 gimple stmt;
485 gimple_seq stmts;
487 /* Since the address of OBJ is invariant, the trees may be shared.
488 Avoid rewriting unrelated parts of the code. */
489 obj = unshare_expr (obj);
490 for (var_p = &obj;
491 handled_component_p (*var_p);
492 var_p = &TREE_OPERAND (*var_p, 0))
493 continue;
495 /* Canonicalize the access to base on a MEM_REF. */
496 if (DECL_P (*var_p))
497 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
499 /* Assign a canonical SSA name to the address of the base decl used
500 in the address and share it for all accesses and addresses based
501 on it. */
502 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
503 ielt.uid = uid;
504 dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
505 if (!*dslot)
507 if (gsi == NULL)
508 return NULL;
509 addr = TREE_OPERAND (*var_p, 0);
510 const char *obj_name
511 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
512 if (obj_name)
513 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
514 else
515 name = make_ssa_name (TREE_TYPE (addr), NULL);
516 stmt = gimple_build_assign (name, addr);
517 gsi_insert_on_edge_immediate (entry, stmt);
519 nielt = XNEW (struct int_tree_map);
520 nielt->uid = uid;
521 nielt->to = name;
522 *dslot = nielt;
524 else
525 name = (*dslot)->to;
527 /* Express the address in terms of the canonical SSA name. */
528 TREE_OPERAND (*var_p, 0) = name;
529 if (gsi == NULL)
530 return build_fold_addr_expr_with_type (obj, type);
532 name = force_gimple_operand (build_addr (obj, current_function_decl),
533 &stmts, true, NULL_TREE);
534 if (!gimple_seq_empty_p (stmts))
535 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
537 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
539 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
540 NULL_TREE);
541 if (!gimple_seq_empty_p (stmts))
542 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
545 return name;
548 /* Callback for htab_traverse. Create the initialization statement
549 for reduction described in SLOT, and place it at the preheader of
550 the loop described in DATA. */
553 initialize_reductions (reduction_info **slot, struct loop *loop)
555 tree init, c;
556 tree bvar, type, arg;
557 edge e;
559 struct reduction_info *const reduc = *slot;
561 /* Create initialization in preheader:
562 reduction_variable = initialization value of reduction. */
564 /* In the phi node at the header, replace the argument coming
565 from the preheader with the reduction initialization value. */
567 /* Create a new variable to initialize the reduction. */
568 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
569 bvar = create_tmp_var (type, "reduction");
571 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
572 OMP_CLAUSE_REDUCTION);
573 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
574 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
576 init = omp_reduction_init (c, TREE_TYPE (bvar));
577 reduc->init = init;
579 /* Replace the argument representing the initialization value
580 with the initialization value for the reduction (neutral
581 element for the particular operation, e.g. 0 for PLUS_EXPR,
582 1 for MULT_EXPR, etc).
583 Keep the old value in a new variable "reduction_initial",
584 that will be taken in consideration after the parallel
585 computing is done. */
587 e = loop_preheader_edge (loop);
588 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
589 /* Create new variable to hold the initial value. */
591 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
592 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
593 reduc->initial_value = arg;
594 return 1;
597 struct elv_data
599 struct walk_stmt_info info;
600 edge entry;
601 int_tree_htab_type decl_address;
602 gimple_stmt_iterator *gsi;
603 bool changed;
604 bool reset;
607 /* Eliminates references to local variables in *TP out of the single
608 entry single exit region starting at DTA->ENTRY.
609 DECL_ADDRESS contains addresses of the references that had their
610 address taken already. If the expression is changed, CHANGED is
611 set to true. Callback for walk_tree. */
613 static tree
614 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
616 struct elv_data *const dta = (struct elv_data *) data;
617 tree t = *tp, var, addr, addr_type, type, obj;
619 if (DECL_P (t))
621 *walk_subtrees = 0;
623 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
624 return NULL_TREE;
626 type = TREE_TYPE (t);
627 addr_type = build_pointer_type (type);
628 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
629 dta->gsi);
630 if (dta->gsi == NULL && addr == NULL_TREE)
632 dta->reset = true;
633 return NULL_TREE;
636 *tp = build_simple_mem_ref (addr);
638 dta->changed = true;
639 return NULL_TREE;
642 if (TREE_CODE (t) == ADDR_EXPR)
644 /* ADDR_EXPR may appear in two contexts:
645 -- as a gimple operand, when the address taken is a function invariant
646 -- as gimple rhs, when the resulting address in not a function
647 invariant
648 We do not need to do anything special in the latter case (the base of
649 the memory reference whose address is taken may be replaced in the
650 DECL_P case). The former case is more complicated, as we need to
651 ensure that the new address is still a gimple operand. Thus, it
652 is not sufficient to replace just the base of the memory reference --
653 we need to move the whole computation of the address out of the
654 loop. */
655 if (!is_gimple_val (t))
656 return NULL_TREE;
658 *walk_subtrees = 0;
659 obj = TREE_OPERAND (t, 0);
660 var = get_base_address (obj);
661 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
662 return NULL_TREE;
664 addr_type = TREE_TYPE (t);
665 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
666 dta->gsi);
667 if (dta->gsi == NULL && addr == NULL_TREE)
669 dta->reset = true;
670 return NULL_TREE;
672 *tp = addr;
674 dta->changed = true;
675 return NULL_TREE;
678 if (!EXPR_P (t))
679 *walk_subtrees = 0;
681 return NULL_TREE;
684 /* Moves the references to local variables in STMT at *GSI out of the single
685 entry single exit region starting at ENTRY. DECL_ADDRESS contains
686 addresses of the references that had their address taken
687 already. */
689 static void
690 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
691 int_tree_htab_type decl_address)
693 struct elv_data dta;
694 gimple stmt = gsi_stmt (*gsi);
696 memset (&dta.info, '\0', sizeof (dta.info));
697 dta.entry = entry;
698 dta.decl_address = decl_address;
699 dta.changed = false;
700 dta.reset = false;
702 if (gimple_debug_bind_p (stmt))
704 dta.gsi = NULL;
705 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
706 eliminate_local_variables_1, &dta.info, NULL);
707 if (dta.reset)
709 gimple_debug_bind_reset_value (stmt);
710 dta.changed = true;
713 else if (gimple_clobber_p (stmt))
715 stmt = gimple_build_nop ();
716 gsi_replace (gsi, stmt, false);
717 dta.changed = true;
719 else
721 dta.gsi = gsi;
722 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
725 if (dta.changed)
726 update_stmt (stmt);
729 /* Eliminates the references to local variables from the single entry
730 single exit region between the ENTRY and EXIT edges.
732 This includes:
733 1) Taking address of a local variable -- these are moved out of the
734 region (and temporary variable is created to hold the address if
735 necessary).
737 2) Dereferencing a local variable -- these are replaced with indirect
738 references. */
740 static void
741 eliminate_local_variables (edge entry, edge exit)
743 basic_block bb;
744 vec<basic_block> body;
745 body.create (3);
746 unsigned i;
747 gimple_stmt_iterator gsi;
748 bool has_debug_stmt = false;
749 int_tree_htab_type decl_address;
750 decl_address.create (10);
751 basic_block entry_bb = entry->src;
752 basic_block exit_bb = exit->dest;
754 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
756 FOR_EACH_VEC_ELT (body, i, bb)
757 if (bb != entry_bb && bb != exit_bb)
758 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
759 if (is_gimple_debug (gsi_stmt (gsi)))
761 if (gimple_debug_bind_p (gsi_stmt (gsi)))
762 has_debug_stmt = true;
764 else
765 eliminate_local_variables_stmt (entry, &gsi, decl_address);
767 if (has_debug_stmt)
768 FOR_EACH_VEC_ELT (body, i, bb)
769 if (bb != entry_bb && bb != exit_bb)
770 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
771 if (gimple_debug_bind_p (gsi_stmt (gsi)))
772 eliminate_local_variables_stmt (entry, &gsi, decl_address);
774 decl_address.dispose ();
775 body.release ();
778 /* Returns true if expression EXPR is not defined between ENTRY and
779 EXIT, i.e. if all its operands are defined outside of the region. */
781 static bool
782 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
784 basic_block entry_bb = entry->src;
785 basic_block exit_bb = exit->dest;
786 basic_block def_bb;
788 if (is_gimple_min_invariant (expr))
789 return true;
791 if (TREE_CODE (expr) == SSA_NAME)
793 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
794 if (def_bb
795 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
796 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
797 return false;
799 return true;
802 return false;
805 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
806 The copies are stored to NAME_COPIES, if NAME was already duplicated,
807 its duplicate stored in NAME_COPIES is returned.
809 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
810 duplicated, storing the copies in DECL_COPIES. */
812 static tree
813 separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
814 int_tree_htab_type decl_copies, bool copy_name_p)
816 tree copy, var, var_copy;
817 unsigned idx, uid, nuid;
818 struct int_tree_map ielt, *nielt;
819 struct name_to_copy_elt elt, *nelt;
820 name_to_copy_elt **slot;
821 int_tree_map **dslot;
823 if (TREE_CODE (name) != SSA_NAME)
824 return name;
826 idx = SSA_NAME_VERSION (name);
827 elt.version = idx;
828 slot = name_copies.find_slot_with_hash (&elt, idx,
829 copy_name_p ? INSERT : NO_INSERT);
830 if (slot && *slot)
831 return (*slot)->new_name;
833 if (copy_name_p)
835 copy = duplicate_ssa_name (name, NULL);
836 nelt = XNEW (struct name_to_copy_elt);
837 nelt->version = idx;
838 nelt->new_name = copy;
839 nelt->field = NULL_TREE;
840 *slot = nelt;
842 else
844 gcc_assert (!slot);
845 copy = name;
848 var = SSA_NAME_VAR (name);
849 if (!var)
850 return copy;
852 uid = DECL_UID (var);
853 ielt.uid = uid;
854 dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
855 if (!*dslot)
857 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
858 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
859 nielt = XNEW (struct int_tree_map);
860 nielt->uid = uid;
861 nielt->to = var_copy;
862 *dslot = nielt;
864 /* Ensure that when we meet this decl next time, we won't duplicate
865 it again. */
866 nuid = DECL_UID (var_copy);
867 ielt.uid = nuid;
868 dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
869 gcc_assert (!*dslot);
870 nielt = XNEW (struct int_tree_map);
871 nielt->uid = nuid;
872 nielt->to = var_copy;
873 *dslot = nielt;
875 else
876 var_copy = ((struct int_tree_map *) *dslot)->to;
878 replace_ssa_name_symbol (copy, var_copy);
879 return copy;
882 /* Finds the ssa names used in STMT that are defined outside the
883 region between ENTRY and EXIT and replaces such ssa names with
884 their duplicates. The duplicates are stored to NAME_COPIES. Base
885 decls of all ssa names used in STMT (including those defined in
886 LOOP) are replaced with the new temporary variables; the
887 replacement decls are stored in DECL_COPIES. */
889 static void
890 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
891 name_to_copy_table_type name_copies,
892 int_tree_htab_type decl_copies)
894 use_operand_p use;
895 def_operand_p def;
896 ssa_op_iter oi;
897 tree name, copy;
898 bool copy_name_p;
900 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
902 name = DEF_FROM_PTR (def);
903 gcc_assert (TREE_CODE (name) == SSA_NAME);
904 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
905 false);
906 gcc_assert (copy == name);
909 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
911 name = USE_FROM_PTR (use);
912 if (TREE_CODE (name) != SSA_NAME)
913 continue;
915 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
916 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
917 copy_name_p);
918 SET_USE (use, copy);
922 /* Finds the ssa names used in STMT that are defined outside the
923 region between ENTRY and EXIT and replaces such ssa names with
924 their duplicates. The duplicates are stored to NAME_COPIES. Base
925 decls of all ssa names used in STMT (including those defined in
926 LOOP) are replaced with the new temporary variables; the
927 replacement decls are stored in DECL_COPIES. */
929 static bool
930 separate_decls_in_region_debug (gimple stmt,
931 name_to_copy_table_type name_copies,
932 int_tree_htab_type decl_copies)
934 use_operand_p use;
935 ssa_op_iter oi;
936 tree var, name;
937 struct int_tree_map ielt;
938 struct name_to_copy_elt elt;
939 name_to_copy_elt **slot;
940 int_tree_map **dslot;
942 if (gimple_debug_bind_p (stmt))
943 var = gimple_debug_bind_get_var (stmt);
944 else if (gimple_debug_source_bind_p (stmt))
945 var = gimple_debug_source_bind_get_var (stmt);
946 else
947 return true;
948 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
949 return true;
950 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
951 ielt.uid = DECL_UID (var);
952 dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
953 if (!dslot)
954 return true;
955 if (gimple_debug_bind_p (stmt))
956 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
957 else if (gimple_debug_source_bind_p (stmt))
958 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
960 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
962 name = USE_FROM_PTR (use);
963 if (TREE_CODE (name) != SSA_NAME)
964 continue;
966 elt.version = SSA_NAME_VERSION (name);
967 slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
968 if (!slot)
970 gimple_debug_bind_reset_value (stmt);
971 update_stmt (stmt);
972 break;
975 SET_USE (use, (*slot)->new_name);
978 return false;
981 /* Callback for htab_traverse. Adds a field corresponding to the reduction
982 specified in SLOT. The type is passed in DATA. */
985 add_field_for_reduction (reduction_info **slot, tree type)
988 struct reduction_info *const red = *slot;
989 tree var = gimple_assign_lhs (red->reduc_stmt);
990 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
991 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
993 insert_field_into_struct (type, field);
995 red->field = field;
997 return 1;
1000 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1001 described in SLOT. The type is passed in DATA. */
1004 add_field_for_name (name_to_copy_elt **slot, tree type)
1006 struct name_to_copy_elt *const elt = *slot;
1007 tree name = ssa_name (elt->version);
1008 tree field = build_decl (UNKNOWN_LOCATION,
1009 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1010 TREE_TYPE (name));
1012 insert_field_into_struct (type, field);
1013 elt->field = field;
1015 return 1;
1018 /* Callback for htab_traverse. A local result is the intermediate result
1019 computed by a single
1020 thread, or the initial value in case no iteration was executed.
1021 This function creates a phi node reflecting these values.
1022 The phi's result will be stored in NEW_PHI field of the
1023 reduction's data structure. */
1026 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1028 struct reduction_info *const reduc = *slot;
1029 edge e;
1030 gimple new_phi;
1031 basic_block store_bb;
1032 tree local_res;
1033 source_location locus;
1035 /* STORE_BB is the block where the phi
1036 should be stored. It is the destination of the loop exit.
1037 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1038 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1040 /* STORE_BB has two predecessors. One coming from the loop
1041 (the reduction's result is computed at the loop),
1042 and another coming from a block preceding the loop,
1043 when no iterations
1044 are executed (the initial value should be taken). */
1045 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1046 e = EDGE_PRED (store_bb, 1);
1047 else
1048 e = EDGE_PRED (store_bb, 0);
1049 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
1050 locus = gimple_location (reduc->reduc_stmt);
1051 new_phi = create_phi_node (local_res, store_bb);
1052 add_phi_arg (new_phi, reduc->init, e, locus);
1053 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1054 FALLTHRU_EDGE (loop->latch), locus);
1055 reduc->new_phi = new_phi;
1057 return 1;
1060 struct clsn_data
1062 tree store;
1063 tree load;
1065 basic_block store_bb;
1066 basic_block load_bb;
1069 /* Callback for htab_traverse. Create an atomic instruction for the
1070 reduction described in SLOT.
1071 DATA annotates the place in memory the atomic operation relates to,
1072 and the basic block it needs to be generated in. */
1075 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1077 struct reduction_info *const reduc = *slot;
1078 gimple_stmt_iterator gsi;
1079 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1080 tree load_struct;
1081 basic_block bb;
1082 basic_block new_bb;
1083 edge e;
1084 tree t, addr, ref, x;
1085 tree tmp_load, name;
1086 gimple load;
/* Address of this reduction's field inside the shared structure; this is
   the memory location the atomic update operates on.  */
1088 load_struct = build_simple_mem_ref (clsn_data->load);
1089 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1091 addr = build_addr (t, current_function_decl);
1093 /* Create phi node. */
1094 bb = clsn_data->load_bb;
/* Split LOAD_BB twice so the OMP atomic load/store pair ends up in its
   own region: first split isolates the atomic load, second split gives a
   fresh block for the combining operation and the atomic store.  */
1096 e = split_block (bb, t);
1097 new_bb = e->dest;
1099 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1100 tmp_load = make_ssa_name (tmp_load, NULL);
1101 load = gimple_build_omp_atomic_load (tmp_load, addr);
1102 SSA_NAME_DEF_STMT (tmp_load) = load;
1103 gsi = gsi_start_bb (new_bb);
1104 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1106 e = split_block (new_bb, load);
1107 new_bb = e->dest;
1108 gsi = gsi_start_bb (new_bb);
1109 ref = tmp_load;
/* Combine the loaded shared value with this thread's partial result
   (REDUC->NEW_PHI) using the reduction's operation.  */
1110 x = fold_build2 (reduc->reduction_code,
1111 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1112 PHI_RESULT (reduc->new_phi));
1114 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1115 GSI_CONTINUE_LINKING);
1117 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
/* Continue traversing the reduction table.  */
1118 return 1;
1121 /* Create the atomic operation at the join point of the threads.
1122 REDUCTION_LIST describes the reductions in the LOOP.
1123 LD_ST_DATA describes the shared data structure where
1124 shared data is stored in and loaded from. */
1125 static void
1126 create_call_for_reduction (struct loop *loop,
1127 reduction_info_table_type reduction_list,
1128 struct clsn_data *ld_st_data)
/* First create the per-thread result phis, then emit one atomic
   update per reduction at the thread-join block.  */
1130 reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
1131 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1132 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1133 reduction_list
1134 .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1137 /* Callback for htab_traverse. Loads the final reduction value at the
1138 join point of all threads, and inserts it in the right place. */
1141 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1143 struct reduction_info *const red = *slot;
1144 gimple stmt;
1145 gimple_stmt_iterator gsi;
1146 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1147 tree load_struct;
1148 tree name;
1149 tree x;
1151 gsi = gsi_after_labels (clsn_data->load_bb);
1152 load_struct = build_simple_mem_ref (clsn_data->load);
1153 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1154 NULL_TREE);
/* Re-define the value the exit phi (KEEP_RES) used to produce by a
   plain load from the reduction's field, then retire the phi itself.  */
1156 x = load_struct;
1157 name = PHI_RESULT (red->keep_res);
1158 stmt = gimple_build_assign (name, x);
1159 SSA_NAME_DEF_STMT (name) = stmt;
1161 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1163 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1164 !gsi_end_p (gsi); gsi_next (&gsi))
1165 if (gsi_stmt (gsi) == red->keep_res)
1167 remove_phi_node (&gsi, false);
/* Found and removed the phi; continue the table traversal.  */
1168 return 1;
/* KEEP_RES must be present among its block's phis.  */
1170 gcc_unreachable ();
1173 /* Load the reduction result that was stored in LD_ST_DATA.
1174 REDUCTION_LIST describes the list of reductions that the
1175 loads should be generated for. */
1176 static void
1177 create_final_loads_for_reduction (reduction_info_table_type reduction_list,
1178 struct clsn_data *ld_st_data)
1180 gimple_stmt_iterator gsi;
1181 tree t;
1182 gimple stmt;
/* Point LOAD at the address of the shared STORE structure so the
   per-reduction callbacks can dereference it.  */
1184 gsi = gsi_after_labels (ld_st_data->load_bb);
1185 t = build_fold_addr_expr (ld_st_data->store);
1186 stmt = gimple_build_assign (ld_st_data->load, t);
1188 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1189 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1191 reduction_list
1192 .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1196 /* Callback for htab_traverse. Store the neutral value for the
1197 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1198 1 for MULT_EXPR, etc. into the reduction field.
1199 The reduction is specified in SLOT. The store information is
1200 passed in DATA. */
1203 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1205 struct reduction_info *const red = *slot;
1206 tree t;
1207 gimple stmt;
1208 gimple_stmt_iterator gsi;
1209 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
/* Append "store.field = initial_value" at the end of STORE_BB.  */
1211 gsi = gsi_last_bb (clsn_data->store_bb);
1212 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1213 stmt = gimple_build_assign (t, red->initial_value);
1214 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
/* Continue traversing the reduction table.  */
1216 return 1;
1219 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1220 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1221 specified in SLOT. */
1224 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1225 struct clsn_data *clsn_data)
1227 struct name_to_copy_elt *const elt = *slot;
1228 tree t;
1229 gimple stmt;
1230 gimple_stmt_iterator gsi;
1231 tree type = TREE_TYPE (elt->new_name);
1232 tree load_struct;
/* Store the original SSA name into its structure field (serial side).  */
1234 gsi = gsi_last_bb (clsn_data->store_bb);
1235 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1236 stmt = gimple_build_assign (t, ssa_name (elt->version));
1237 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
/* Load the field back into the duplicate name (parallel side).  */
1239 gsi = gsi_last_bb (clsn_data->load_bb);
1240 load_struct = build_simple_mem_ref (clsn_data->load);
1241 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1242 stmt = gimple_build_assign (elt->new_name, t);
1243 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1244 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
/* Continue traversing the name-copy table.  */
1246 return 1;
1249 /* Moves all the variables used in LOOP and defined outside of it (including
1250 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1251 name) to a structure created for this purpose. The code
1253 while (1)
1255 use (a);
1256 use (b);
1259 is transformed this way:
1261 bb0:
1262 old.a = a;
1263 old.b = b;
1265 bb1:
1266 a' = new->a;
1267 b' = new->b;
1268 while (1)
1270 use (a');
1271 use (b');
1274 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1275 pointer `new' is intentionally not initialized (the loop will be split to a
1276 separate function later, and `new' will be initialized from its arguments).
1277 LD_ST_DATA holds information about the shared data structure used to pass
1278 information among the threads. It is initialized here, and
1279 gen_parallel_loop will pass it to create_call_for_reduction that
1280 needs this information. REDUCTION_LIST describes the reductions
1281 in LOOP. */
1283 static void
1284 separate_decls_in_region (edge entry, edge exit,
1285 reduction_info_table_type reduction_list,
1286 tree *arg_struct, tree *new_arg_struct,
1287 struct clsn_data *ld_st_data)
/* BB0 (stores) ends up before the split entry edge; BB1 (loads) after.  */
1290 basic_block bb1 = split_edge (entry);
1291 basic_block bb0 = single_pred (bb1);
1292 name_to_copy_table_type name_copies;
1293 name_copies.create (10);
1294 int_tree_htab_type decl_copies;
1295 decl_copies.create (10);
1296 unsigned i;
1297 tree type, type_name, nvar;
1298 gimple_stmt_iterator gsi;
1299 struct clsn_data clsn_data;
1300 vec<basic_block> body;
1301 body.create (3);
1302 basic_block bb;
1303 basic_block entry_bb = bb1;
1304 basic_block exit_bb = exit->dest;
1305 bool has_debug_stmt = false;
1307 entry = single_succ_edge (entry_bb);
1308 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
/* First pass: record every name/decl used in the region that needs a
   copy; debug statements are only flagged here, not processed.  */
1310 FOR_EACH_VEC_ELT (body, i, bb)
1312 if (bb != entry_bb && bb != exit_bb)
1314 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1315 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1316 name_copies, decl_copies);
1318 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1320 gimple stmt = gsi_stmt (gsi);
1322 if (is_gimple_debug (stmt))
1323 has_debug_stmt = true;
1324 else
1325 separate_decls_in_region_stmt (entry, exit, stmt,
1326 name_copies, decl_copies);
1331 /* Now process debug bind stmts. We must not create decls while
1332 processing debug stmts, so we defer their processing so as to
1333 make sure we will have debug info for as many variables as
1334 possible (all of those that were dealt with in the loop above),
1335 and discard those for which we know there's nothing we can
1336 do. */
1337 if (has_debug_stmt)
1338 FOR_EACH_VEC_ELT (body, i, bb)
1339 if (bb != entry_bb && bb != exit_bb)
1341 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1343 gimple stmt = gsi_stmt (gsi);
1345 if (is_gimple_debug (stmt))
1347 if (separate_decls_in_region_debug (stmt, name_copies,
1348 decl_copies))
1350 gsi_remove (&gsi, true);
1351 continue;
1355 gsi_next (&gsi);
1359 body.release ();
1361 if (name_copies.elements () == 0 && reduction_list.elements () == 0)
1363 /* It may happen that there is nothing to copy (if there are only
1364 loop carried and external variables in the loop). */
1365 *arg_struct = NULL;
1366 *new_arg_struct = NULL;
1368 else
1370 /* Create the type for the structure to store the ssa names to. */
1371 type = lang_hooks.types.make_type (RECORD_TYPE);
1372 type_name = build_decl (UNKNOWN_LOCATION,
1373 TYPE_DECL, create_tmp_var_name (".paral_data"),
1374 type);
1375 TYPE_NAME (type) = type_name;
1377 name_copies.traverse <tree, add_field_for_name> (type);
1378 if (reduction_list.is_created () && reduction_list.elements () > 0)
1380 /* Create the fields for reductions. */
1381 reduction_list.traverse <tree, add_field_for_reduction> (type);
1383 layout_type (type);
1385 /* Create the loads and stores. */
1386 *arg_struct = create_tmp_var (type, ".paral_data_store");
1387 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1388 *new_arg_struct = make_ssa_name (nvar, NULL);
1390 ld_st_data->store = *arg_struct;
1391 ld_st_data->load = *new_arg_struct;
1392 ld_st_data->store_bb = bb0;
1393 ld_st_data->load_bb = bb1;
1395 name_copies
1396 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1397 (ld_st_data);
1399 /* Load the calculation from memory (after the join of the threads). */
1401 if (reduction_list.is_created () && reduction_list.elements () > 0)
1403 reduction_list
1404 .traverse <struct clsn_data *, create_stores_for_reduction>
1405 (ld_st_data);
1406 clsn_data.load = make_ssa_name (nvar, NULL);
1407 clsn_data.load_bb = exit->dest;
1408 clsn_data.store = ld_st_data->store;
1409 create_final_loads_for_reduction (reduction_list, &clsn_data);
1413 decl_copies.dispose ();
1414 name_copies.dispose ();
1417 /* Bitmap containing uids of functions created by parallelization. We cannot
1418 allocate it from the default obstack, as it must live across compilation
1419 of several functions; we make it gc allocated instead. */
1421 static GTY(()) bitmap parallelized_functions;
1423 /* Returns true if FN was created by create_loop_fn. */
1425 bool
1426 parallelized_function_p (tree fn)
/* Only artificial decls recorded in PARALLELIZED_FUNCTIONS qualify.  */
1428 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1429 return false;
1431 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1434 /* Creates and returns an empty function that will receive the body of
1435 a parallelized loop. */
1437 static tree
1438 create_loop_fn (location_t loc)
1440 char buf[100];
1441 char *tname;
1442 tree decl, type, name, t;
1443 struct function *act_cfun = cfun;
1444 static unsigned loopfn_num;
/* Build a unique private name "<current-fn>.$loopfn<N>" for the decl.  */
1446 loc = LOCATION_LOCUS (loc);
1447 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1448 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1449 clean_symbol_name (tname);
1450 name = get_identifier (tname);
/* The outlined function takes a single void* data argument.  */
1451 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1453 decl = build_decl (loc, FUNCTION_DECL, name, type);
1454 if (!parallelized_functions)
1455 parallelized_functions = BITMAP_GGC_ALLOC ();
1456 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
/* Static, non-public, non-inlinable: the decl exists only as the target
   of the GIMPLE_OMP_PARALLEL emitted for this loop.  */
1458 TREE_STATIC (decl) = 1;
1459 TREE_USED (decl) = 1;
1460 DECL_ARTIFICIAL (decl) = 1;
1461 DECL_IGNORED_P (decl) = 0;
1462 TREE_PUBLIC (decl) = 0;
1463 DECL_UNINLINABLE (decl) = 1;
1464 DECL_EXTERNAL (decl) = 0;
1465 DECL_CONTEXT (decl) = NULL_TREE;
1466 DECL_INITIAL (decl) = make_node (BLOCK);
1468 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1469 DECL_ARTIFICIAL (t) = 1;
1470 DECL_IGNORED_P (t) = 1;
1471 DECL_RESULT (decl) = t;
1473 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1474 ptr_type_node);
1475 DECL_ARTIFICIAL (t) = 1;
1476 DECL_ARG_TYPE (t) = ptr_type_node;
1477 DECL_CONTEXT (t) = decl;
1478 TREE_USED (t) = 1;
1479 DECL_ARGUMENTS (decl) = t;
1481 allocate_struct_function (decl, false);
1483 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1484 it. */
1485 set_cfun (act_cfun);
1487 return decl;
1490 /* Moves the exit condition of LOOP to the beginning of its header, and
1491 duplicates the part of the last iteration that gets disabled to the
1492 exit of the loop. NIT is the number of iterations of the loop
1493 (used to initialize the variables in the duplicated part).
1495 TODO: the common case is that latch of the loop is empty and immediately
1496 follows the loop exit. In this case, it would be better not to copy the
1497 body of the loop, but only move the entry of the loop directly before the
1498 exit check and increase the number of iterations of the loop by one.
1499 This may need some additional preconditioning in case NIT = ~0.
1500 REDUCTION_LIST describes the reductions in LOOP. */
1502 static void
1503 transform_to_exit_first_loop (struct loop *loop,
1504 reduction_info_table_type reduction_list,
1505 tree nit)
1507 basic_block *bbs, *nbbs, ex_bb, orig_header;
1508 unsigned n;
1509 bool ok;
1510 edge exit = single_dom_exit (loop), hpred;
1511 tree control, control_name, res, t;
1512 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1513 gimple_stmt_iterator gsi;
1514 tree nit_1;
1516 split_block_after_labels (loop->header);
1517 orig_header = single_succ (loop->header);
1518 hpred = single_succ_edge (loop->header);
1520 cond_stmt = last_stmt (exit->src);
1521 control = gimple_cond_lhs (cond_stmt);
1522 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1524 /* Make sure that we have phi nodes on exit for all loop header phis
1525 (create_parallel_loop requires that). */
1526 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1528 phi = gsi_stmt (gsi);
1529 res = PHI_RESULT (phi);
/* Give the header phi a fresh name T and reuse the old name RES as the
   result of a new phi in ORIG_HEADER, keeping uses outside consistent.  */
1530 t = copy_ssa_name (res, phi);
1531 SET_PHI_RESULT (phi, t);
1532 nphi = create_phi_node (res, orig_header);
1533 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1535 if (res == control)
/* The exit condition must test the renamed control variable.  */
1537 gimple_cond_set_lhs (cond_stmt, t);
1538 update_stmt (cond_stmt);
1539 control = t;
/* Duplicate the tail of the last iteration (everything dominated past
   the exit test) outside the loop; EX_BB becomes the new exit block.  */
1543 bbs = get_loop_body_in_dom_order (loop);
1545 for (n = 0; bbs[n] != exit->src; n++)
1546 continue;
1547 nbbs = XNEWVEC (basic_block, n);
1548 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1549 bbs + 1, n, nbbs);
1550 gcc_assert (ok);
1551 free (bbs);
1552 ex_bb = nbbs[0];
1553 free (nbbs);
1555 /* Other than reductions, the only gimple reg that should be copied
1556 out of the loop is the control variable. */
1557 exit = single_dom_exit (loop);
1558 control_name = NULL_TREE;
1559 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1561 phi = gsi_stmt (gsi);
1562 res = PHI_RESULT (phi);
1563 if (virtual_operand_p (res))
1565 gsi_next (&gsi);
1566 continue;
1569 /* Check if it is a part of reduction. If it is,
1570 keep the phi at the reduction's keep_res field. The
1571 PHI_RESULT of this phi is the resulting value of the reduction
1572 variable when exiting the loop. */
1574 if (reduction_list.elements () > 0)
1576 struct reduction_info *red;
1578 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1579 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1580 if (red)
1582 red->keep_res = phi;
1583 gsi_next (&gsi);
1584 continue;
1587 gcc_assert (control_name == NULL_TREE
1588 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1589 control_name = res;
1590 remove_phi_node (&gsi, false);
1592 gcc_assert (control_name != NULL_TREE);
1594 /* Initialize the control variable to number of iterations
1595 according to the rhs of the exit condition. */
1596 gsi = gsi_after_labels (ex_bb);
1597 cond_nit = last_stmt (exit->src);
1598 nit_1 = gimple_cond_rhs (cond_nit);
1599 nit_1 = force_gimple_operand_gsi (&gsi,
1600 fold_convert (TREE_TYPE (control_name), nit_1),
1601 false, NULL_TREE, false, GSI_SAME_STMT);
1602 stmt = gimple_build_assign (control_name, nit_1);
1603 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1604 SSA_NAME_DEF_STMT (control_name) = stmt;
1607 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1608 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1609 NEW_DATA is the variable that should be initialized from the argument
1610 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1611 basic block containing GIMPLE_OMP_PARALLEL tree. */
1613 static basic_block
1614 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1615 tree new_data, unsigned n_threads, location_t loc)
1617 gimple_stmt_iterator gsi;
1618 basic_block bb, paral_bb, for_bb, ex_bb;
1619 tree t, param;
1620 gimple stmt, for_stmt, phi, cond_stmt;
1621 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1622 edge exit, nexit, guard, end, e;
1624 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1625 bb = loop_preheader_edge (loop)->src;
1626 paral_bb = single_pred (bb);
1627 gsi = gsi_last_bb (paral_bb);
1629 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1630 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1631 = build_int_cst (integer_type_node, n_threads);
1632 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1633 gimple_set_location (stmt, loc);
1635 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1637 /* Initialize NEW_DATA. */
1638 if (data)
/* NEW_DATA = (type of NEW_DATA) &DATA, going through the outlined
   function's pointer parameter.  */
1640 gsi = gsi_after_labels (bb);
1642 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1643 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1644 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1645 SSA_NAME_DEF_STMT (param) = stmt;
1647 stmt = gimple_build_assign (new_data,
1648 fold_convert (TREE_TYPE (new_data), param));
1649 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1650 SSA_NAME_DEF_STMT (new_data) = stmt;
1653 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1654 bb = split_loop_exit_edge (single_dom_exit (loop));
1655 gsi = gsi_last_bb (bb);
1656 stmt = gimple_build_omp_return (false);
1657 gimple_set_location (stmt, loc);
1658 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1660 /* Extract data for GIMPLE_OMP_FOR. */
1661 gcc_assert (loop->header == single_dom_exit (loop)->src);
1662 cond_stmt = last_stmt (loop->header);
1664 cvar = gimple_cond_lhs (cond_stmt);
1665 cvar_base = SSA_NAME_VAR (cvar);
1666 phi = SSA_NAME_DEF_STMT (cvar);
1667 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1668 initvar = copy_ssa_name (cvar, NULL);
1669 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1670 initvar);
1671 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
/* Drop the increment of the control variable; GIMPLE_OMP_CONTINUE
   (emitted below) takes over advancing it.  */
1673 gsi = gsi_last_nondebug_bb (loop->latch);
1674 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1675 gsi_remove (&gsi, true);
1677 /* Prepare cfg. */
1678 for_bb = split_edge (loop_preheader_edge (loop));
1679 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1680 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1681 gcc_assert (exit == single_dom_exit (loop));
/* GUARD models the zero-trip path around the loop; END is the normal
   completion path from the latch into the exit block.  */
1683 guard = make_edge (for_bb, ex_bb, 0);
1684 single_succ_edge (loop->latch)->flags = 0;
1685 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1686 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1688 source_location locus;
1689 tree def;
1690 phi = gsi_stmt (gsi);
1691 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1693 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1694 locus = gimple_phi_arg_location_from_edge (stmt,
1695 loop_preheader_edge (loop));
1696 add_phi_arg (phi, def, guard, locus);
1698 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1699 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1700 add_phi_arg (phi, def, end, locus);
1702 e = redirect_edge_and_branch (exit, nexit->dest);
1703 PENDING_STMT (e) = NULL;
1705 /* Emit GIMPLE_OMP_FOR. */
1706 gimple_cond_set_lhs (cond_stmt, cvar_base);
1707 type = TREE_TYPE (cvar);
1708 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1709 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1711 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
1712 gimple_set_location (for_stmt, loc);
1713 gimple_omp_for_set_index (for_stmt, 0, initvar);
1714 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1715 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1716 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1717 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1718 cvar_base,
1719 build_int_cst (type, 1)));
1721 gsi = gsi_last_bb (for_bb);
1722 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1723 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1725 /* Emit GIMPLE_OMP_CONTINUE. */
1726 gsi = gsi_last_bb (loop->latch);
1727 stmt = gimple_build_omp_continue (cvar_next, cvar);
1728 gimple_set_location (stmt, loc);
1729 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1730 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1732 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1733 gsi = gsi_last_bb (ex_bb);
1734 stmt = gimple_build_omp_return (true);
1735 gimple_set_location (stmt, loc);
1736 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1738 /* After the above dom info is hosed. Re-compute it. */
1739 free_dominance_info (CDI_DOMINATORS);
1740 calculate_dominance_info (CDI_DOMINATORS);
1742 return paral_bb;
1745 /* Generates code to execute the iterations of LOOP in N_THREADS
1746 threads in parallel.
1748 NITER describes number of iterations of LOOP.
1749 REDUCTION_LIST describes the reductions existent in the LOOP. */
1751 static void
1752 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1753 unsigned n_threads, struct tree_niter_desc *niter)
1755 loop_iterator li;
1756 tree many_iterations_cond, type, nit;
1757 tree arg_struct, new_arg_struct;
1758 gimple_seq stmts;
1759 basic_block parallel_head;
1760 edge entry, exit;
1761 struct clsn_data clsn_data;
1762 unsigned prob;
1763 location_t loc;
1764 gimple cond_stmt;
1765 unsigned int m_p_thread=2;
1767 /* From
1769 ---------------------------------------------------------------------
1770 loop
1772 IV = phi (INIT, IV + STEP)
1773 BODY1;
1774 if (COND)
1775 break;
1776 BODY2;
1778 ---------------------------------------------------------------------
1780 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1781 we generate the following code:
1783 ---------------------------------------------------------------------
1785 if (MAY_BE_ZERO
1786 || NITER < MIN_PER_THREAD * N_THREADS)
1787 goto original;
1789 BODY1;
1790 store all local loop-invariant variables used in body of the loop to DATA.
1791 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1792 load the variables from DATA.
1793 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1794 BODY2;
1795 BODY1;
1796 GIMPLE_OMP_CONTINUE;
1797 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1798 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1799 goto end;
1801 original:
1802 loop
1804 IV = phi (INIT, IV + STEP)
1805 BODY1;
1806 if (COND)
1807 break;
1808 BODY2;
1811 end:
1815 /* Create two versions of the loop -- in the old one, we know that the
1816 number of iterations is large enough, and we will transform it into the
1817 loop that will be split to loop_fn, the new one will be used for the
1818 remaining iterations. */
1820 /* We should compute a better number-of-iterations value for outer loops.
1821 That is, if we have
1823 for (i = 0; i < n; ++i)
1824 for (j = 0; j < m; ++j)
1827 we should compute nit = n * m, not nit = n.
1828 Also may_be_zero handling would need to be adjusted. */
1830 type = TREE_TYPE (niter->niter);
1831 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1832 NULL_TREE);
1833 if (stmts)
1834 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
/* Require fewer iterations per thread for outer loops, since each of
   their iterations carries a whole inner loop's worth of work.  */
1836 if (loop->inner)
1837 m_p_thread=2;
1838 else
1839 m_p_thread=MIN_PER_THREAD;
1841 many_iterations_cond =
1842 fold_build2 (GE_EXPR, boolean_type_node,
1843 nit, build_int_cst (type, m_p_thread * n_threads));
1845 many_iterations_cond
1846 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1847 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1848 many_iterations_cond);
1849 many_iterations_cond
1850 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1851 if (stmts)
1852 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1853 if (!is_gimple_condexpr (many_iterations_cond))
1855 many_iterations_cond
1856 = force_gimple_operand (many_iterations_cond, &stmts,
1857 true, NULL_TREE);
1858 if (stmts)
1859 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1862 initialize_original_copy_tables ();
1864 /* We assume that the loop usually iterates a lot. */
1865 prob = 4 * REG_BR_PROB_BASE / 5;
1866 loop_version (loop, many_iterations_cond, NULL,
1867 prob, prob, REG_BR_PROB_BASE - prob, true);
1868 update_ssa (TODO_update_ssa);
1869 free_original_copy_tables ();
1871 /* Base all the induction variables in LOOP on a single control one. */
1872 canonicalize_loop_ivs (loop, &nit, true);
1874 /* Ensure that the exit condition is the first statement in the loop. */
1875 transform_to_exit_first_loop (loop, reduction_list, nit);
1877 /* Generate initializations for reductions. */
1878 if (reduction_list.elements () > 0)
1879 reduction_list.traverse <struct loop *, initialize_reductions> (loop);
1881 /* Eliminate the references to local variables from the loop. */
1882 gcc_assert (single_exit (loop));
1883 entry = loop_preheader_edge (loop);
1884 exit = single_dom_exit (loop);
1886 eliminate_local_variables (entry, exit);
1887 /* In the old loop, move all variables non-local to the loop to a structure
1888 and back, and create separate decls for the variables used in loop. */
1889 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1890 &new_arg_struct, &clsn_data);
1892 /* Create the parallel constructs. */
1893 loc = UNKNOWN_LOCATION;
1894 cond_stmt = last_stmt (loop->header);
1895 if (cond_stmt)
1896 loc = gimple_location (cond_stmt);
1897 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1898 new_arg_struct, n_threads, loc);
1899 if (reduction_list.elements () > 0)
1900 create_call_for_reduction (loop, reduction_list, &clsn_data);
1902 scev_reset ();
1904 /* Cancel the loop (it is simpler to do it here rather than to teach the
1905 expander to do it). */
1906 cancel_loop_tree (loop);
1908 /* Free loop bound estimations that could contain references to
1909 removed statements. */
1910 FOR_EACH_LOOP (li, loop, 0)
1911 free_numbers_of_iterations_estimates_loop (loop);
1913 /* Expand the parallel constructs. We do it directly here instead of running
1914 a separate expand_omp pass, since it is more efficient, and less likely to
1915 cause troubles with further analyses not being able to deal with the
1916 OMP trees. */
1918 omp_expand_local (parallel_head);
1921 /* Returns true when LOOP contains vector phi nodes. */
1923 static bool
1924 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1926 unsigned i;
1927 basic_block *bbs = get_loop_body_in_dom_order (loop);
1928 gimple_stmt_iterator gsi;
/* RES starts true; it is only cleared when the scan below finishes
   without jumping to END, i.e. when no vector-typed phi was found.  */
1929 bool res = true;
1931 for (i = 0; i < loop->num_nodes; i++)
1932 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1933 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1934 goto end;
1936 res = false;
1937 end:
1938 free (bbs);
1939 return res;
1942 /* Create a reduction_info struct, initialize it with REDUC_STMT
1943 and PHI, insert it to the REDUCTION_LIST. */
1945 static void
1946 build_new_reduction (reduction_info_table_type reduction_list,
1947 gimple reduc_stmt, gimple phi)
1949 reduction_info **slot;
1950 struct reduction_info *new_reduction;
1952 gcc_assert (reduc_stmt);
1954 if (dump_file && (dump_flags & TDF_DETAILS))
1956 fprintf (dump_file,
1957 "Detected reduction. reduction stmt is: \n");
1958 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1959 fprintf (dump_file, "\n");
/* The table is keyed on the SSA version of the phi result; the
   reduction operation is taken from the statement's rhs code.  */
1962 new_reduction = XCNEW (struct reduction_info);
1964 new_reduction->reduc_stmt = reduc_stmt;
1965 new_reduction->reduc_phi = phi;
1966 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1967 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1968 slot = reduction_list.find_slot (new_reduction, INSERT);
1969 *slot = new_reduction;
1972 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1975 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
1977 struct reduction_info *const red = *slot;
/* Tag the phi with the reduction's SSA version so lookups can match
   phis back to their reduction_info.  */
1978 gimple_set_uid (red->reduc_phi, red->reduc_version);
1979 return 1;
1982 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1984 static void
1985 gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
1987 gimple_stmt_iterator gsi;
1988 loop_vec_info simple_loop_info;
/* Reuse the vectorizer's loop-form analysis and reduction detection.  */
1990 simple_loop_info = vect_analyze_loop_form (loop);
1992 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1994 gimple phi = gsi_stmt (gsi);
1995 affine_iv iv;
1996 tree res = PHI_RESULT (phi);
1997 bool double_reduc;
1999 if (virtual_operand_p (res))
2000 continue;
/* A phi that is a simple induction variable is not a reduction;
   only non-IV phis are tested, and only when loop-form analysis
   succeeded.  Double reductions are not handled.  */
2002 if (!simple_iv (loop, loop, res, &iv, true)
2003 && simple_loop_info)
2005 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2006 phi, true,
2007 &double_reduc);
2008 if (reduc_stmt && !double_reduc)
2009 build_new_reduction (reduction_list, reduc_stmt, phi);
2012 destroy_loop_vec_info (simple_loop_info, true);
2014 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2015 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2016 only now. */
2017 reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
2020 /* Try to initialize NITER for code generation part. */
2022 static bool
2023 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2025 edge exit = single_dom_exit (loop);
2027 gcc_assert (exit);
2029 /* We need to know # of iterations, and there should be no uses of values
2030 defined inside loop outside of it, unless the values are invariants of
2031 the loop. */
2032 if (!number_of_iterations_exit (loop, exit, niter, false))
2034 if (dump_file && (dump_flags & TDF_DETAILS))
2035 fprintf (dump_file, " FAILED: number of iterations not known\n");
2036 return false;
2039 return true;
2042 /* Try to initialize REDUCTION_LIST for code generation part.
2043 REDUCTION_LIST describes the reductions. */
2045 static bool
2046 try_create_reduction_list (loop_p loop,
2047 reduction_info_table_type reduction_list)
2049 edge exit = single_dom_exit (loop);
2050 gimple_stmt_iterator gsi;
2052 gcc_assert (exit);
2054 gather_scalar_reductions (loop, reduction_list);
/* Every value live after the loop must be produced by a detected
   reduction; otherwise the loop cannot be parallelized.  */
2057 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2059 gimple phi = gsi_stmt (gsi);
2060 struct reduction_info *red;
2061 imm_use_iterator imm_iter;
2062 use_operand_p use_p;
2063 gimple reduc_phi;
2064 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2066 if (!virtual_operand_p (val))
2068 if (dump_file && (dump_flags & TDF_DETAILS))
2070 fprintf (dump_file, "phi is ");
2071 print_gimple_stmt (dump_file, phi, 0, 0);
2072 fprintf (dump_file, "arg of phi to exit: value ");
2073 print_generic_expr (dump_file, val, 0);
2074 fprintf (dump_file, " used outside loop\n");
2075 fprintf (dump_file,
2076 " checking if it a part of reduction pattern: \n");
2078 if (reduction_list.elements () == 0)
2080 if (dump_file && (dump_flags & TDF_DETAILS))
2081 fprintf (dump_file,
2082 " FAILED: it is not a part of reduction.\n");
2083 return false;
/* Find the in-loop phi using VAL; that phi identifies the candidate
   reduction cycle this exit value belongs to.  */
2085 reduc_phi = NULL;
2086 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2088 if (!gimple_debug_bind_p (USE_STMT (use_p))
2089 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2091 reduc_phi = USE_STMT (use_p);
2092 break;
2095 red = reduction_phi (reduction_list, reduc_phi);
2096 if (red == NULL)
2098 if (dump_file && (dump_flags & TDF_DETAILS))
2099 fprintf (dump_file,
2100 " FAILED: it is not a part of reduction.\n");
2101 return false;
2103 if (dump_file && (dump_flags & TDF_DETAILS))
2105 fprintf (dump_file, "reduction phi is ");
2106 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2107 fprintf (dump_file, "reduction stmt is ");
2108 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2113 /* The iterations of the loop may communicate only through bivs whose
2114 iteration space can be distributed efficiently. */
2115 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2117 gimple phi = gsi_stmt (gsi);
2118 tree def = PHI_RESULT (phi);
2119 affine_iv iv;
2121 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2123 struct reduction_info *red;
2125 red = reduction_phi (reduction_list, phi);
2126 if (red == NULL)
2128 if (dump_file && (dump_flags & TDF_DETAILS))
2129 fprintf (dump_file,
2130 " FAILED: scalar dependency between iterations\n");
2131 return false;
2137 return true;
2140 /* Detect parallel loops and generate parallel code using libgomp
2141 primitives. Returns true if some loop was parallelized, false
2142 otherwise. */
2144 bool
2145 parallelize_loops (void)
2147 unsigned n_threads = flag_tree_parallelize_loops;
2148 bool changed = false;
2149 struct loop *loop;
2150 struct tree_niter_desc niter_desc;
2151 loop_iterator li;
2152 reduction_info_table_type reduction_list;
2153 struct obstack parloop_obstack;
2154 HOST_WIDE_INT estimated;
2155 LOC loop_loc;
2157 /* Do not parallelize loops in the functions created by parallelization. */
2158 if (parallelized_function_p (cfun->decl))
2159 return false;
2160 if (cfun->has_nonlocal_label)
2161 return false;
2163 gcc_obstack_init (&parloop_obstack);
2164 reduction_list.create (10);
2165 init_stmt_vec_info_vec ();
2167 FOR_EACH_LOOP (li, loop, 0)
2169 reduction_list.empty ();
2170 if (dump_file && (dump_flags & TDF_DETAILS))
2172 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2173 if (loop->inner)
2174 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2175 else
2176 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2179 /* If we use autopar in graphite pass, we use its marked dependency
2180 checking results. */
2181 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2184 fprintf (dump_file, "loop is not parallel according to graphite\n");
2185 continue;
2188 if (!single_dom_exit (loop))
2191 if (dump_file && (dump_flags & TDF_DETAILS))
2192 fprintf (dump_file, "loop is !single_dom_exit\n");
2194 continue;
2197 if (/* And of course, the loop must be parallelizable. */
2198 !can_duplicate_loop_p (loop)
2199 || loop_has_blocks_with_irreducible_flag (loop)
2200 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2201 /* FIXME: the check for vector phi nodes could be removed. */
2202 || loop_has_vector_phi_nodes (loop))
2203 continue;
2205 estimated = estimated_stmt_executions_int (loop);
2206 if (estimated == -1)
2207 estimated = max_stmt_executions_int (loop);
2208 /* FIXME: Bypass this check as graphite doesn't update the
2209 count and frequency correctly now. */
2210 if (!flag_loop_parallelize_all
2211 && ((estimated != -1
2212 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2213 /* Do not bother with loops in cold areas. */
2214 || optimize_loop_nest_for_size_p (loop)))
2215 continue;
2217 if (!try_get_loop_niter (loop, &niter_desc))
2218 continue;
2220 if (!try_create_reduction_list (loop, reduction_list))
2221 continue;
2223 if (!flag_loop_parallelize_all
2224 && !loop_parallel_p (loop, &parloop_obstack))
2225 continue;
2227 changed = true;
2228 if (dump_file && (dump_flags & TDF_DETAILS))
2230 if (loop->inner)
2231 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2232 else
2233 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2234 loop_loc = find_loop_location (loop);
2235 if (loop_loc != UNKNOWN_LOC)
2236 fprintf (dump_file, "\nloop at %s:%d: ",
2237 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2239 gen_parallel_loop (loop, reduction_list,
2240 n_threads, &niter_desc);
2243 free_stmt_vec_info_vec ();
2244 reduction_list.dispose ();
2245 obstack_free (&parloop_obstack, NULL);
2247 /* Parallelization will cause new function calls to be inserted through
2248 which local variables will escape. Reset the points-to solution
2249 for ESCAPED. */
2250 if (changed)
2251 pt_solution_reset (&cfun->gimple_df->escaped);
2253 return changed;
2256 /* Parallelization. */
2258 static bool
2259 gate_tree_parallelize_loops (void)
2261 return flag_tree_parallelize_loops > 1;
2264 static unsigned
2265 tree_parallelize_loops (void)
2267 if (number_of_loops (cfun) <= 1)
2268 return 0;
2270 if (parallelize_loops ())
2271 return TODO_cleanup_cfg | TODO_rebuild_alias;
2272 return 0;
2275 namespace {
2277 const pass_data pass_data_parallelize_loops =
2279 GIMPLE_PASS, /* type */
2280 "parloops", /* name */
2281 OPTGROUP_LOOP, /* optinfo_flags */
2282 true, /* has_gate */
2283 true, /* has_execute */
2284 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2285 ( PROP_cfg | PROP_ssa ), /* properties_required */
2286 0, /* properties_provided */
2287 0, /* properties_destroyed */
2288 0, /* todo_flags_start */
2289 TODO_verify_flow, /* todo_flags_finish */
2292 class pass_parallelize_loops : public gimple_opt_pass
2294 public:
2295 pass_parallelize_loops (gcc::context *ctxt)
2296 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2299 /* opt_pass methods: */
2300 bool gate () { return gate_tree_parallelize_loops (); }
2301 unsigned int execute () { return tree_parallelize_loops (); }
2303 }; // class pass_parallelize_loops
2305 } // anon namespace
2307 gimple_opt_pass *
2308 make_pass_parallelize_loops (gcc::context *ctxt)
2310 return new pass_parallelize_loops (ctxt);
2314 #include "gt-tree-parloops.h"