2013-11-12 Andrew MacLeod <amacleod@redhat.com>
[official-gcc.git] / gcc / tree-parloops.c
bloba17085c869244f8ab240b79ddeca0453498a6b30
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree.h"
26 #include "gimplify.h"
27 #include "gimple-ssa.h"
28 #include "tree-cfg.h"
29 #include "tree-phinodes.h"
30 #include "ssa-iterators.h"
31 #include "tree-ssanames.h"
32 #include "tree-ssa-loop-ivopts.h"
33 #include "tree-ssa-loop-manip.h"
34 #include "tree-ssa-loop-niter.h"
35 #include "tree-ssa-loop.h"
36 #include "tree-into-ssa.h"
37 #include "cfgloop.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "gimple-pretty-print.h"
41 #include "tree-pass.h"
42 #include "langhooks.h"
43 #include "tree-vectorizer.h"
44 #include "tree-hasher.h"
45 #include "tree-parloops.h"
46 #include "omp-low.h"
48 /* This pass tries to distribute iterations of loops into several threads.
49 The implementation is straightforward -- for each loop we test whether its
50 iterations are independent, and if it is the case (and some additional
51 conditions regarding profitability and correctness are satisfied), we
52 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
53 machinery do its job.
55 The most of the complexity is in bringing the code into shape expected
56 by the omp expanders:
57 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
58 variable and that the exit test is at the start of the loop body
59 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
60 variables by accesses through pointers, and breaking up ssa chains
61 by storing the values incoming to the parallelized loop to a structure
62 passed to the new function as an argument (something similar is done
63 in omp gimplification, unfortunately only a small part of the code
64 can be shared).
66 TODO:
67 -- if there are several parallelizable loops in a function, it may be
68 possible to generate the threads just once (using synchronization to
69 ensure that cross-loop dependences are obeyed).
70 -- handling of common reduction patterns for outer loops.
72 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
74 Reduction handling:
75 currently we use vect_force_simple_reduction() to detect reduction patterns.
76 The code transformation will be introduced by an example.
79 parloop
81 int sum=1;
83 for (i = 0; i < N; i++)
85 x[i] = i + 3;
86 sum+=x[i];
90 gimple-like code:
91 header_bb:
93 # sum_29 = PHI <sum_11(5), 1(3)>
94 # i_28 = PHI <i_12(5), 0(3)>
95 D.1795_8 = i_28 + 3;
96 x[i_28] = D.1795_8;
97 sum_11 = D.1795_8 + sum_29;
98 i_12 = i_28 + 1;
99 if (N_6(D) > i_12)
100 goto header_bb;
103 exit_bb:
105 # sum_21 = PHI <sum_11(4)>
106 printf (&"%d"[0], sum_21);
109 after reduction transformation (only relevant parts):
111 parloop
114 ....
117 # Storing the initial value given by the user. #
119 .paral_data_store.32.sum.27 = 1;
121 #pragma omp parallel num_threads(4)
123 #pragma omp for schedule(static)
125 # The neutral element corresponding to the particular
126 reduction's operation, e.g. 0 for PLUS_EXPR,
127 1 for MULT_EXPR, etc. replaces the user's initial value. #
129 # sum.27_29 = PHI <sum.27_11, 0>
131 sum.27_11 = D.1827_8 + sum.27_29;
133 GIMPLE_OMP_CONTINUE
135 # Adding this reduction phi is done at create_phi_for_local_result() #
136 # sum.27_56 = PHI <sum.27_11, 0>
137 GIMPLE_OMP_RETURN
139 # Creating the atomic operation is done at
140 create_call_for_reduction_1() #
142 #pragma omp atomic_load
143 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
144 D.1840_60 = sum.27_56 + D.1839_59;
145 #pragma omp atomic_store (D.1840_60);
147 GIMPLE_OMP_RETURN
149 # collecting the result after the join of the threads is done at
150 create_loads_for_reductions().
151 The value computed by the threads is loaded from the
152 shared struct. #
155 .paral_data_load.33_52 = &.paral_data_store.32;
156 sum_37 = .paral_data_load.33_52->sum.27;
157 sum_43 = D.1795_41 + sum_37;
159 exit bb:
160 # sum_21 = PHI <sum_43, sum_26>
161 printf (&"%d"[0], sum_21);
169 /* Minimal number of iterations of a loop that should be executed in each
170 thread. */
171 #define MIN_PER_THREAD 100
173 /* Element of the hashtable, representing a
174 reduction in the current loop. */
175 struct reduction_info
177 gimple reduc_stmt; /* reduction statement. */
178 gimple reduc_phi; /* The phi node defining the reduction. */
179 enum tree_code reduction_code;/* code for the reduction operation. */
180 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
181 result. */
182 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
183 of the reduction variable when existing the loop. */
184 tree initial_value; /* The initial value of the reduction var before entering the loop. */
185 tree field; /* the name of the field in the parloop data structure intended for reduction. */
186 tree init; /* reduction initialization value. */
187 gimple new_phi; /* (helper field) Newly created phi node whose result
188 will be passed to the atomic operation. Represents
189 the local result each thread computed for the reduction
190 operation. */
193 /* Reduction info hashtable helpers. */
195 struct reduction_hasher : typed_free_remove <reduction_info>
197 typedef reduction_info value_type;
198 typedef reduction_info compare_type;
199 static inline hashval_t hash (const value_type *);
200 static inline bool equal (const value_type *, const compare_type *);
203 /* Equality and hash functions for hashtab code. */
205 inline bool
206 reduction_hasher::equal (const value_type *a, const compare_type *b)
208 return (a->reduc_phi == b->reduc_phi);
211 inline hashval_t
212 reduction_hasher::hash (const value_type *a)
214 return a->reduc_version;
217 typedef hash_table <reduction_hasher> reduction_info_table_type;
220 static struct reduction_info *
221 reduction_phi (reduction_info_table_type reduction_list, gimple phi)
223 struct reduction_info tmpred, *red;
225 if (reduction_list.elements () == 0 || phi == NULL)
226 return NULL;
228 tmpred.reduc_phi = phi;
229 tmpred.reduc_version = gimple_uid (phi);
230 red = reduction_list.find (&tmpred);
232 return red;
235 /* Element of hashtable of names to copy. */
237 struct name_to_copy_elt
239 unsigned version; /* The version of the name to copy. */
240 tree new_name; /* The new name used in the copy. */
241 tree field; /* The field of the structure used to pass the
242 value. */
245 /* Name copies hashtable helpers. */
247 struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
249 typedef name_to_copy_elt value_type;
250 typedef name_to_copy_elt compare_type;
251 static inline hashval_t hash (const value_type *);
252 static inline bool equal (const value_type *, const compare_type *);
255 /* Equality and hash functions for hashtab code. */
257 inline bool
258 name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
260 return a->version == b->version;
263 inline hashval_t
264 name_to_copy_hasher::hash (const value_type *a)
266 return (hashval_t) a->version;
269 typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
271 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
272 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
273 represents the denominator for every element in the matrix. */
274 typedef struct lambda_trans_matrix_s
276 lambda_matrix matrix;
277 int rowsize;
278 int colsize;
279 int denominator;
280 } *lambda_trans_matrix;
281 #define LTM_MATRIX(T) ((T)->matrix)
282 #define LTM_ROWSIZE(T) ((T)->rowsize)
283 #define LTM_COLSIZE(T) ((T)->colsize)
284 #define LTM_DENOMINATOR(T) ((T)->denominator)
286 /* Allocate a new transformation matrix. */
288 static lambda_trans_matrix
289 lambda_trans_matrix_new (int colsize, int rowsize,
290 struct obstack * lambda_obstack)
292 lambda_trans_matrix ret;
294 ret = (lambda_trans_matrix)
295 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
296 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
297 LTM_ROWSIZE (ret) = rowsize;
298 LTM_COLSIZE (ret) = colsize;
299 LTM_DENOMINATOR (ret) = 1;
300 return ret;
303 /* Multiply a vector VEC by a matrix MAT.
304 MAT is an M*N matrix, and VEC is a vector with length N. The result
305 is stored in DEST which must be a vector of length M. */
307 static void
308 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
309 lambda_vector vec, lambda_vector dest)
311 int i, j;
313 lambda_vector_clear (dest, m);
314 for (i = 0; i < m; i++)
315 for (j = 0; j < n; j++)
316 dest[i] += matrix[i][j] * vec[j];
319 /* Return true if TRANS is a legal transformation matrix that respects
320 the dependence vectors in DISTS and DIRS. The conservative answer
321 is false.
323 "Wolfe proves that a unimodular transformation represented by the
324 matrix T is legal when applied to a loop nest with a set of
325 lexicographically non-negative distance vectors RDG if and only if
326 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
327 i.e.: if and only if it transforms the lexicographically positive
328 distance vectors to lexicographically positive vectors. Note that
329 a unimodular matrix must transform the zero vector (and only it) to
330 the zero vector." S.Muchnick. */
332 static bool
333 lambda_transform_legal_p (lambda_trans_matrix trans,
334 int nb_loops,
335 vec<ddr_p> dependence_relations)
337 unsigned int i, j;
338 lambda_vector distres;
339 struct data_dependence_relation *ddr;
341 gcc_assert (LTM_COLSIZE (trans) == nb_loops
342 && LTM_ROWSIZE (trans) == nb_loops);
344 /* When there are no dependences, the transformation is correct. */
345 if (dependence_relations.length () == 0)
346 return true;
348 ddr = dependence_relations[0];
349 if (ddr == NULL)
350 return true;
352 /* When there is an unknown relation in the dependence_relations, we
353 know that it is no worth looking at this loop nest: give up. */
354 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
355 return false;
357 distres = lambda_vector_new (nb_loops);
359 /* For each distance vector in the dependence graph. */
360 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
362 /* Don't care about relations for which we know that there is no
363 dependence, nor about read-read (aka. output-dependences):
364 these data accesses can happen in any order. */
365 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
366 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
367 continue;
369 /* Conservatively answer: "this transformation is not valid". */
370 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
371 return false;
373 /* If the dependence could not be captured by a distance vector,
374 conservatively answer that the transform is not valid. */
375 if (DDR_NUM_DIST_VECTS (ddr) == 0)
376 return false;
378 /* Compute trans.dist_vect */
379 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
381 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
382 DDR_DIST_VECT (ddr, j), distres);
384 if (!lambda_vector_lexico_pos (distres, nb_loops))
385 return false;
388 return true;
391 /* Data dependency analysis. Returns true if the iterations of LOOP
392 are independent on each other (that is, if we can execute them
393 in parallel). */
395 static bool
396 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
398 vec<ddr_p> dependence_relations;
399 vec<data_reference_p> datarefs;
400 lambda_trans_matrix trans;
401 bool ret = false;
403 if (dump_file && (dump_flags & TDF_DETAILS))
405 fprintf (dump_file, "Considering loop %d\n", loop->num);
406 if (!loop->inner)
407 fprintf (dump_file, "loop is innermost\n");
408 else
409 fprintf (dump_file, "loop NOT innermost\n");
412 /* Check for problems with dependences. If the loop can be reversed,
413 the iterations are independent. */
414 stack_vec<loop_p, 3> loop_nest;
415 datarefs.create (10);
416 dependence_relations.create (100);
417 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
418 &dependence_relations))
420 if (dump_file && (dump_flags & TDF_DETAILS))
421 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
422 ret = false;
423 goto end;
425 if (dump_file && (dump_flags & TDF_DETAILS))
426 dump_data_dependence_relations (dump_file, dependence_relations);
428 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
429 LTM_MATRIX (trans)[0][0] = -1;
431 if (lambda_transform_legal_p (trans, 1, dependence_relations))
433 ret = true;
434 if (dump_file && (dump_flags & TDF_DETAILS))
435 fprintf (dump_file, " SUCCESS: may be parallelized\n");
437 else if (dump_file && (dump_flags & TDF_DETAILS))
438 fprintf (dump_file,
439 " FAILED: data dependencies exist across iterations\n");
441 end:
442 free_dependence_relations (dependence_relations);
443 free_data_refs (datarefs);
445 return ret;
448 /* Return true when LOOP contains basic blocks marked with the
449 BB_IRREDUCIBLE_LOOP flag. */
451 static inline bool
452 loop_has_blocks_with_irreducible_flag (struct loop *loop)
454 unsigned i;
455 basic_block *bbs = get_loop_body_in_dom_order (loop);
456 bool res = true;
458 for (i = 0; i < loop->num_nodes; i++)
459 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
460 goto end;
462 res = false;
463 end:
464 free (bbs);
465 return res;
468 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
469 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
470 to their addresses that can be reused. The address of OBJ is known to
471 be invariant in the whole function. Other needed statements are placed
472 right before GSI. */
474 static tree
475 take_address_of (tree obj, tree type, edge entry,
476 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
478 int uid;
479 int_tree_map **dslot;
480 struct int_tree_map ielt, *nielt;
481 tree *var_p, name, addr;
482 gimple stmt;
483 gimple_seq stmts;
485 /* Since the address of OBJ is invariant, the trees may be shared.
486 Avoid rewriting unrelated parts of the code. */
487 obj = unshare_expr (obj);
488 for (var_p = &obj;
489 handled_component_p (*var_p);
490 var_p = &TREE_OPERAND (*var_p, 0))
491 continue;
493 /* Canonicalize the access to base on a MEM_REF. */
494 if (DECL_P (*var_p))
495 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
497 /* Assign a canonical SSA name to the address of the base decl used
498 in the address and share it for all accesses and addresses based
499 on it. */
500 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
501 ielt.uid = uid;
502 dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
503 if (!*dslot)
505 if (gsi == NULL)
506 return NULL;
507 addr = TREE_OPERAND (*var_p, 0);
508 const char *obj_name
509 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
510 if (obj_name)
511 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
512 else
513 name = make_ssa_name (TREE_TYPE (addr), NULL);
514 stmt = gimple_build_assign (name, addr);
515 gsi_insert_on_edge_immediate (entry, stmt);
517 nielt = XNEW (struct int_tree_map);
518 nielt->uid = uid;
519 nielt->to = name;
520 *dslot = nielt;
522 else
523 name = (*dslot)->to;
525 /* Express the address in terms of the canonical SSA name. */
526 TREE_OPERAND (*var_p, 0) = name;
527 if (gsi == NULL)
528 return build_fold_addr_expr_with_type (obj, type);
530 name = force_gimple_operand (build_addr (obj, current_function_decl),
531 &stmts, true, NULL_TREE);
532 if (!gimple_seq_empty_p (stmts))
533 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
535 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
537 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
538 NULL_TREE);
539 if (!gimple_seq_empty_p (stmts))
540 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
543 return name;
546 /* Callback for htab_traverse. Create the initialization statement
547 for reduction described in SLOT, and place it at the preheader of
548 the loop described in DATA. */
551 initialize_reductions (reduction_info **slot, struct loop *loop)
553 tree init, c;
554 tree bvar, type, arg;
555 edge e;
557 struct reduction_info *const reduc = *slot;
559 /* Create initialization in preheader:
560 reduction_variable = initialization value of reduction. */
562 /* In the phi node at the header, replace the argument coming
563 from the preheader with the reduction initialization value. */
565 /* Create a new variable to initialize the reduction. */
566 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
567 bvar = create_tmp_var (type, "reduction");
569 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
570 OMP_CLAUSE_REDUCTION);
571 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
572 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
574 init = omp_reduction_init (c, TREE_TYPE (bvar));
575 reduc->init = init;
577 /* Replace the argument representing the initialization value
578 with the initialization value for the reduction (neutral
579 element for the particular operation, e.g. 0 for PLUS_EXPR,
580 1 for MULT_EXPR, etc).
581 Keep the old value in a new variable "reduction_initial",
582 that will be taken in consideration after the parallel
583 computing is done. */
585 e = loop_preheader_edge (loop);
586 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
587 /* Create new variable to hold the initial value. */
589 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
590 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
591 reduc->initial_value = arg;
592 return 1;
595 struct elv_data
597 struct walk_stmt_info info;
598 edge entry;
599 int_tree_htab_type decl_address;
600 gimple_stmt_iterator *gsi;
601 bool changed;
602 bool reset;
605 /* Eliminates references to local variables in *TP out of the single
606 entry single exit region starting at DTA->ENTRY.
607 DECL_ADDRESS contains addresses of the references that had their
608 address taken already. If the expression is changed, CHANGED is
609 set to true. Callback for walk_tree. */
611 static tree
612 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
614 struct elv_data *const dta = (struct elv_data *) data;
615 tree t = *tp, var, addr, addr_type, type, obj;
617 if (DECL_P (t))
619 *walk_subtrees = 0;
621 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
622 return NULL_TREE;
624 type = TREE_TYPE (t);
625 addr_type = build_pointer_type (type);
626 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
627 dta->gsi);
628 if (dta->gsi == NULL && addr == NULL_TREE)
630 dta->reset = true;
631 return NULL_TREE;
634 *tp = build_simple_mem_ref (addr);
636 dta->changed = true;
637 return NULL_TREE;
640 if (TREE_CODE (t) == ADDR_EXPR)
642 /* ADDR_EXPR may appear in two contexts:
643 -- as a gimple operand, when the address taken is a function invariant
644 -- as gimple rhs, when the resulting address in not a function
645 invariant
646 We do not need to do anything special in the latter case (the base of
647 the memory reference whose address is taken may be replaced in the
648 DECL_P case). The former case is more complicated, as we need to
649 ensure that the new address is still a gimple operand. Thus, it
650 is not sufficient to replace just the base of the memory reference --
651 we need to move the whole computation of the address out of the
652 loop. */
653 if (!is_gimple_val (t))
654 return NULL_TREE;
656 *walk_subtrees = 0;
657 obj = TREE_OPERAND (t, 0);
658 var = get_base_address (obj);
659 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
660 return NULL_TREE;
662 addr_type = TREE_TYPE (t);
663 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
664 dta->gsi);
665 if (dta->gsi == NULL && addr == NULL_TREE)
667 dta->reset = true;
668 return NULL_TREE;
670 *tp = addr;
672 dta->changed = true;
673 return NULL_TREE;
676 if (!EXPR_P (t))
677 *walk_subtrees = 0;
679 return NULL_TREE;
682 /* Moves the references to local variables in STMT at *GSI out of the single
683 entry single exit region starting at ENTRY. DECL_ADDRESS contains
684 addresses of the references that had their address taken
685 already. */
687 static void
688 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
689 int_tree_htab_type decl_address)
691 struct elv_data dta;
692 gimple stmt = gsi_stmt (*gsi);
694 memset (&dta.info, '\0', sizeof (dta.info));
695 dta.entry = entry;
696 dta.decl_address = decl_address;
697 dta.changed = false;
698 dta.reset = false;
700 if (gimple_debug_bind_p (stmt))
702 dta.gsi = NULL;
703 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
704 eliminate_local_variables_1, &dta.info, NULL);
705 if (dta.reset)
707 gimple_debug_bind_reset_value (stmt);
708 dta.changed = true;
711 else if (gimple_clobber_p (stmt))
713 stmt = gimple_build_nop ();
714 gsi_replace (gsi, stmt, false);
715 dta.changed = true;
717 else
719 dta.gsi = gsi;
720 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
723 if (dta.changed)
724 update_stmt (stmt);
727 /* Eliminates the references to local variables from the single entry
728 single exit region between the ENTRY and EXIT edges.
730 This includes:
731 1) Taking address of a local variable -- these are moved out of the
732 region (and temporary variable is created to hold the address if
733 necessary).
735 2) Dereferencing a local variable -- these are replaced with indirect
736 references. */
738 static void
739 eliminate_local_variables (edge entry, edge exit)
741 basic_block bb;
742 stack_vec<basic_block, 3> body;
743 unsigned i;
744 gimple_stmt_iterator gsi;
745 bool has_debug_stmt = false;
746 int_tree_htab_type decl_address;
747 decl_address.create (10);
748 basic_block entry_bb = entry->src;
749 basic_block exit_bb = exit->dest;
751 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
753 FOR_EACH_VEC_ELT (body, i, bb)
754 if (bb != entry_bb && bb != exit_bb)
755 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
756 if (is_gimple_debug (gsi_stmt (gsi)))
758 if (gimple_debug_bind_p (gsi_stmt (gsi)))
759 has_debug_stmt = true;
761 else
762 eliminate_local_variables_stmt (entry, &gsi, decl_address);
764 if (has_debug_stmt)
765 FOR_EACH_VEC_ELT (body, i, bb)
766 if (bb != entry_bb && bb != exit_bb)
767 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
768 if (gimple_debug_bind_p (gsi_stmt (gsi)))
769 eliminate_local_variables_stmt (entry, &gsi, decl_address);
771 decl_address.dispose ();
774 /* Returns true if expression EXPR is not defined between ENTRY and
775 EXIT, i.e. if all its operands are defined outside of the region. */
777 static bool
778 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
780 basic_block entry_bb = entry->src;
781 basic_block exit_bb = exit->dest;
782 basic_block def_bb;
784 if (is_gimple_min_invariant (expr))
785 return true;
787 if (TREE_CODE (expr) == SSA_NAME)
789 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
790 if (def_bb
791 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
792 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
793 return false;
795 return true;
798 return false;
801 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
802 The copies are stored to NAME_COPIES, if NAME was already duplicated,
803 its duplicate stored in NAME_COPIES is returned.
805 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
806 duplicated, storing the copies in DECL_COPIES. */
808 static tree
809 separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
810 int_tree_htab_type decl_copies, bool copy_name_p)
812 tree copy, var, var_copy;
813 unsigned idx, uid, nuid;
814 struct int_tree_map ielt, *nielt;
815 struct name_to_copy_elt elt, *nelt;
816 name_to_copy_elt **slot;
817 int_tree_map **dslot;
819 if (TREE_CODE (name) != SSA_NAME)
820 return name;
822 idx = SSA_NAME_VERSION (name);
823 elt.version = idx;
824 slot = name_copies.find_slot_with_hash (&elt, idx,
825 copy_name_p ? INSERT : NO_INSERT);
826 if (slot && *slot)
827 return (*slot)->new_name;
829 if (copy_name_p)
831 copy = duplicate_ssa_name (name, NULL);
832 nelt = XNEW (struct name_to_copy_elt);
833 nelt->version = idx;
834 nelt->new_name = copy;
835 nelt->field = NULL_TREE;
836 *slot = nelt;
838 else
840 gcc_assert (!slot);
841 copy = name;
844 var = SSA_NAME_VAR (name);
845 if (!var)
846 return copy;
848 uid = DECL_UID (var);
849 ielt.uid = uid;
850 dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
851 if (!*dslot)
853 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
854 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
855 nielt = XNEW (struct int_tree_map);
856 nielt->uid = uid;
857 nielt->to = var_copy;
858 *dslot = nielt;
860 /* Ensure that when we meet this decl next time, we won't duplicate
861 it again. */
862 nuid = DECL_UID (var_copy);
863 ielt.uid = nuid;
864 dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
865 gcc_assert (!*dslot);
866 nielt = XNEW (struct int_tree_map);
867 nielt->uid = nuid;
868 nielt->to = var_copy;
869 *dslot = nielt;
871 else
872 var_copy = ((struct int_tree_map *) *dslot)->to;
874 replace_ssa_name_symbol (copy, var_copy);
875 return copy;
878 /* Finds the ssa names used in STMT that are defined outside the
879 region between ENTRY and EXIT and replaces such ssa names with
880 their duplicates. The duplicates are stored to NAME_COPIES. Base
881 decls of all ssa names used in STMT (including those defined in
882 LOOP) are replaced with the new temporary variables; the
883 replacement decls are stored in DECL_COPIES. */
885 static void
886 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
887 name_to_copy_table_type name_copies,
888 int_tree_htab_type decl_copies)
890 use_operand_p use;
891 def_operand_p def;
892 ssa_op_iter oi;
893 tree name, copy;
894 bool copy_name_p;
896 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
898 name = DEF_FROM_PTR (def);
899 gcc_assert (TREE_CODE (name) == SSA_NAME);
900 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
901 false);
902 gcc_assert (copy == name);
905 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
907 name = USE_FROM_PTR (use);
908 if (TREE_CODE (name) != SSA_NAME)
909 continue;
911 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
912 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
913 copy_name_p);
914 SET_USE (use, copy);
918 /* Finds the ssa names used in STMT that are defined outside the
919 region between ENTRY and EXIT and replaces such ssa names with
920 their duplicates. The duplicates are stored to NAME_COPIES. Base
921 decls of all ssa names used in STMT (including those defined in
922 LOOP) are replaced with the new temporary variables; the
923 replacement decls are stored in DECL_COPIES. */
925 static bool
926 separate_decls_in_region_debug (gimple stmt,
927 name_to_copy_table_type name_copies,
928 int_tree_htab_type decl_copies)
930 use_operand_p use;
931 ssa_op_iter oi;
932 tree var, name;
933 struct int_tree_map ielt;
934 struct name_to_copy_elt elt;
935 name_to_copy_elt **slot;
936 int_tree_map **dslot;
938 if (gimple_debug_bind_p (stmt))
939 var = gimple_debug_bind_get_var (stmt);
940 else if (gimple_debug_source_bind_p (stmt))
941 var = gimple_debug_source_bind_get_var (stmt);
942 else
943 return true;
944 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
945 return true;
946 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
947 ielt.uid = DECL_UID (var);
948 dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
949 if (!dslot)
950 return true;
951 if (gimple_debug_bind_p (stmt))
952 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
953 else if (gimple_debug_source_bind_p (stmt))
954 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
956 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
958 name = USE_FROM_PTR (use);
959 if (TREE_CODE (name) != SSA_NAME)
960 continue;
962 elt.version = SSA_NAME_VERSION (name);
963 slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
964 if (!slot)
966 gimple_debug_bind_reset_value (stmt);
967 update_stmt (stmt);
968 break;
971 SET_USE (use, (*slot)->new_name);
974 return false;
977 /* Callback for htab_traverse. Adds a field corresponding to the reduction
978 specified in SLOT. The type is passed in DATA. */
981 add_field_for_reduction (reduction_info **slot, tree type)
984 struct reduction_info *const red = *slot;
985 tree var = gimple_assign_lhs (red->reduc_stmt);
986 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
987 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
989 insert_field_into_struct (type, field);
991 red->field = field;
993 return 1;
996 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
997 described in SLOT. The type is passed in DATA. */
1000 add_field_for_name (name_to_copy_elt **slot, tree type)
1002 struct name_to_copy_elt *const elt = *slot;
1003 tree name = ssa_name (elt->version);
1004 tree field = build_decl (UNKNOWN_LOCATION,
1005 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1006 TREE_TYPE (name));
1008 insert_field_into_struct (type, field);
1009 elt->field = field;
1011 return 1;
1014 /* Callback for htab_traverse. A local result is the intermediate result
1015 computed by a single
1016 thread, or the initial value in case no iteration was executed.
1017 This function creates a phi node reflecting these values.
1018 The phi's result will be stored in NEW_PHI field of the
1019 reduction's data structure. */
1022 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1024 struct reduction_info *const reduc = *slot;
1025 edge e;
1026 gimple new_phi;
1027 basic_block store_bb;
1028 tree local_res;
1029 source_location locus;
1031 /* STORE_BB is the block where the phi
1032 should be stored. It is the destination of the loop exit.
1033 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1034 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1036 /* STORE_BB has two predecessors. One coming from the loop
1037 (the reduction's result is computed at the loop),
1038 and another coming from a block preceding the loop,
1039 when no iterations
1040 are executed (the initial value should be taken). */
1041 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1042 e = EDGE_PRED (store_bb, 1);
1043 else
1044 e = EDGE_PRED (store_bb, 0);
1045 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
1046 locus = gimple_location (reduc->reduc_stmt);
1047 new_phi = create_phi_node (local_res, store_bb);
1048 add_phi_arg (new_phi, reduc->init, e, locus);
1049 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1050 FALLTHRU_EDGE (loop->latch), locus);
1051 reduc->new_phi = new_phi;
1053 return 1;
1056 struct clsn_data
1058 tree store;
1059 tree load;
1061 basic_block store_bb;
1062 basic_block load_bb;
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  Returns 1 to keep
   traversing the reduction table.  */

static int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of this reduction's field inside the shared structure;
     this is the memory location the atomic update targets.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Emit the GIMPLE_OMP_ATOMIC_LOAD of the shared field into TMP_LOAD
     at the start of the new block.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  /* The atomic load must be alone in its block; split again and build
     the combining operation (loaded value OP this thread's partial
     result from NEW_PHI) in the successor.  */
  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* Close the region with the matching GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First merge each thread's partial result with the initial value
     (phi per reduction), then emit one atomic update per reduction.  */
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.
   Replaces the exit phi recorded in KEEP_RES with an assignment from
   the shared structure; returns 1 to continue the traversal.  */

static int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* Read the reduction's field out of the shared structure and assign
     it to the SSA name that used to be defined by the exit phi.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi KEEP_RES is now dead: its result is defined by the new
     load.  Find and remove it; it must exist in its block.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }

  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Point LOAD at the address of the stack copy of the structure,
     then emit one load per reduction after it.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);

}
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  Returns 1 to continue the traversal.  */

static int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  /* Append "store.field = initial_value" at the end of STORE_BB, i.e.
     before the parallel region starts.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  Returns 1 to continue the traversal.  */

static int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* Store: "store.field = original_name" before the parallel region.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Load: "new_name = load->field" inside the spawned function's
     entry region.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  stack_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: record every name/decl used in the region that is
     defined outside of it (phis first, then ordinary statements).
     Debug statements are only noted here and handled afterwards.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  /* Debug binds that cannot be remapped are removed;
		     the iterator is only advanced when nothing was
		     deleted at the current position.  */
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      /* NOTE(review): the guard above calls reduction_list.elements ()
	 unconditionally while the checks below also test is_created ();
	 presumably the table is always created by the caller — confirm.  */
      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list.traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  reduction_list
	    .traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}
1407 /* Bitmap containing uids of functions created by parallelization. We cannot
1408 allocate it from the default obstack, as it must live across compilation
1409 of several functions; we make it gc allocated instead. */
1411 static GTY(()) bitmap parallelized_functions;
1413 /* Returns true if FN was created by create_loop_fn. */
1415 bool
1416 parallelized_function_p (tree fn)
1418 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1419 return false;
1421 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Build a unique private name "<current function>.$loopfn.<N>".  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes a single void* (the shared-data
     pointer) and returns nothing.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  /* Remember the new function so parallelized_function_p can refuse to
     parallelize loops inside it again.  */
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  /* Inlining it back would defeat the outlining.  */
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter carrying the shared data.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Rename the header phi's result and re-create the old name as a
	 phi in ORIG_HEADER fed by the renamed value.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition referring to the renamed control
	     variable.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* N is the index of EXIT->src in the dominance-ordered body.  */
  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the tail of the last iteration (everything dominated by
     the exit test) outside of the loop.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else copied out must be the control variable.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name),
						  nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* NEW_DATA = (type of NEW_DATA) &DATA, via the function's
	 single pointer parameter.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR becomes the OMP_FOR index; rewire the header phi to take
     it on entry.  */
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Drop the increment; GIMPLE_OMP_CONTINUE will carry it instead.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD skips the loop entirely (zero iterations for this thread);
     END is the normal path out of the latch.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      /* Mirror the header phi's incoming values onto the two new
	 edges into EX_BB.  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For loop nests, a smaller per-thread minimum is used since the
     computed NITER underestimates the total work (see TODO above).  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (reduction_list.elements () > 0)
    reduction_list.traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
					new_arg_struct, n_threads, loc);
  if (reduction_list.elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1908 /* Returns true when LOOP contains vector phi nodes. */
1910 static bool
1911 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1913 unsigned i;
1914 basic_block *bbs = get_loop_body_in_dom_order (loop);
1915 gimple_stmt_iterator gsi;
1916 bool res = true;
1918 for (i = 0; i < loop->num_nodes; i++)
1919 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1920 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1921 goto end;
1923 res = false;
1924 end:
1925 free (bbs);
1926 return res;
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type reduction_list,
		     gimple reduc_stmt, gimple phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so fields not set here (init, new_phi,
     keep_res, field, ...) start out null and are filled in later.  */
  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  /* The SSA version of the phi result serves as the hash key.  */
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = reduction_list.find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}
1959 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1962 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
1964 struct reduction_info *const red = *slot;
1965 gimple_set_uid (red->reduc_phi, red->reduc_version);
1966 return 1;
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* NOTE(review): vect_analyze_loop_form may return NULL for loops it
     cannot handle; the reduction detection below is guarded on it, and
     destroy_loop_vec_info is presumably NULL-safe -- confirm.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* Phis that are simple ivs are not reductions; for the rest, let
	 the vectorizer's analysis decide.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							   phi, true,
							   &double_reduc);
	  /* Double reductions (nested-loop reductions) are not
	     supported here.  */
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
}
2007 /* Try to initialize NITER for code generation part. */
2009 static bool
2010 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2012 edge exit = single_dom_exit (loop);
2014 gcc_assert (exit);
2016 /* We need to know # of iterations, and there should be no uses of values
2017 defined inside loop outside of it, unless the values are invariants of
2018 the loop. */
2019 if (!number_of_iterations_exit (loop, exit, niter, false))
2021 if (dump_file && (dump_flags & TDF_DETAILS))
2022 fprintf (dump_file, " FAILED: number of iterations not known\n");
2023 return false;
2026 return true;
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  Returns false (after
   dumping the reason) when a value escapes the loop in a way that is
   not a recognized reduction, or when a header phi is neither a simple
   iv nor a reduction.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every non-virtual value live across the exit must belong to a
     detected reduction.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern:  \n");
	    }
	  if (reduction_list.elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the in-loop (non-debug) use of VAL; that should be
	     the reduction phi.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  reduction_info_table_type reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  /* Nonlocal labels make duplicating the loop body unsafe.  */
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list.create (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* The reduction table is reused across candidate loops.  */
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n",loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      /* Code generation relies on a unique dominating exit edge.  */
      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");
	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Profitability: skip loops with too few iterations to amortize
	 thread startup, and loops in cold regions.  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      /* The iteration count must be computable for code generation.  */
      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      /* All values live after the loop must be reductions.  */
      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      /* Full dependence analysis, unless graphite already proved it.  */
      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOC)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	}
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  reduction_list.dispose ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
2243 /* Parallelization. */
2245 static bool
2246 gate_tree_parallelize_loops (void)
2248 return flag_tree_parallelize_loops > 1;
2251 static unsigned
2252 tree_parallelize_loops (void)
2254 if (number_of_loops (cfun) <= 1)
2255 return 0;
2257 if (parallelize_loops ())
2258 return TODO_cleanup_cfg | TODO_rebuild_alias;
2259 return 0;
namespace {

/* Static description of the parloops pass for the pass manager.  */

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_flow, /* todo_flags_finish */
};

/* Pass wrapper; gate and execute simply forward to the file-local
   implementations above.  */

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_parallelize_loops (); }
  unsigned int execute () { return tree_parallelize_loops (); }

}; // class pass_parallelize_loops

} // anon namespace
2294 gimple_opt_pass *
2295 make_pass_parallelize_loops (gcc::context *ctxt)
2297 return new pass_parallelize_loops (ctxt);
2301 #include "gt-tree-parloops.h"