/* Loop autoparallelization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if that is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation is illustrated by the following example.

parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;

exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);

after reduction transformation (only relevant parts):

parloop
{

....

  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1() #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

 # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #

  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);
}

*/
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
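/* For example, with this default of 100 and four threads, a loop has to run
   for at least 400 iterations before it is parallelized; loops containing an
   inner loop use a smaller bound of 2 instead (see the guard built in
   gen_parallel_loop below).  */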
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table <reduction_hasher> reduction_info_table_type;
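/* Return the reduction info of PHI from REDUCTION_LIST, or NULL if PHI is
   not a known reduction phi.  Lookup is keyed on the SSA name version
   stored in the phi's uid.  */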
static struct reduction_info *
reduction_phi (reduction_info_table_type reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list.elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list.find (&tmpred);

  return red;
}
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}
/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
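/* For instance, applying the loop-skewing matrix [[1, 0], [1, 1]] to the
   dependence distance vector (1, -1) yields (1*1 + 0*-1, 1*1 + 1*-1)
   = (1, 0): the transformed dependence is lexicographically positive,
   which is exactly what lambda_transform_legal_p below checks for.  */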
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DEPENDENCE_RELATIONS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
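/* As an example, loop_parallel_p below instantiates this test with the
   1x1 reversal matrix [[-1]]: a self-dependence with distance (1), as in
   "a[i] = a[i-1] + x", becomes (-1) after reversal, which is not
   lexicographically positive, so the loop is rejected; a loop with no
   cross-iteration dependences passes, hence can run in parallel.  */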
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<loop_p> loop_nest;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs.create (10);
  dependence_relations.create (10 * 10);
  loop_nest.create (3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  loop_nest.release ();
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  int_tree_map **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr), NULL);
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = (*dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
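/* Bookkeeping passed to the walk_tree callback eliminate_local_variables_1
   below: the entry edge of the single-entry single-exit region being
   processed, the table of addresses already taken, the insertion point for
   new statements, and flags recording what the walk did.  */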
struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  vec<basic_block> body;
  body.create (3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address;
  decl_address.create (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  decl_address.dispose ();
  body.release ();
}
/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies, bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies.find_slot_with_hash (&elt, idx,
					  copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Like separate_decls_in_region_stmt, but for a debug bind STMT: replaces
   its variable and the ssa names it uses with the duplicates recorded in
   DECL_COPIES and NAME_COPIES.  Returns true if the debug stmt should be
   removed, i.e. when its variable has no recorded copy or is of a kind we
   do not handle.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type name_copies,
				int_tree_htab_type decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

static int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

static int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
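/* Describes the data structure shared between the threads: STORE is the
   instance written before the parallel region and LOAD the pointer through
   which the spawned function reads it back, together with the basic blocks
   in which the stores and loads are placed.  */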
struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  reduction_list
    .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
/* Callback for htab_traverse.  Store the initial value given by the user
   for the particular reduction into the reduction field of the shared
   structure (the neutral element has already been substituted into the
   loop phi by initialize_reductions).  The reduction is specified in
   SLOT.  The store information is passed in DATA.  */

static int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Callback for htab_traverse.  Creates a load to a field of LOAD in LOAD_BB
   and a store to a field of STORE in STORE_BB for the ssa name and its
   duplicate specified in SLOT.  */

static int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */
static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  vec<basic_block> body;
  body.create (3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  body.release ();

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list.traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  reduction_list
	    .traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}
/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing the GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above, dominance info is hosed.  Recompute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
1733 /* Generates code to execute the iterations of LOOP in N_THREADS
1734 threads in parallel.
1736 NITER describes number of iterations of LOOP.
1737 REDUCTION_LIST describes the reductions existent in the LOOP. */
1739 static void
1740 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1741 unsigned n_threads, struct tree_niter_desc *niter)
1743 loop_iterator li;
1744 tree many_iterations_cond, type, nit;
1745 tree arg_struct, new_arg_struct;
1746 gimple_seq stmts;
1747 basic_block parallel_head;
1748 edge entry, exit;
1749 struct clsn_data clsn_data;
1750 unsigned prob;
1751 location_t loc;
1752 gimple cond_stmt;
1753 unsigned int m_p_thread=2;
1755 /* From
1757 ---------------------------------------------------------------------
1758 loop
1760 IV = phi (INIT, IV + STEP)
1761 BODY1;
1762 if (COND)
1763 break;
1764 BODY2;
1766 ---------------------------------------------------------------------
1768 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1769 we generate the following code:
1771 ---------------------------------------------------------------------
1773 if (MAY_BE_ZERO
1774 || NITER < MIN_PER_THREAD * N_THREADS)
1775 goto original;
1777 BODY1;
1778 store all local loop-invariant variables used in body of the loop to DATA.
1779 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1780 load the variables from DATA.
1781 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1782 BODY2;
1783 BODY1;
1784 GIMPLE_OMP_CONTINUE;
1785 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1786 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1787 goto end;
1789 original:
1790 loop
1792 IV = phi (INIT, IV + STEP)
1793 BODY1;
1794 if (COND)
1795 break;
1796 BODY2;
1799 end:
1803 /* Create two versions of the loop -- in the old one, we know that the
1804 number of iterations is large enough, and we will transform it into the
1805 loop that will be split to loop_fn, the new one will be used for the
1806 remaining iterations. */
1808 /* We should compute a better number-of-iterations value for outer loops.
1809 That is, if we have
1811 for (i = 0; i < n; ++i)
1812 for (j = 0; j < m; ++j)
1815 we should compute nit = n * m, not nit = n.
1816 Also may_be_zero handling would need to be adjusted. */
1818 type = TREE_TYPE (niter->niter);
1819 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1820 NULL_TREE);
1821 if (stmts)
1822 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1824 if (loop->inner)
1825 m_p_thread=2;
1826 else
1827 m_p_thread=MIN_PER_THREAD;
1829 many_iterations_cond =
1830 fold_build2 (GE_EXPR, boolean_type_node,
1831 nit, build_int_cst (type, m_p_thread * n_threads));
1833 many_iterations_cond
1834 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1835 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1836 many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
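  /* REG_BR_PROB_BASE is 10000, so the versioning condition is predicted
     taken with probability 8000/10000, i.e. 80%.  */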
  loop_version (loop, many_iterations_cond, NULL,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);
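
  /* For illustration: after this call a loop with ivs i = {0, +, 1} and
     p = {p0, +, 4} is driven by one fresh counter t starting at zero,
     with i and p recomputed from it as 0 + 1 * t and p0 + 4 * t.  */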

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);
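
  /* I.e. the loop is rotated from "body; if (COND) exit;" shape into
     "if (COND) exit; body;" shape, which is the form the GIMPLE_OMP_FOR
     expansion machinery expects.  */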

  /* Generate initializations for reductions.  */
  if (reduction_list.elements () > 0)
    reduction_list.traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a
     structure and back, and create separate decls for the variables
     used in the loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
                                        new_arg_struct, n_threads, loc);
  if (reduction_list.elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}

/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gimple_stmt_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
        goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, and insert it into REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type reduction_list,
                     gimple reduc_stmt, gimple phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
               "Detected reduction.  Reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = reduction_list.find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
        continue;

      if (!simple_iv (loop, loop, res, &iv, true)
          && simple_loop_info)
        {
          gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
                                                           phi, true,
                                                           &double_reduc);
          if (reduc_stmt && !double_reduc)
            build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
}

/* Try to initialize NITER for the code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Try to initialize REDUCTION_LIST for the code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
                           reduction_info_table_type reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "phi is ");
              print_gimple_stmt (dump_file, phi, 0, 0);
              fprintf (dump_file, "arg of phi to exit: value ");
              print_generic_expr (dump_file, val, 0);
              fprintf (dump_file, " used outside loop\n");
              fprintf (dump_file,
                       "  checking if it is a part of a reduction pattern:\n");
            }
          if (reduction_list.elements () == 0)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not part of a reduction.\n");
              return false;
            }
          reduc_phi = NULL;
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
            {
              if (!gimple_debug_bind_p (USE_STMT (use_p))
                  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
                {
                  reduc_phi = USE_STMT (use_p);
                  break;
                }
            }
          red = reduction_phi (reduction_list, reduc_phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not part of a reduction.\n");
              return false;
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "reduction phi is ");
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
              fprintf (dump_file, "reduction stmt is ");
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
            }
        }
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
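  /* For example, in

       for (i = 0; i < n; i++)
         sum += a[i];

     the header phi for SUM is not a simple affine iv, but it was
     recognized as a reduction above and so is acceptable.  A header phi
     that is neither an iv nor a recorded reduction means a scalar
     dependency between iterations, and the loop is rejected below.  */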
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
        {
          struct reduction_info *red;

          red = reduction_phi (reduction_list, phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: scalar dependency between iterations\n");
              return false;
            }
        }
    }

  return true;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  reduction_info_table_type reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
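
  /* Outlining the loop body into a separate function is presumably
     unsafe in the presence of nonlocal labels, which are targets of
     abnormal control-flow edges; hence the following check.  */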
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list.create (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
          if (loop->inner)
            fprintf (dump_file, "loop %d is not innermost\n", loop->num);
          else
            fprintf (dump_file, "loop %d is innermost\n", loop->num);
        }

      /* If autopar was run from the graphite pass, use its dependency
         checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is not parallel according to graphite\n");
          continue;
        }

      if (!single_dom_exit (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is !single_dom_exit\n");

          continue;
        }

      if (/* And of course, the loop must be parallelizable.  */
          !can_duplicate_loop_p (loop)
          || loop_has_blocks_with_irreducible_flag (loop)
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
          /* FIXME: the check for vector phi nodes could be removed.  */
          || loop_has_vector_phi_nodes (loop))
        continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
        estimated = max_stmt_executions_int (loop);
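
      /* Profitability heuristic: e.g. with n_threads == 4, a loop whose
         statements are expected to execute at most 4 * MIN_PER_THREAD
         times in total is left serial, since the thread start-up cost
         would dominate any gain.  */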
      /* FIXME: Bypass this check as graphite doesn't update the
         count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
          && ((estimated != -1
               && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
              /* Do not bother with loops in cold areas.  */
              || optimize_loop_nest_for_size_p (loop)))
        continue;

      if (!try_get_loop_niter (loop, &niter_desc))
        continue;

      if (!try_create_reduction_list (loop, reduction_list))
        continue;

      if (!flag_loop_parallelize_all
          && !loop_parallel_p (loop, &parloop_obstack))
        continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop->inner)
            fprintf (dump_file, "parallelizing outer loop %d\n",
                     loop->header->index);
          else
            fprintf (dump_file, "parallelizing inner loop %d\n",
                     loop->header->index);
          loop_loc = find_loop_location (loop);
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\nloop at %s:%d: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
        }
      gen_parallel_loop (loop, reduction_list,
                         n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  reduction_list.dispose ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

/* Parallelization.  */
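
/* Note on the gate below: flag_tree_parallelize_loops holds the N of
   -ftree-parallelize-loops=N and defaults to 1, so the pass runs only
   for N > 1 -- a single thread cannot profit from parallelization.  */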

static bool
gate_tree_parallelize_loops (void)
{
  return flag_tree_parallelize_loops > 1;
}

static unsigned
tree_parallelize_loops (void)
{
  if (number_of_loops (cfun) <= 1)
    return 0;

  if (parallelize_loops ())
    return TODO_cleanup_cfg | TODO_rebuild_alias;
  return 0;
}

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_flow, /* todo_flags_finish */
};

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_parallelize_loops (); }
  unsigned int execute () { return tree_parallelize_loops (); }

}; // class pass_parallelize_loops

} // anon namespace

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}

#include "gt-tree-parloops.h"