Mark ChangeLog
[official-gcc.git] / gcc / tree-parloops.c
blob9d2c3ca3b4172893f3f49a1659d2a30bd707643d
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree-flow.h"
26 #include "cfgloop.h"
27 #include "tree-data-ref.h"
28 #include "tree-scalar-evolution.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-pass.h"
31 #include "langhooks.h"
32 #include "tree-vectorizer.h"
34 /* This pass tries to distribute iterations of loops into several threads.
35 The implementation is straightforward -- for each loop we test whether its
36 iterations are independent, and if it is the case (and some additional
37 conditions regarding profitability and correctness are satisfied), we
38 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
39 machinery do its job.
41 The most of the complexity is in bringing the code into shape expected
42 by the omp expanders:
43 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
44 variable and that the exit test is at the start of the loop body
45 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
46 variables by accesses through pointers, and breaking up ssa chains
47 by storing the values incoming to the parallelized loop to a structure
48 passed to the new function as an argument (something similar is done
49 in omp gimplification, unfortunately only a small part of the code
50 can be shared).
52 TODO:
53 -- if there are several parallelizable loops in a function, it may be
54 possible to generate the threads just once (using synchronization to
55 ensure that cross-loop dependences are obeyed).
56 -- handling of common reduction patterns for outer loops.
  More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>

  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

 # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 =  .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);
}

*/
155 /* Minimal number of iterations of a loop that should be executed in each
156 thread. */
157 #define MIN_PER_THREAD 100
159 /* Element of the hashtable, representing a
160 reduction in the current loop. */
161 struct reduction_info
163 gimple reduc_stmt; /* reduction statement. */
164 gimple reduc_phi; /* The phi node defining the reduction. */
165 enum tree_code reduction_code;/* code for the reduction operation. */
166 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
167 result. */
168 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
169 of the reduction variable when existing the loop. */
170 tree initial_value; /* The initial value of the reduction var before entering the loop. */
171 tree field; /* the name of the field in the parloop data structure intended for reduction. */
172 tree init; /* reduction initialization value. */
173 gimple new_phi; /* (helper field) Newly created phi node whose result
174 will be passed to the atomic operation. Represents
175 the local result each thread computed for the reduction
176 operation. */
179 /* Equality and hash functions for hashtab code. */
181 static int
182 reduction_info_eq (const void *aa, const void *bb)
184 const struct reduction_info *a = (const struct reduction_info *) aa;
185 const struct reduction_info *b = (const struct reduction_info *) bb;
187 return (a->reduc_phi == b->reduc_phi);
190 static hashval_t
191 reduction_info_hash (const void *aa)
193 const struct reduction_info *a = (const struct reduction_info *) aa;
195 return a->reduc_version;
198 static struct reduction_info *
199 reduction_phi (htab_t reduction_list, gimple phi)
201 struct reduction_info tmpred, *red;
203 if (htab_elements (reduction_list) == 0 || phi == NULL)
204 return NULL;
206 tmpred.reduc_phi = phi;
207 tmpred.reduc_version = gimple_uid (phi);
208 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210 return red;
213 /* Element of hashtable of names to copy. */
215 struct name_to_copy_elt
217 unsigned version; /* The version of the name to copy. */
218 tree new_name; /* The new name used in the copy. */
219 tree field; /* The field of the structure used to pass the
220 value. */
223 /* Equality and hash functions for hashtab code. */
225 static int
226 name_to_copy_elt_eq (const void *aa, const void *bb)
228 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
229 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231 return a->version == b->version;
234 static hashval_t
235 name_to_copy_elt_hash (const void *aa)
237 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239 return (hashval_t) a->version;
242 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
243 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
244 represents the denominator for every element in the matrix. */
245 typedef struct lambda_trans_matrix_s
247 lambda_matrix matrix;
248 int rowsize;
249 int colsize;
250 int denominator;
251 } *lambda_trans_matrix;
252 #define LTM_MATRIX(T) ((T)->matrix)
253 #define LTM_ROWSIZE(T) ((T)->rowsize)
254 #define LTM_COLSIZE(T) ((T)->colsize)
255 #define LTM_DENOMINATOR(T) ((T)->denominator)
257 /* Allocate a new transformation matrix. */
259 static lambda_trans_matrix
260 lambda_trans_matrix_new (int colsize, int rowsize,
261 struct obstack * lambda_obstack)
263 lambda_trans_matrix ret;
265 ret = (lambda_trans_matrix)
266 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
267 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
268 LTM_ROWSIZE (ret) = rowsize;
269 LTM_COLSIZE (ret) = colsize;
270 LTM_DENOMINATOR (ret) = 1;
271 return ret;
274 /* Multiply a vector VEC by a matrix MAT.
275 MAT is an M*N matrix, and VEC is a vector with length N. The result
276 is stored in DEST which must be a vector of length M. */
278 static void
279 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
280 lambda_vector vec, lambda_vector dest)
282 int i, j;
284 lambda_vector_clear (dest, m);
285 for (i = 0; i < m; i++)
286 for (j = 0; j < n; j++)
287 dest[i] += matrix[i][j] * vec[j];
290 /* Return true if TRANS is a legal transformation matrix that respects
291 the dependence vectors in DISTS and DIRS. The conservative answer
292 is false.
294 "Wolfe proves that a unimodular transformation represented by the
295 matrix T is legal when applied to a loop nest with a set of
296 lexicographically non-negative distance vectors RDG if and only if
297 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
298 i.e.: if and only if it transforms the lexicographically positive
299 distance vectors to lexicographically positive vectors. Note that
300 a unimodular matrix must transform the zero vector (and only it) to
301 the zero vector." S.Muchnick. */
303 static bool
304 lambda_transform_legal_p (lambda_trans_matrix trans,
305 int nb_loops,
306 vec<ddr_p> dependence_relations)
308 unsigned int i, j;
309 lambda_vector distres;
310 struct data_dependence_relation *ddr;
312 gcc_assert (LTM_COLSIZE (trans) == nb_loops
313 && LTM_ROWSIZE (trans) == nb_loops);
315 /* When there are no dependences, the transformation is correct. */
316 if (dependence_relations.length () == 0)
317 return true;
319 ddr = dependence_relations[0];
320 if (ddr == NULL)
321 return true;
323 /* When there is an unknown relation in the dependence_relations, we
324 know that it is no worth looking at this loop nest: give up. */
325 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
326 return false;
328 distres = lambda_vector_new (nb_loops);
330 /* For each distance vector in the dependence graph. */
331 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
333 /* Don't care about relations for which we know that there is no
334 dependence, nor about read-read (aka. output-dependences):
335 these data accesses can happen in any order. */
336 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
337 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
338 continue;
340 /* Conservatively answer: "this transformation is not valid". */
341 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
342 return false;
344 /* If the dependence could not be captured by a distance vector,
345 conservatively answer that the transform is not valid. */
346 if (DDR_NUM_DIST_VECTS (ddr) == 0)
347 return false;
349 /* Compute trans.dist_vect */
350 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
353 DDR_DIST_VECT (ddr, j), distres);
355 if (!lambda_vector_lexico_pos (distres, nb_loops))
356 return false;
359 return true;
362 /* Data dependency analysis. Returns true if the iterations of LOOP
363 are independent on each other (that is, if we can execute them
364 in parallel). */
366 static bool
367 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
369 vec<loop_p> loop_nest;
370 vec<ddr_p> dependence_relations;
371 vec<data_reference_p> datarefs;
372 lambda_trans_matrix trans;
373 bool ret = false;
375 if (dump_file && (dump_flags & TDF_DETAILS))
377 fprintf (dump_file, "Considering loop %d\n", loop->num);
378 if (!loop->inner)
379 fprintf (dump_file, "loop is innermost\n");
380 else
381 fprintf (dump_file, "loop NOT innermost\n");
384 /* Check for problems with dependences. If the loop can be reversed,
385 the iterations are independent. */
386 datarefs.create (10);
387 dependence_relations.create (10 * 10);
388 loop_nest.create (3);
389 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
390 &dependence_relations))
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
394 ret = false;
395 goto end;
397 if (dump_file && (dump_flags & TDF_DETAILS))
398 dump_data_dependence_relations (dump_file, dependence_relations);
400 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
401 LTM_MATRIX (trans)[0][0] = -1;
403 if (lambda_transform_legal_p (trans, 1, dependence_relations))
405 ret = true;
406 if (dump_file && (dump_flags & TDF_DETAILS))
407 fprintf (dump_file, " SUCCESS: may be parallelized\n");
409 else if (dump_file && (dump_flags & TDF_DETAILS))
410 fprintf (dump_file,
411 " FAILED: data dependencies exist across iterations\n");
413 end:
414 loop_nest.release ();
415 free_dependence_relations (dependence_relations);
416 free_data_refs (datarefs);
418 return ret;
421 /* Return true when LOOP contains basic blocks marked with the
422 BB_IRREDUCIBLE_LOOP flag. */
424 static inline bool
425 loop_has_blocks_with_irreducible_flag (struct loop *loop)
427 unsigned i;
428 basic_block *bbs = get_loop_body_in_dom_order (loop);
429 bool res = true;
431 for (i = 0; i < loop->num_nodes; i++)
432 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
433 goto end;
435 res = false;
436 end:
437 free (bbs);
438 return res;
441 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
442 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
443 to their addresses that can be reused. The address of OBJ is known to
444 be invariant in the whole function. Other needed statements are placed
445 right before GSI. */
447 static tree
448 take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
449 gimple_stmt_iterator *gsi)
451 int uid;
452 void **dslot;
453 struct int_tree_map ielt, *nielt;
454 tree *var_p, name, addr;
455 gimple stmt;
456 gimple_seq stmts;
458 /* Since the address of OBJ is invariant, the trees may be shared.
459 Avoid rewriting unrelated parts of the code. */
460 obj = unshare_expr (obj);
461 for (var_p = &obj;
462 handled_component_p (*var_p);
463 var_p = &TREE_OPERAND (*var_p, 0))
464 continue;
466 /* Canonicalize the access to base on a MEM_REF. */
467 if (DECL_P (*var_p))
468 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470 /* Assign a canonical SSA name to the address of the base decl used
471 in the address and share it for all accesses and addresses based
472 on it. */
473 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
474 ielt.uid = uid;
475 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
476 if (!*dslot)
478 if (gsi == NULL)
479 return NULL;
480 addr = TREE_OPERAND (*var_p, 0);
481 const char *obj_name
482 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
483 if (obj_name)
484 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
485 else
486 name = make_ssa_name (TREE_TYPE (addr), NULL);
487 stmt = gimple_build_assign (name, addr);
488 gsi_insert_on_edge_immediate (entry, stmt);
490 nielt = XNEW (struct int_tree_map);
491 nielt->uid = uid;
492 nielt->to = name;
493 *dslot = nielt;
495 else
496 name = ((struct int_tree_map *) *dslot)->to;
498 /* Express the address in terms of the canonical SSA name. */
499 TREE_OPERAND (*var_p, 0) = name;
500 if (gsi == NULL)
501 return build_fold_addr_expr_with_type (obj, type);
503 name = force_gimple_operand (build_addr (obj, current_function_decl),
504 &stmts, true, NULL_TREE);
505 if (!gimple_seq_empty_p (stmts))
506 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
508 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
510 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
511 NULL_TREE);
512 if (!gimple_seq_empty_p (stmts))
513 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
516 return name;
519 /* Callback for htab_traverse. Create the initialization statement
520 for reduction described in SLOT, and place it at the preheader of
521 the loop described in DATA. */
523 static int
524 initialize_reductions (void **slot, void *data)
526 tree init, c;
527 tree bvar, type, arg;
528 edge e;
530 struct reduction_info *const reduc = (struct reduction_info *) *slot;
531 struct loop *loop = (struct loop *) data;
533 /* Create initialization in preheader:
534 reduction_variable = initialization value of reduction. */
536 /* In the phi node at the header, replace the argument coming
537 from the preheader with the reduction initialization value. */
539 /* Create a new variable to initialize the reduction. */
540 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
541 bvar = create_tmp_var (type, "reduction");
543 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
544 OMP_CLAUSE_REDUCTION);
545 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
546 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
548 init = omp_reduction_init (c, TREE_TYPE (bvar));
549 reduc->init = init;
551 /* Replace the argument representing the initialization value
552 with the initialization value for the reduction (neutral
553 element for the particular operation, e.g. 0 for PLUS_EXPR,
554 1 for MULT_EXPR, etc).
555 Keep the old value in a new variable "reduction_initial",
556 that will be taken in consideration after the parallel
557 computing is done. */
559 e = loop_preheader_edge (loop);
560 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
561 /* Create new variable to hold the initial value. */
563 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
564 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
565 reduc->initial_value = arg;
566 return 1;
569 struct elv_data
571 struct walk_stmt_info info;
572 edge entry;
573 htab_t decl_address;
574 gimple_stmt_iterator *gsi;
575 bool changed;
576 bool reset;
579 /* Eliminates references to local variables in *TP out of the single
580 entry single exit region starting at DTA->ENTRY.
581 DECL_ADDRESS contains addresses of the references that had their
582 address taken already. If the expression is changed, CHANGED is
583 set to true. Callback for walk_tree. */
585 static tree
586 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
588 struct elv_data *const dta = (struct elv_data *) data;
589 tree t = *tp, var, addr, addr_type, type, obj;
591 if (DECL_P (t))
593 *walk_subtrees = 0;
595 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
596 return NULL_TREE;
598 type = TREE_TYPE (t);
599 addr_type = build_pointer_type (type);
600 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
601 dta->gsi);
602 if (dta->gsi == NULL && addr == NULL_TREE)
604 dta->reset = true;
605 return NULL_TREE;
608 *tp = build_simple_mem_ref (addr);
610 dta->changed = true;
611 return NULL_TREE;
614 if (TREE_CODE (t) == ADDR_EXPR)
616 /* ADDR_EXPR may appear in two contexts:
617 -- as a gimple operand, when the address taken is a function invariant
618 -- as gimple rhs, when the resulting address in not a function
619 invariant
620 We do not need to do anything special in the latter case (the base of
621 the memory reference whose address is taken may be replaced in the
622 DECL_P case). The former case is more complicated, as we need to
623 ensure that the new address is still a gimple operand. Thus, it
624 is not sufficient to replace just the base of the memory reference --
625 we need to move the whole computation of the address out of the
626 loop. */
627 if (!is_gimple_val (t))
628 return NULL_TREE;
630 *walk_subtrees = 0;
631 obj = TREE_OPERAND (t, 0);
632 var = get_base_address (obj);
633 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
634 return NULL_TREE;
636 addr_type = TREE_TYPE (t);
637 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
638 dta->gsi);
639 if (dta->gsi == NULL && addr == NULL_TREE)
641 dta->reset = true;
642 return NULL_TREE;
644 *tp = addr;
646 dta->changed = true;
647 return NULL_TREE;
650 if (!EXPR_P (t))
651 *walk_subtrees = 0;
653 return NULL_TREE;
656 /* Moves the references to local variables in STMT at *GSI out of the single
657 entry single exit region starting at ENTRY. DECL_ADDRESS contains
658 addresses of the references that had their address taken
659 already. */
661 static void
662 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
663 htab_t decl_address)
665 struct elv_data dta;
666 gimple stmt = gsi_stmt (*gsi);
668 memset (&dta.info, '\0', sizeof (dta.info));
669 dta.entry = entry;
670 dta.decl_address = decl_address;
671 dta.changed = false;
672 dta.reset = false;
674 if (gimple_debug_bind_p (stmt))
676 dta.gsi = NULL;
677 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
678 eliminate_local_variables_1, &dta.info, NULL);
679 if (dta.reset)
681 gimple_debug_bind_reset_value (stmt);
682 dta.changed = true;
685 else if (gimple_clobber_p (stmt))
687 stmt = gimple_build_nop ();
688 gsi_replace (gsi, stmt, false);
689 dta.changed = true;
691 else
693 dta.gsi = gsi;
694 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
697 if (dta.changed)
698 update_stmt (stmt);
701 /* Eliminates the references to local variables from the single entry
702 single exit region between the ENTRY and EXIT edges.
704 This includes:
705 1) Taking address of a local variable -- these are moved out of the
706 region (and temporary variable is created to hold the address if
707 necessary).
709 2) Dereferencing a local variable -- these are replaced with indirect
710 references. */
712 static void
713 eliminate_local_variables (edge entry, edge exit)
715 basic_block bb;
716 vec<basic_block> body;
717 body.create (3);
718 unsigned i;
719 gimple_stmt_iterator gsi;
720 bool has_debug_stmt = false;
721 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
722 free);
723 basic_block entry_bb = entry->src;
724 basic_block exit_bb = exit->dest;
726 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
728 FOR_EACH_VEC_ELT (body, i, bb)
729 if (bb != entry_bb && bb != exit_bb)
730 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
731 if (is_gimple_debug (gsi_stmt (gsi)))
733 if (gimple_debug_bind_p (gsi_stmt (gsi)))
734 has_debug_stmt = true;
736 else
737 eliminate_local_variables_stmt (entry, &gsi, decl_address);
739 if (has_debug_stmt)
740 FOR_EACH_VEC_ELT (body, i, bb)
741 if (bb != entry_bb && bb != exit_bb)
742 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
743 if (gimple_debug_bind_p (gsi_stmt (gsi)))
744 eliminate_local_variables_stmt (entry, &gsi, decl_address);
746 htab_delete (decl_address);
747 body.release ();
750 /* Returns true if expression EXPR is not defined between ENTRY and
751 EXIT, i.e. if all its operands are defined outside of the region. */
753 static bool
754 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
756 basic_block entry_bb = entry->src;
757 basic_block exit_bb = exit->dest;
758 basic_block def_bb;
760 if (is_gimple_min_invariant (expr))
761 return true;
763 if (TREE_CODE (expr) == SSA_NAME)
765 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
766 if (def_bb
767 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
768 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
769 return false;
771 return true;
774 return false;
777 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
778 The copies are stored to NAME_COPIES, if NAME was already duplicated,
779 its duplicate stored in NAME_COPIES is returned.
781 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
782 duplicated, storing the copies in DECL_COPIES. */
784 static tree
785 separate_decls_in_region_name (tree name,
786 htab_t name_copies, htab_t decl_copies,
787 bool copy_name_p)
789 tree copy, var, var_copy;
790 unsigned idx, uid, nuid;
791 struct int_tree_map ielt, *nielt;
792 struct name_to_copy_elt elt, *nelt;
793 void **slot, **dslot;
795 if (TREE_CODE (name) != SSA_NAME)
796 return name;
798 idx = SSA_NAME_VERSION (name);
799 elt.version = idx;
800 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
801 copy_name_p ? INSERT : NO_INSERT);
802 if (slot && *slot)
803 return ((struct name_to_copy_elt *) *slot)->new_name;
805 if (copy_name_p)
807 copy = duplicate_ssa_name (name, NULL);
808 nelt = XNEW (struct name_to_copy_elt);
809 nelt->version = idx;
810 nelt->new_name = copy;
811 nelt->field = NULL_TREE;
812 *slot = nelt;
814 else
816 gcc_assert (!slot);
817 copy = name;
820 var = SSA_NAME_VAR (name);
821 if (!var)
822 return copy;
824 uid = DECL_UID (var);
825 ielt.uid = uid;
826 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
827 if (!*dslot)
829 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
830 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
831 nielt = XNEW (struct int_tree_map);
832 nielt->uid = uid;
833 nielt->to = var_copy;
834 *dslot = nielt;
836 /* Ensure that when we meet this decl next time, we won't duplicate
837 it again. */
838 nuid = DECL_UID (var_copy);
839 ielt.uid = nuid;
840 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
841 gcc_assert (!*dslot);
842 nielt = XNEW (struct int_tree_map);
843 nielt->uid = nuid;
844 nielt->to = var_copy;
845 *dslot = nielt;
847 else
848 var_copy = ((struct int_tree_map *) *dslot)->to;
850 replace_ssa_name_symbol (copy, var_copy);
851 return copy;
854 /* Finds the ssa names used in STMT that are defined outside the
855 region between ENTRY and EXIT and replaces such ssa names with
856 their duplicates. The duplicates are stored to NAME_COPIES. Base
857 decls of all ssa names used in STMT (including those defined in
858 LOOP) are replaced with the new temporary variables; the
859 replacement decls are stored in DECL_COPIES. */
861 static void
862 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
863 htab_t name_copies, htab_t decl_copies)
865 use_operand_p use;
866 def_operand_p def;
867 ssa_op_iter oi;
868 tree name, copy;
869 bool copy_name_p;
871 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
873 name = DEF_FROM_PTR (def);
874 gcc_assert (TREE_CODE (name) == SSA_NAME);
875 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
876 false);
877 gcc_assert (copy == name);
880 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
882 name = USE_FROM_PTR (use);
883 if (TREE_CODE (name) != SSA_NAME)
884 continue;
886 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
887 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
888 copy_name_p);
889 SET_USE (use, copy);
893 /* Finds the ssa names used in STMT that are defined outside the
894 region between ENTRY and EXIT and replaces such ssa names with
895 their duplicates. The duplicates are stored to NAME_COPIES. Base
896 decls of all ssa names used in STMT (including those defined in
897 LOOP) are replaced with the new temporary variables; the
898 replacement decls are stored in DECL_COPIES. */
900 static bool
901 separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
902 htab_t decl_copies)
904 use_operand_p use;
905 ssa_op_iter oi;
906 tree var, name;
907 struct int_tree_map ielt;
908 struct name_to_copy_elt elt;
909 void **slot, **dslot;
911 if (gimple_debug_bind_p (stmt))
912 var = gimple_debug_bind_get_var (stmt);
913 else if (gimple_debug_source_bind_p (stmt))
914 var = gimple_debug_source_bind_get_var (stmt);
915 else
916 return true;
917 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
918 return true;
919 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
920 ielt.uid = DECL_UID (var);
921 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
922 if (!dslot)
923 return true;
924 if (gimple_debug_bind_p (stmt))
925 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
926 else if (gimple_debug_source_bind_p (stmt))
927 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
929 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
931 name = USE_FROM_PTR (use);
932 if (TREE_CODE (name) != SSA_NAME)
933 continue;
935 elt.version = SSA_NAME_VERSION (name);
936 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
937 if (!slot)
939 gimple_debug_bind_reset_value (stmt);
940 update_stmt (stmt);
941 break;
944 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
947 return false;
950 /* Callback for htab_traverse. Adds a field corresponding to the reduction
951 specified in SLOT. The type is passed in DATA. */
953 static int
954 add_field_for_reduction (void **slot, void *data)
957 struct reduction_info *const red = (struct reduction_info *) *slot;
958 tree const type = (tree) data;
959 tree var = gimple_assign_lhs (red->reduc_stmt);
960 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
961 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
963 insert_field_into_struct (type, field);
965 red->field = field;
967 return 1;
970 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
971 described in SLOT. The type is passed in DATA. */
973 static int
974 add_field_for_name (void **slot, void *data)
976 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
977 tree type = (tree) data;
978 tree name = ssa_name (elt->version);
979 tree field = build_decl (UNKNOWN_LOCATION,
980 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
981 TREE_TYPE (name));
983 insert_field_into_struct (type, field);
984 elt->field = field;
986 return 1;
989 /* Callback for htab_traverse. A local result is the intermediate result
990 computed by a single
991 thread, or the initial value in case no iteration was executed.
992 This function creates a phi node reflecting these values.
993 The phi's result will be stored in NEW_PHI field of the
994 reduction's data structure. */
996 static int
997 create_phi_for_local_result (void **slot, void *data)
999 struct reduction_info *const reduc = (struct reduction_info *) *slot;
1000 const struct loop *const loop = (const struct loop *) data;
1001 edge e;
1002 gimple new_phi;
1003 basic_block store_bb;
1004 tree local_res;
1005 source_location locus;
1007 /* STORE_BB is the block where the phi
1008 should be stored. It is the destination of the loop exit.
1009 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1010 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1012 /* STORE_BB has two predecessors. One coming from the loop
1013 (the reduction's result is computed at the loop),
1014 and another coming from a block preceding the loop,
1015 when no iterations
1016 are executed (the initial value should be taken). */
1017 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1018 e = EDGE_PRED (store_bb, 1);
1019 else
1020 e = EDGE_PRED (store_bb, 0);
1021 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
1022 locus = gimple_location (reduc->reduc_stmt);
1023 new_phi = create_phi_node (local_res, store_bb);
1024 add_phi_arg (new_phi, reduc->init, e, locus);
1025 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1026 FALLTHRU_EDGE (loop->latch), locus);
1027 reduc->new_phi = new_phi;
1029 return 1;
1032 struct clsn_data
1034 tree store;
1035 tree load;
1037 basic_block store_bb;
1038 basic_block load_bb;
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of the shared-structure field that accumulates this
     reduction's global result.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Atomically load the current value of the field...  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  /* ...keep the load alone in its block (the OMP atomic pair delimits
     the region), then combine it with this thread's partial result
     (reduc->new_phi, created by create_phi_for_local_result)...  */
  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* ...and atomically store the combined value back.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;  /* Nonzero keeps htab_traverse iterating.  */
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First give every reduction a phi in the exit block merging the
     per-thread result with the initial value.  */
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  /* Then emit the atomic update of the shared field per reduction.  */
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* Build "(*LOAD).field" and assign it to the SSA name that carried
     the reduction result out of the loop (red->keep_res).  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The kept exit phi is now redundant -- its result is defined by the
     load just inserted -- so remove it.  It must be found in its block.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }

  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (htab_t reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Re-materialize the pointer to the shared structure in the block
     after the threads have joined...  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  /* ...then emit one load per reduction result.  */
  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
}
1167 /* Callback for htab_traverse. Store the neutral value for the
1168 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1169 1 for MULT_EXPR, etc. into the reduction field.
1170 The reduction is specified in SLOT. The store information is
1171 passed in DATA. */
1173 static int
1174 create_stores_for_reduction (void **slot, void *data)
1176 struct reduction_info *const red = (struct reduction_info *) *slot;
1177 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1178 tree t;
1179 gimple stmt;
1180 gimple_stmt_iterator gsi;
1181 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1183 gsi = gsi_last_bb (clsn_data->store_bb);
1184 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1185 stmt = gimple_build_assign (t, red->initial_value);
1186 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1188 return 1;
1191 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1192 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1193 specified in SLOT. */
1195 static int
1196 create_loads_and_stores_for_name (void **slot, void *data)
1198 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1199 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1200 tree t;
1201 gimple stmt;
1202 gimple_stmt_iterator gsi;
1203 tree type = TREE_TYPE (elt->new_name);
1204 tree load_struct;
1206 gsi = gsi_last_bb (clsn_data->store_bb);
1207 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1208 stmt = gimple_build_assign (t, ssa_name (elt->version));
1209 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1211 gsi = gsi_last_bb (clsn_data->load_bb);
1212 load_struct = build_simple_mem_ref (clsn_data->load);
1213 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1214 stmt = gimple_build_assign (elt->new_name, t);
1215 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1216 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1218 return 1;
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  vec<basic_block> body;
  body.create (3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite all non-debug statements, collecting the
     names/decls that need copies.  Debug binds are only noted here.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  /* Remove debug binds that cannot be rewritten; note the
		     manual gsi_next below only runs when nothing was
		     removed.  */
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  body.release ();

  /* NOTE(review): REDUCTION_LIST is dereferenced unconditionally here but
     null-checked before the htab_traverse calls below -- confirm whether
     callers can ever pass NULL.  */
  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
			 type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
			 ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  Bits are set in
   create_loop_fn and tested in parallelized_function_p.  */

static GTY(()) bitmap parallelized_functions;
1393 /* Returns true if FN was created by create_loop_fn. */
1395 bool
1396 parallelized_function_p (tree fn)
1398 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1399 return false;
1401 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Build a unique private name derived from the current function:
     "<fn>.$loopfn.<counter>".  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* Signature is "void fn (void *)" -- the single argument carries the
     ".paral_data" structure.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  /* Record the new function so parallelized_function_p can recognize it
     and the pass will not try to parallelize loops inside it again.  */
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter receiving the shared-data pointer.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  /* Split the header so that the old header body can be duplicated as
     the loop tail; ORIG_HEADER is the block holding that body.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh result and let the old result be
	 defined by a new phi in ORIG_HEADER.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* The exit condition must keep testing the header phi's
	     value, so retarget it to the renamed result.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* N = number of blocks strictly dominating the exit test.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the partial last iteration (everything between the header
     and the exit test) past the exit.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else must be the control variable.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name),
						  nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      /* NEW_DATA = (type of NEW_DATA) &DATA; emitted at the region
	 entry so the spawned function can read the shared struct.  */
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  The loop is in exit-first form
     (see transform_to_exit_first_loop), so the header ends in the exit
     condition and its lhs is the canonical IV.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The IV increment in the latch is taken over by GIMPLE_OMP_CONTINUE
     below, so remove it.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  /* Give the exit phis arguments for the two new incoming edges (the
     guard around the loop and the latch fallthru), copied from the
     corresponding header phi's preheader/latch arguments.  */
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For loops with an inner loop, a smaller per-thread minimum is used
     (the inner loop multiplies the work per iteration).  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  /* Guard: only take the parallel version when the loop iterates at
     least M_P_THREAD times per thread and cannot iterate zero times.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
					new_arg_struct, n_threads, loc);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1889 /* Returns true when LOOP contains vector phi nodes. */
1891 static bool
1892 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1894 unsigned i;
1895 basic_block *bbs = get_loop_body_in_dom_order (loop);
1896 gimple_stmt_iterator gsi;
1897 bool res = true;
1899 for (i = 0; i < loop->num_nodes; i++)
1900 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1901 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1902 goto end;
1904 res = false;
1905 end:
1906 free (bbs);
1907 return res;
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
{
  PTR *slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so fields not set here (keep_res, new_phi,
     field, initial_value) start out null.  */
  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  /* The phi's SSA version doubles as the hash key / gimple uid (see
     set_reduc_phi_uids).  */
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
  *slot = new_reduction;
}
/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

static int
set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  /* The uid is set to the reduction's version so reduction_phi can map
     a phi statement back to its reduction_info.  */
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  /* Nonzero return keeps htab_traverse iterating.  */
  return 1;
}
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* Reuse the vectorizer's loop-form analysis; may return NULL for
     loops it cannot handle.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* A header phi that is not a simple induction variable is a
	 reduction candidate; ask the vectorizer to classify it.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							   phi, true,
							   &double_reduc);
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  /* NOTE(review): called even when vect_analyze_loop_form returned NULL --
     confirm destroy_loop_vec_info tolerates a NULL argument.  */
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
}
1987 /* Try to initialize NITER for code generation part. */
1989 static bool
1990 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1992 edge exit = single_dom_exit (loop);
1994 gcc_assert (exit);
1996 /* We need to know # of iterations, and there should be no uses of values
1997 defined inside loop outside of it, unless the values are invariants of
1998 the loop. */
1999 if (!number_of_iterations_exit (loop, exit, niter, false))
2001 if (dump_file && (dump_flags & TDF_DETAILS))
2002 fprintf (dump_file, " FAILED: number of iterations not known\n");
2003 return false;
2006 return true;
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  Returns false when some
   value escapes the loop without being a recognized reduction, or when
   a non-reduction header phi is not a simple induction variable.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every value live across the exit edge must be produced by a
     recognized reduction; otherwise parallelizing would break the
     use outside the loop.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       " checking if it a part of reduction pattern: \n");
	    }
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the reduction phi: the in-loop (non-debug) use of the
	     exiting value.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
2106 /* Detect parallel loops and generate parallel code using libgomp
2107 primitives. Returns true if some loop was parallelized, false
2108 otherwise. */
2110 bool
2111 parallelize_loops (void)
2113 unsigned n_threads = flag_tree_parallelize_loops;
2114 bool changed = false;
2115 struct loop *loop;
2116 struct tree_niter_desc niter_desc;
2117 loop_iterator li;
2118 htab_t reduction_list;
2119 struct obstack parloop_obstack;
2120 HOST_WIDE_INT estimated;
2121 LOC loop_loc;
2123 /* Do not parallelize loops in the functions created by parallelization. */
2124 if (parallelized_function_p (cfun->decl))
2125 return false;
2126 if (cfun->has_nonlocal_label)
2127 return false;
2129 gcc_obstack_init (&parloop_obstack);
2130 reduction_list = htab_create (10, reduction_info_hash,
2131 reduction_info_eq, free);
2132 init_stmt_vec_info_vec ();
2134 FOR_EACH_LOOP (li, loop, 0)
2136 htab_empty (reduction_list);
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2139 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2140 if (loop->inner)
2141 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2142 else
2143 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2146 /* If we use autopar in graphite pass, we use its marked dependency
2147 checking results. */
2148 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2150 if (dump_file && (dump_flags & TDF_DETAILS))
2151 fprintf (dump_file, "loop is not parallel according to graphite\n");
2152 continue;
2155 if (!single_dom_exit (loop))
2158 if (dump_file && (dump_flags & TDF_DETAILS))
2159 fprintf (dump_file, "loop is !single_dom_exit\n");
2161 continue;
2164 if (/* And of course, the loop must be parallelizable. */
2165 !can_duplicate_loop_p (loop)
2166 || loop_has_blocks_with_irreducible_flag (loop)
2167 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2168 /* FIXME: the check for vector phi nodes could be removed. */
2169 || loop_has_vector_phi_nodes (loop))
2170 continue;
2172 estimated = estimated_stmt_executions_int (loop);
2173 if (estimated == -1)
2174 estimated = max_stmt_executions_int (loop);
2175 /* FIXME: Bypass this check as graphite doesn't update the
2176 count and frequency correctly now. */
2177 if (!flag_loop_parallelize_all
2178 && ((estimated != -1
2179 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2180 /* Do not bother with loops in cold areas. */
2181 || optimize_loop_nest_for_size_p (loop)))
2182 continue;
2184 if (!try_get_loop_niter (loop, &niter_desc))
2185 continue;
2187 if (!try_create_reduction_list (loop, reduction_list))
2188 continue;
2190 if (!flag_loop_parallelize_all
2191 && !loop_parallel_p (loop, &parloop_obstack))
2192 continue;
2194 changed = true;
2195 if (dump_file && (dump_flags & TDF_DETAILS))
2197 if (loop->inner)
2198 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2199 else
2200 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2201 loop_loc = find_loop_location (loop);
2202 if (loop_loc != UNKNOWN_LOC)
2203 fprintf (dump_file, "\nloop at %s:%d: ",
2204 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2206 gen_parallel_loop (loop, reduction_list,
2207 n_threads, &niter_desc);
2208 #ifdef ENABLE_CHECKING
2209 verify_flow_info ();
2210 verify_loop_structure ();
2211 verify_loop_closed_ssa (true);
2212 #endif
2215 free_stmt_vec_info_vec ();
2216 htab_delete (reduction_list);
2217 obstack_free (&parloop_obstack, NULL);
2219 /* Parallelization will cause new function calls to be inserted through
2220 which local variables will escape. Reset the points-to solution
2221 for ESCAPED. */
2222 if (changed)
2223 pt_solution_reset (&cfun->gimple_df->escaped);
2225 return changed;
2228 #include "gt-tree-parloops.h"