gcc/ChangeLog
[official-gcc.git] / gcc / tree-parloops.c
blobc164121fdb4dd74280da0a28b465bdbad7a62982
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "alias.h"
26 #include "backend.h"
27 #include "cfghooks.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "hard-reg-set.h"
31 #include "ssa.h"
32 #include "options.h"
33 #include "fold-const.h"
34 #include "internal-fn.h"
35 #include "gimplify.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimple-walk.h"
39 #include "stor-layout.h"
40 #include "tree-nested.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-ivopts.h"
43 #include "tree-ssa-loop-manip.h"
44 #include "tree-ssa-loop-niter.h"
45 #include "tree-ssa-loop.h"
46 #include "tree-into-ssa.h"
47 #include "cfgloop.h"
48 #include "tree-data-ref.h"
49 #include "tree-scalar-evolution.h"
50 #include "gimple-pretty-print.h"
51 #include "tree-pass.h"
52 #include "langhooks.h"
53 #include "tree-vectorizer.h"
54 #include "tree-hasher.h"
55 #include "tree-parloops.h"
56 #include "omp-low.h"
57 #include "tree-nested.h"
58 #include "cgraph.h"
59 #include "tree-ssa.h"
60 #include "params.h"
62 /* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
66 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
69 The most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
71 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
74 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
84 -- handling of common reduction patterns for outer loops.
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
/* Reduction handling:
   currently we use vect_force_simple_reduction() to detect reduction patterns.
   The code transformation will be introduced by an example.

   parloop

     int sum=1;

     for (i = 0; i < N; i++)
       {
	 x[i] = i + 3;
	 sum+=x[i];
       }

   gimple-like code:
   header_bb:

     # sum_29 = PHI <sum_11(5), 1(3)>
     # i_28 = PHI <i_12(5), 0(3)>
     D.1795_8 = i_28 + 3;
     x[i_28] = D.1795_8;
     sum_11 = D.1795_8 + sum_29;
     i_12 = i_28 + 1;
     if (N_6(D) > i_12)
       goto header_bb;

   exit_bb:

     # sum_21 = PHI <sum_11(4)>
     printf (&"%d"[0], sum_21);

   after reduction transformation (only relevant parts):

   parloop

     ....

     # Storing the initial value given by the user.  #
     .paral_data_store.32.sum.27 = 1;

     #pragma omp parallel num_threads(4)

     #pragma omp for schedule(static)

     # The neutral element corresponding to the particular
     reduction's operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc. replaces the user's initial value.  #
     # sum.27_29 = PHI <sum.27_11, 0>

     sum.27_11 = D.1827_8 + sum.27_29;

     GIMPLE_OMP_CONTINUE

     # Adding this reduction phi is done at create_phi_for_local_result() #
     # sum.27_56 = PHI <sum.27_11, 0>
     GIMPLE_OMP_RETURN

     # Creating the atomic operation is done at
     create_call_for_reduction_1()  #

     #pragma omp atomic_load
     D.1839_59 = *&.paral_data_load.33_51->reduction.23;
     D.1840_60 = sum.27_56 + D.1839_59;
     #pragma omp atomic_store (D.1840_60);

     GIMPLE_OMP_RETURN

     # collecting the result after the join of the threads is done at
     create_loads_for_reductions().
     The value computed by the threads is loaded from the
     shared struct.  #

     .paral_data_load.33_52 = &.paral_data_store.32;
     sum_37 = .paral_data_load.33_52->sum.27;
     sum_43 = D.1795_41 + sum_37;

   exit bb:
     # sum_21 = PHI <sum_43, sum_26>
     printf (&"%d"[0], sum_21);  */
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
187 /* Element of the hashtable, representing a
188 reduction in the current loop. */
189 struct reduction_info
191 gimple reduc_stmt; /* reduction statement. */
192 gimple reduc_phi; /* The phi node defining the reduction. */
193 enum tree_code reduction_code;/* code for the reduction operation. */
194 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
195 result. */
196 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
197 of the reduction variable when existing the loop. */
198 tree initial_value; /* The initial value of the reduction var before entering the loop. */
199 tree field; /* the name of the field in the parloop data structure intended for reduction. */
200 tree init; /* reduction initialization value. */
201 gphi *new_phi; /* (helper field) Newly created phi node whose result
202 will be passed to the atomic operation. Represents
203 the local result each thread computed for the reduction
204 operation. */
207 /* Reduction info hashtable helpers. */
209 struct reduction_hasher : free_ptr_hash <reduction_info>
211 static inline hashval_t hash (const reduction_info *);
212 static inline bool equal (const reduction_info *, const reduction_info *);
215 /* Equality and hash functions for hashtab code. */
217 inline bool
218 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
220 return (a->reduc_phi == b->reduc_phi);
223 inline hashval_t
224 reduction_hasher::hash (const reduction_info *a)
226 return a->reduc_version;
229 typedef hash_table<reduction_hasher> reduction_info_table_type;
232 static struct reduction_info *
233 reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
235 struct reduction_info tmpred, *red;
237 if (reduction_list->elements () == 0 || phi == NULL)
238 return NULL;
240 tmpred.reduc_phi = phi;
241 tmpred.reduc_version = gimple_uid (phi);
242 red = reduction_list->find (&tmpred);
244 return red;
247 /* Element of hashtable of names to copy. */
249 struct name_to_copy_elt
251 unsigned version; /* The version of the name to copy. */
252 tree new_name; /* The new name used in the copy. */
253 tree field; /* The field of the structure used to pass the
254 value. */
257 /* Name copies hashtable helpers. */
259 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
261 static inline hashval_t hash (const name_to_copy_elt *);
262 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
265 /* Equality and hash functions for hashtab code. */
267 inline bool
268 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
270 return a->version == b->version;
273 inline hashval_t
274 name_to_copy_hasher::hash (const name_to_copy_elt *a)
276 return (hashval_t) a->version;
279 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
281 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
282 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
283 represents the denominator for every element in the matrix. */
284 typedef struct lambda_trans_matrix_s
286 lambda_matrix matrix;
287 int rowsize;
288 int colsize;
289 int denominator;
290 } *lambda_trans_matrix;
291 #define LTM_MATRIX(T) ((T)->matrix)
292 #define LTM_ROWSIZE(T) ((T)->rowsize)
293 #define LTM_COLSIZE(T) ((T)->colsize)
294 #define LTM_DENOMINATOR(T) ((T)->denominator)
296 /* Allocate a new transformation matrix. */
298 static lambda_trans_matrix
299 lambda_trans_matrix_new (int colsize, int rowsize,
300 struct obstack * lambda_obstack)
302 lambda_trans_matrix ret;
304 ret = (lambda_trans_matrix)
305 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
306 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
307 LTM_ROWSIZE (ret) = rowsize;
308 LTM_COLSIZE (ret) = colsize;
309 LTM_DENOMINATOR (ret) = 1;
310 return ret;
313 /* Multiply a vector VEC by a matrix MAT.
314 MAT is an M*N matrix, and VEC is a vector with length N. The result
315 is stored in DEST which must be a vector of length M. */
317 static void
318 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
319 lambda_vector vec, lambda_vector dest)
321 int i, j;
323 lambda_vector_clear (dest, m);
324 for (i = 0; i < m; i++)
325 for (j = 0; j < n; j++)
326 dest[i] += matrix[i][j] * vec[j];
329 /* Return true if TRANS is a legal transformation matrix that respects
330 the dependence vectors in DISTS and DIRS. The conservative answer
331 is false.
333 "Wolfe proves that a unimodular transformation represented by the
334 matrix T is legal when applied to a loop nest with a set of
335 lexicographically non-negative distance vectors RDG if and only if
336 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
337 i.e.: if and only if it transforms the lexicographically positive
338 distance vectors to lexicographically positive vectors. Note that
339 a unimodular matrix must transform the zero vector (and only it) to
340 the zero vector." S.Muchnick. */
342 static bool
343 lambda_transform_legal_p (lambda_trans_matrix trans,
344 int nb_loops,
345 vec<ddr_p> dependence_relations)
347 unsigned int i, j;
348 lambda_vector distres;
349 struct data_dependence_relation *ddr;
351 gcc_assert (LTM_COLSIZE (trans) == nb_loops
352 && LTM_ROWSIZE (trans) == nb_loops);
354 /* When there are no dependences, the transformation is correct. */
355 if (dependence_relations.length () == 0)
356 return true;
358 ddr = dependence_relations[0];
359 if (ddr == NULL)
360 return true;
362 /* When there is an unknown relation in the dependence_relations, we
363 know that it is no worth looking at this loop nest: give up. */
364 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
365 return false;
367 distres = lambda_vector_new (nb_loops);
369 /* For each distance vector in the dependence graph. */
370 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
372 /* Don't care about relations for which we know that there is no
373 dependence, nor about read-read (aka. output-dependences):
374 these data accesses can happen in any order. */
375 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
376 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
377 continue;
379 /* Conservatively answer: "this transformation is not valid". */
380 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
381 return false;
383 /* If the dependence could not be captured by a distance vector,
384 conservatively answer that the transform is not valid. */
385 if (DDR_NUM_DIST_VECTS (ddr) == 0)
386 return false;
388 /* Compute trans.dist_vect */
389 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
391 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
392 DDR_DIST_VECT (ddr, j), distres);
394 if (!lambda_vector_lexico_pos (distres, nb_loops))
395 return false;
398 return true;
401 /* Data dependency analysis. Returns true if the iterations of LOOP
402 are independent on each other (that is, if we can execute them
403 in parallel). */
405 static bool
406 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
408 vec<ddr_p> dependence_relations;
409 vec<data_reference_p> datarefs;
410 lambda_trans_matrix trans;
411 bool ret = false;
413 if (dump_file && (dump_flags & TDF_DETAILS))
415 fprintf (dump_file, "Considering loop %d\n", loop->num);
416 if (!loop->inner)
417 fprintf (dump_file, "loop is innermost\n");
418 else
419 fprintf (dump_file, "loop NOT innermost\n");
422 /* Check for problems with dependences. If the loop can be reversed,
423 the iterations are independent. */
424 auto_vec<loop_p, 3> loop_nest;
425 datarefs.create (10);
426 dependence_relations.create (100);
427 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
428 &dependence_relations))
430 if (dump_file && (dump_flags & TDF_DETAILS))
431 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
432 ret = false;
433 goto end;
435 if (dump_file && (dump_flags & TDF_DETAILS))
436 dump_data_dependence_relations (dump_file, dependence_relations);
438 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
439 LTM_MATRIX (trans)[0][0] = -1;
441 if (lambda_transform_legal_p (trans, 1, dependence_relations))
443 ret = true;
444 if (dump_file && (dump_flags & TDF_DETAILS))
445 fprintf (dump_file, " SUCCESS: may be parallelized\n");
447 else if (dump_file && (dump_flags & TDF_DETAILS))
448 fprintf (dump_file,
449 " FAILED: data dependencies exist across iterations\n");
451 end:
452 free_dependence_relations (dependence_relations);
453 free_data_refs (datarefs);
455 return ret;
458 /* Return true when LOOP contains basic blocks marked with the
459 BB_IRREDUCIBLE_LOOP flag. */
461 static inline bool
462 loop_has_blocks_with_irreducible_flag (struct loop *loop)
464 unsigned i;
465 basic_block *bbs = get_loop_body_in_dom_order (loop);
466 bool res = true;
468 for (i = 0; i < loop->num_nodes; i++)
469 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
470 goto end;
472 res = false;
473 end:
474 free (bbs);
475 return res;
478 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
479 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
480 to their addresses that can be reused. The address of OBJ is known to
481 be invariant in the whole function. Other needed statements are placed
482 right before GSI. */
484 static tree
485 take_address_of (tree obj, tree type, edge entry,
486 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
488 int uid;
489 tree *var_p, name, addr;
490 gassign *stmt;
491 gimple_seq stmts;
493 /* Since the address of OBJ is invariant, the trees may be shared.
494 Avoid rewriting unrelated parts of the code. */
495 obj = unshare_expr (obj);
496 for (var_p = &obj;
497 handled_component_p (*var_p);
498 var_p = &TREE_OPERAND (*var_p, 0))
499 continue;
501 /* Canonicalize the access to base on a MEM_REF. */
502 if (DECL_P (*var_p))
503 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
505 /* Assign a canonical SSA name to the address of the base decl used
506 in the address and share it for all accesses and addresses based
507 on it. */
508 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
509 int_tree_map elt;
510 elt.uid = uid;
511 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
512 if (!slot->to)
514 if (gsi == NULL)
515 return NULL;
516 addr = TREE_OPERAND (*var_p, 0);
517 const char *obj_name
518 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
519 if (obj_name)
520 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
521 else
522 name = make_ssa_name (TREE_TYPE (addr));
523 stmt = gimple_build_assign (name, addr);
524 gsi_insert_on_edge_immediate (entry, stmt);
526 slot->uid = uid;
527 slot->to = name;
529 else
530 name = slot->to;
532 /* Express the address in terms of the canonical SSA name. */
533 TREE_OPERAND (*var_p, 0) = name;
534 if (gsi == NULL)
535 return build_fold_addr_expr_with_type (obj, type);
537 name = force_gimple_operand (build_addr (obj, current_function_decl),
538 &stmts, true, NULL_TREE);
539 if (!gimple_seq_empty_p (stmts))
540 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
542 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
544 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
545 NULL_TREE);
546 if (!gimple_seq_empty_p (stmts))
547 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
550 return name;
553 static tree
554 reduc_stmt_res (gimple stmt)
556 return (gimple_code (stmt) == GIMPLE_PHI
557 ? gimple_phi_result (stmt)
558 : gimple_assign_lhs (stmt));
561 /* Callback for htab_traverse. Create the initialization statement
562 for reduction described in SLOT, and place it at the preheader of
563 the loop described in DATA. */
566 initialize_reductions (reduction_info **slot, struct loop *loop)
568 tree init, c;
569 tree bvar, type, arg;
570 edge e;
572 struct reduction_info *const reduc = *slot;
574 /* Create initialization in preheader:
575 reduction_variable = initialization value of reduction. */
577 /* In the phi node at the header, replace the argument coming
578 from the preheader with the reduction initialization value. */
580 /* Create a new variable to initialize the reduction. */
581 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
582 bvar = create_tmp_var (type, "reduction");
584 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
585 OMP_CLAUSE_REDUCTION);
586 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
587 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (reduc_stmt_res (reduc->reduc_stmt));
589 init = omp_reduction_init (c, TREE_TYPE (bvar));
590 reduc->init = init;
592 /* Replace the argument representing the initialization value
593 with the initialization value for the reduction (neutral
594 element for the particular operation, e.g. 0 for PLUS_EXPR,
595 1 for MULT_EXPR, etc).
596 Keep the old value in a new variable "reduction_initial",
597 that will be taken in consideration after the parallel
598 computing is done. */
600 e = loop_preheader_edge (loop);
601 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
602 /* Create new variable to hold the initial value. */
604 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
605 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
606 reduc->initial_value = arg;
607 return 1;
610 struct elv_data
612 struct walk_stmt_info info;
613 edge entry;
614 int_tree_htab_type *decl_address;
615 gimple_stmt_iterator *gsi;
616 bool changed;
617 bool reset;
620 /* Eliminates references to local variables in *TP out of the single
621 entry single exit region starting at DTA->ENTRY.
622 DECL_ADDRESS contains addresses of the references that had their
623 address taken already. If the expression is changed, CHANGED is
624 set to true. Callback for walk_tree. */
626 static tree
627 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
629 struct elv_data *const dta = (struct elv_data *) data;
630 tree t = *tp, var, addr, addr_type, type, obj;
632 if (DECL_P (t))
634 *walk_subtrees = 0;
636 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
637 return NULL_TREE;
639 type = TREE_TYPE (t);
640 addr_type = build_pointer_type (type);
641 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
642 dta->gsi);
643 if (dta->gsi == NULL && addr == NULL_TREE)
645 dta->reset = true;
646 return NULL_TREE;
649 *tp = build_simple_mem_ref (addr);
651 dta->changed = true;
652 return NULL_TREE;
655 if (TREE_CODE (t) == ADDR_EXPR)
657 /* ADDR_EXPR may appear in two contexts:
658 -- as a gimple operand, when the address taken is a function invariant
659 -- as gimple rhs, when the resulting address in not a function
660 invariant
661 We do not need to do anything special in the latter case (the base of
662 the memory reference whose address is taken may be replaced in the
663 DECL_P case). The former case is more complicated, as we need to
664 ensure that the new address is still a gimple operand. Thus, it
665 is not sufficient to replace just the base of the memory reference --
666 we need to move the whole computation of the address out of the
667 loop. */
668 if (!is_gimple_val (t))
669 return NULL_TREE;
671 *walk_subtrees = 0;
672 obj = TREE_OPERAND (t, 0);
673 var = get_base_address (obj);
674 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
675 return NULL_TREE;
677 addr_type = TREE_TYPE (t);
678 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
679 dta->gsi);
680 if (dta->gsi == NULL && addr == NULL_TREE)
682 dta->reset = true;
683 return NULL_TREE;
685 *tp = addr;
687 dta->changed = true;
688 return NULL_TREE;
691 if (!EXPR_P (t))
692 *walk_subtrees = 0;
694 return NULL_TREE;
697 /* Moves the references to local variables in STMT at *GSI out of the single
698 entry single exit region starting at ENTRY. DECL_ADDRESS contains
699 addresses of the references that had their address taken
700 already. */
702 static void
703 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
704 int_tree_htab_type *decl_address)
706 struct elv_data dta;
707 gimple stmt = gsi_stmt (*gsi);
709 memset (&dta.info, '\0', sizeof (dta.info));
710 dta.entry = entry;
711 dta.decl_address = decl_address;
712 dta.changed = false;
713 dta.reset = false;
715 if (gimple_debug_bind_p (stmt))
717 dta.gsi = NULL;
718 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
719 eliminate_local_variables_1, &dta.info, NULL);
720 if (dta.reset)
722 gimple_debug_bind_reset_value (stmt);
723 dta.changed = true;
726 else if (gimple_clobber_p (stmt))
728 stmt = gimple_build_nop ();
729 gsi_replace (gsi, stmt, false);
730 dta.changed = true;
732 else
734 dta.gsi = gsi;
735 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
738 if (dta.changed)
739 update_stmt (stmt);
742 /* Eliminates the references to local variables from the single entry
743 single exit region between the ENTRY and EXIT edges.
745 This includes:
746 1) Taking address of a local variable -- these are moved out of the
747 region (and temporary variable is created to hold the address if
748 necessary).
750 2) Dereferencing a local variable -- these are replaced with indirect
751 references. */
753 static void
754 eliminate_local_variables (edge entry, edge exit)
756 basic_block bb;
757 auto_vec<basic_block, 3> body;
758 unsigned i;
759 gimple_stmt_iterator gsi;
760 bool has_debug_stmt = false;
761 int_tree_htab_type decl_address (10);
762 basic_block entry_bb = entry->src;
763 basic_block exit_bb = exit->dest;
765 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
767 FOR_EACH_VEC_ELT (body, i, bb)
768 if (bb != entry_bb && bb != exit_bb)
769 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
770 if (is_gimple_debug (gsi_stmt (gsi)))
772 if (gimple_debug_bind_p (gsi_stmt (gsi)))
773 has_debug_stmt = true;
775 else
776 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
778 if (has_debug_stmt)
779 FOR_EACH_VEC_ELT (body, i, bb)
780 if (bb != entry_bb && bb != exit_bb)
781 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
782 if (gimple_debug_bind_p (gsi_stmt (gsi)))
783 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
786 /* Returns true if expression EXPR is not defined between ENTRY and
787 EXIT, i.e. if all its operands are defined outside of the region. */
789 static bool
790 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
792 basic_block entry_bb = entry->src;
793 basic_block exit_bb = exit->dest;
794 basic_block def_bb;
796 if (is_gimple_min_invariant (expr))
797 return true;
799 if (TREE_CODE (expr) == SSA_NAME)
801 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
802 if (def_bb
803 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
804 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
805 return false;
807 return true;
810 return false;
813 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
814 The copies are stored to NAME_COPIES, if NAME was already duplicated,
815 its duplicate stored in NAME_COPIES is returned.
817 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
818 duplicated, storing the copies in DECL_COPIES. */
820 static tree
821 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
822 int_tree_htab_type *decl_copies,
823 bool copy_name_p)
825 tree copy, var, var_copy;
826 unsigned idx, uid, nuid;
827 struct int_tree_map ielt;
828 struct name_to_copy_elt elt, *nelt;
829 name_to_copy_elt **slot;
830 int_tree_map *dslot;
832 if (TREE_CODE (name) != SSA_NAME)
833 return name;
835 idx = SSA_NAME_VERSION (name);
836 elt.version = idx;
837 slot = name_copies->find_slot_with_hash (&elt, idx,
838 copy_name_p ? INSERT : NO_INSERT);
839 if (slot && *slot)
840 return (*slot)->new_name;
842 if (copy_name_p)
844 copy = duplicate_ssa_name (name, NULL);
845 nelt = XNEW (struct name_to_copy_elt);
846 nelt->version = idx;
847 nelt->new_name = copy;
848 nelt->field = NULL_TREE;
849 *slot = nelt;
851 else
853 gcc_assert (!slot);
854 copy = name;
857 var = SSA_NAME_VAR (name);
858 if (!var)
859 return copy;
861 uid = DECL_UID (var);
862 ielt.uid = uid;
863 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
864 if (!dslot->to)
866 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
867 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
868 dslot->uid = uid;
869 dslot->to = var_copy;
871 /* Ensure that when we meet this decl next time, we won't duplicate
872 it again. */
873 nuid = DECL_UID (var_copy);
874 ielt.uid = nuid;
875 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
876 gcc_assert (!dslot->to);
877 dslot->uid = nuid;
878 dslot->to = var_copy;
880 else
881 var_copy = dslot->to;
883 replace_ssa_name_symbol (copy, var_copy);
884 return copy;
887 /* Finds the ssa names used in STMT that are defined outside the
888 region between ENTRY and EXIT and replaces such ssa names with
889 their duplicates. The duplicates are stored to NAME_COPIES. Base
890 decls of all ssa names used in STMT (including those defined in
891 LOOP) are replaced with the new temporary variables; the
892 replacement decls are stored in DECL_COPIES. */
894 static void
895 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
896 name_to_copy_table_type *name_copies,
897 int_tree_htab_type *decl_copies)
899 use_operand_p use;
900 def_operand_p def;
901 ssa_op_iter oi;
902 tree name, copy;
903 bool copy_name_p;
905 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
907 name = DEF_FROM_PTR (def);
908 gcc_assert (TREE_CODE (name) == SSA_NAME);
909 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
910 false);
911 gcc_assert (copy == name);
914 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
916 name = USE_FROM_PTR (use);
917 if (TREE_CODE (name) != SSA_NAME)
918 continue;
920 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
921 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
922 copy_name_p);
923 SET_USE (use, copy);
927 /* Finds the ssa names used in STMT that are defined outside the
928 region between ENTRY and EXIT and replaces such ssa names with
929 their duplicates. The duplicates are stored to NAME_COPIES. Base
930 decls of all ssa names used in STMT (including those defined in
931 LOOP) are replaced with the new temporary variables; the
932 replacement decls are stored in DECL_COPIES. */
934 static bool
935 separate_decls_in_region_debug (gimple stmt,
936 name_to_copy_table_type *name_copies,
937 int_tree_htab_type *decl_copies)
939 use_operand_p use;
940 ssa_op_iter oi;
941 tree var, name;
942 struct int_tree_map ielt;
943 struct name_to_copy_elt elt;
944 name_to_copy_elt **slot;
945 int_tree_map *dslot;
947 if (gimple_debug_bind_p (stmt))
948 var = gimple_debug_bind_get_var (stmt);
949 else if (gimple_debug_source_bind_p (stmt))
950 var = gimple_debug_source_bind_get_var (stmt);
951 else
952 return true;
953 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
954 return true;
955 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
956 ielt.uid = DECL_UID (var);
957 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
958 if (!dslot)
959 return true;
960 if (gimple_debug_bind_p (stmt))
961 gimple_debug_bind_set_var (stmt, dslot->to);
962 else if (gimple_debug_source_bind_p (stmt))
963 gimple_debug_source_bind_set_var (stmt, dslot->to);
965 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
967 name = USE_FROM_PTR (use);
968 if (TREE_CODE (name) != SSA_NAME)
969 continue;
971 elt.version = SSA_NAME_VERSION (name);
972 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
973 if (!slot)
975 gimple_debug_bind_reset_value (stmt);
976 update_stmt (stmt);
977 break;
980 SET_USE (use, (*slot)->new_name);
983 return false;
986 /* Callback for htab_traverse. Adds a field corresponding to the reduction
987 specified in SLOT. The type is passed in DATA. */
990 add_field_for_reduction (reduction_info **slot, tree type)
993 struct reduction_info *const red = *slot;
994 tree var = reduc_stmt_res (red->reduc_stmt);
995 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
996 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
998 insert_field_into_struct (type, field);
1000 red->field = field;
1002 return 1;
1005 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1006 described in SLOT. The type is passed in DATA. */
1009 add_field_for_name (name_to_copy_elt **slot, tree type)
1011 struct name_to_copy_elt *const elt = *slot;
1012 tree name = ssa_name (elt->version);
1013 tree field = build_decl (UNKNOWN_LOCATION,
1014 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1015 TREE_TYPE (name));
1017 insert_field_into_struct (type, field);
1018 elt->field = field;
1020 return 1;
1023 /* Callback for htab_traverse. A local result is the intermediate result
1024 computed by a single
1025 thread, or the initial value in case no iteration was executed.
1026 This function creates a phi node reflecting these values.
1027 The phi's result will be stored in NEW_PHI field of the
1028 reduction's data structure. */
1031 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1033 struct reduction_info *const reduc = *slot;
1034 edge e;
1035 gphi *new_phi;
1036 basic_block store_bb, continue_bb;
1037 tree local_res;
1038 source_location locus;
1040 /* STORE_BB is the block where the phi
1041 should be stored. It is the destination of the loop exit.
1042 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1043 continue_bb = single_pred (loop->latch);
1044 store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1046 /* STORE_BB has two predecessors. One coming from the loop
1047 (the reduction's result is computed at the loop),
1048 and another coming from a block preceding the loop,
1049 when no iterations
1050 are executed (the initial value should be taken). */
1051 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1052 e = EDGE_PRED (store_bb, 1);
1053 else
1054 e = EDGE_PRED (store_bb, 0);
1055 tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1056 local_res = copy_ssa_name (lhs);
1057 locus = gimple_location (reduc->reduc_stmt);
1058 new_phi = create_phi_node (local_res, store_bb);
1059 add_phi_arg (new_phi, reduc->init, e, locus);
1060 add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1061 reduc->new_phi = new_phi;
1063 return 1;
1066 struct clsn_data
1068 tree store;
1069 tree load;
1071 basic_block store_bb;
1072 basic_block load_bb;
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of the shared-struct field holding this reduction's value.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Atomically load the current value of the field ...  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  /* ... combine it with this thread's local result (NEW_PHI) using the
     reduction operation, and atomically store the result back.  The two
     block splits keep the atomic load/store pair in blocks of their own.  */
  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  /* Nonzero return keeps the hash-table traversal going.  */
  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First materialize each thread's local result as a phi ...  */
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  /* ... then emit the atomic update into the shared structure.  */
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  /* Load the final value from the shared structure field and assign it
     directly to the SSA name previously defined by the exit phi.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The exit phi is now superseded by the assignment; remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }

  /* KEEP_RES must be a phi of its own block, so it is always found.  */
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Re-establish the pointer to the shared structure in LOAD_BB, then let
     the per-reduction callback emit the individual field loads.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

static int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));

  /* Append "store.field = initial_value" at the end of STORE_BB, which is
     executed before the threads are spawned.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Nonzero return keeps the hash-table traversal going.  */
  return 1;
}
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

static int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* In STORE_BB (before spawning): store.field = original ssa name.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* In LOAD_BB (inside the spawned function): new_name = load->field.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Nonzero return keeps the hash-table traversal going.  */
  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: record every name/decl used in the region that is defined
     outside of it, via NAME_COPIES/DECL_COPIES.  Debug stmts are only
     noted here and handled in a second pass below.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      /* The bound value cannot be represented in the new
			 function; drop the debug stmt entirely.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  /* A separate load through a fresh pointer is used after the join,
	     in the exit block of the region.  */
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
1418 /* Returns true if FN was created to run in parallel. */
1420 bool
1421 parallelized_function_p (tree fndecl)
1423 cgraph_node *node = cgraph_node::get (fndecl);
1424 gcc_assert (node != NULL);
1425 return node->parallelized_function;
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;	/* Makes each generated name unique.  */

  loc = LOCATION_LOCUS (loc);
  /* Derive a private assembler name "<caller>.$loopfn.<N>".  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes a single pointer argument (the shared data
     structure) and returns nothing.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result decl.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter carrying the shared data pointer.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1480 /* Replace uses of NAME by VAL in block BB. */
1482 static void
1483 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1485 gimple use_stmt;
1486 imm_use_iterator imm_iter;
1488 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1490 if (gimple_bb (use_stmt) != bb)
1491 continue;
1493 use_operand_p use_p;
1494 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1495 SET_USE (use_p, val);
1499 /* Do transformation from:
1501 <bb preheader>:
1503 goto <bb header>
1505 <bb header>:
1506 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1507 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1509 use (ivtmp_a)
1511 sum_b = sum_a + sum_update
1513 if (ivtmp_a < n)
1514 goto <bb latch>;
1515 else
1516 goto <bb exit>;
1518 <bb latch>:
1519 ivtmp_b = ivtmp_a + 1;
1520 goto <bb header>
1522 <bb exit>:
1523 sum_z = PHI <sum_b (cond[1]), ...>
1525 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1526 that's <bb header>.
1530 <bb preheader>:
1532 goto <bb newheader>
1534 <bb header>:
1535 ivtmp_a = PHI <ivtmp_c (latch)>
1536 sum_a = PHI <sum_c (latch)>
1538 use (ivtmp_a)
1540 sum_b = sum_a + sum_update
1542 goto <bb latch>;
1544 <bb newheader>:
1545 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1546 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1547 if (ivtmp_c < n + 1)
1548 goto <bb header>;
1549 else
1550 goto <bb newexit>;
1552 <bb latch>:
1553 ivtmp_b = ivtmp_a + 1;
1554 goto <bb newheader>
1556 <bb newexit>:
1557 sum_y = PHI <sum_c (newheader)>
1559 <bb exit>:
1560 sum_z = PHI <sum_y (newexit), ...>
1563 In unified diff format:
1565 <bb preheader>:
1567 - goto <bb header>
1568 + goto <bb newheader>
1570 <bb header>:
1571 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1572 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1573 + ivtmp_a = PHI <ivtmp_c (latch)>
1574 + sum_a = PHI <sum_c (latch)>
1576 use (ivtmp_a)
1578 sum_b = sum_a + sum_update
1580 - if (ivtmp_a < n)
1581 - goto <bb latch>;
1582 + goto <bb latch>;
1584 + <bb newheader>:
1585 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1586 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1587 + if (ivtmp_c < n + 1)
1588 + goto <bb header>;
1589 else
1590 goto <bb exit>;
1592 <bb latch>:
1593 ivtmp_b = ivtmp_a + 1;
1594 - goto <bb header>
1595 + goto <bb newheader>
1597 + <bb newexit>:
1598 + sum_y = PHI <sum_c (newheader)>
1600 <bb exit>:
1601 - sum_z = PHI <sum_b (cond[1]), ...>
1602 + sum_z = PHI <sum_y (newexit), ...>
1604 Note: the example does not show any virtual phis, but these are handled more
1605 or less as reductions.
1608 Moves the exit condition of LOOP to the beginning of its header.
1609 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1610 bound. */
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* NOTE: the four redirections below must be done in this order; each
     relies on the CFG shape left by the previous one.  */

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  /* The var map of POST_INC_EDGE and the header phis are walked in
     lock-step; they are expected to correspond one-to-one (checked by the
     assert after the loop).  */
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Create a new empty exit block, inbetween the new loop header and the old
     exit block.  The function separate_decls_in_region needs this block to
     insert code that is active on loop exit, but not any other path.  */
  basic_block new_exit_block = split_edge (exit);

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);

      /* Now that we have a new exit block, duplicate the phi of the old exit
	 block in the new exit block to preserve loop-closed ssa.  */
      edge succ_new_exit_block = single_succ_edge (new_exit_block);
      edge pred_new_exit_block = single_pred_edge (new_exit_block);
      tree res_y = copy_ssa_name (res_z, phi);
      gphi *nphi = create_phi_node (res_y, new_exit_block);
      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);

      /* Virtual phis carry no reduction; nothing more to register.  */
      if (virtual_operand_p (res_z))
	continue;

      gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = nphi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
/* Tries to moves the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of the
   loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  /* The constant bound nit + 1 does not overflow; fold it now and
	     do the transformation.  */
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;

  gimple def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition in terms of the new header phi result.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  /* Duplicate the loop tail (everything dominated by the exit condition)
     outside of the loop, turning it into an exit-first loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 =  gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple omp_return_stmt1, omp_return_stmt2;
  gimple phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (omp_par_stmt, loc);

  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gassign *assign_stmt;

      gsi = gsi_after_labels (bb);

      /* NEW_DATA = (type of NEW_DATA) &DATA;  inside the parallel region the
	 OMP lowering will rewrite this to use the received argument.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

      assign_stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  omp_return_stmt1 = gimple_build_omp_return (false);
  gimple_set_location (omp_return_stmt1, loc);
  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The iv increment becomes the responsibility of GIMPLE_OMP_FOR /
     GIMPLE_OMP_CONTINUE, so drop the explicit statement in the latch.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
  loop->latch = split_edge (single_succ_edge (loop->latch));
  single_pred_edge (loop->latch)->flags = 0;
  end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
  rescan_loop_exit (end, true, false);

  /* Complete the exit phis with arguments for the new GUARD and END edges,
     taken from the corresponding loop header phi.  */
  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      gphi *phi = gpi.phi ();
      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple def_stmt = SSA_NAME_DEF_STMT (def);

      /* If the exit phi is not connected to a header phi in the same loop, this
	 value is not modified in the loop, and we're done with this phi.  */
      if (!(gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == loop->header))
	continue;

      gphi *stmt = as_a <gphi *> (def_stmt);
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
  int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
  if (chunk_size != 0)
    OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
      = build_int_cst (integer_type_node, chunk_size);

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  continue_bb = single_pred (loop->latch);
  gsi = gsi_last_bb (continue_bb);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  /* Minimum number of iterations each thread must get for the
     parallelization to be considered profitable.  */
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  /* Gimplify the iteration count; any statements needed to compute it are
     emitted on the preheader edge, before the versioning condition.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For loop nests use a lower threshold; the inner loop multiplies the
     amount of work each thread receives per outer iteration.  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  /* Build NITER >= M_P_THREAD * N_THREADS ...  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  /* ... conjoined with !MAY_BE_ZERO, so the parallel version only runs
     when the loop certainly iterates enough times.  */
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      /* Force it into a form usable directly as a GIMPLE_COND.  */
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move the
     entry of the loop directly before the exit check and increase the number of
     iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets disabled
	 to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  Use the location of the loop exit
     condition, if any, for the emitted OMP statements.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}
2319 /* Returns true when LOOP contains vector phi nodes. */
2321 static bool
2322 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2324 unsigned i;
2325 basic_block *bbs = get_loop_body_in_dom_order (loop);
2326 gphi_iterator gsi;
2327 bool res = true;
2329 for (i = 0; i < loop->num_nodes; i++)
2330 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2331 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2332 goto end;
2334 res = false;
2335 end:
2336 free (bbs);
2337 return res;
2340 /* Create a reduction_info struct, initialize it with REDUC_STMT
2341 and PHI, insert it to the REDUCTION_LIST. */
2343 static void
2344 build_new_reduction (reduction_info_table_type *reduction_list,
2345 gimple reduc_stmt, gphi *phi)
2347 reduction_info **slot;
2348 struct reduction_info *new_reduction;
2349 enum tree_code reduction_code;
2351 gcc_assert (reduc_stmt);
2353 if (dump_file && (dump_flags & TDF_DETAILS))
2355 fprintf (dump_file,
2356 "Detected reduction. reduction stmt is: \n");
2357 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2358 fprintf (dump_file, "\n");
2361 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2363 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
2364 gimple def1 = SSA_NAME_DEF_STMT (op1);
2365 reduction_code = gimple_assign_rhs_code (def1);
2368 else
2369 reduction_code = gimple_assign_rhs_code (reduc_stmt);
2371 new_reduction = XCNEW (struct reduction_info);
2373 new_reduction->reduc_stmt = reduc_stmt;
2374 new_reduction->reduc_phi = phi;
2375 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2376 new_reduction->reduction_code = reduction_code;
2377 slot = reduction_list->find_slot (new_reduction, INSERT);
2378 *slot = new_reduction;
2381 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2384 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2386 struct reduction_info *const red = *slot;
2387 gimple_set_uid (red->reduc_phi, red->reduc_version);
2388 return 1;
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  /* Analysis of the inner loop form is done lazily, only when the first
     double reduction candidate is seen.  */
  loop_vec_info simple_inner_loop_info = NULL;
  /* Once analyzing the inner loop form fails, stop considering double
     reductions for the remaining phis.  */
  bool allow_double_reduc = true;

  simple_loop_info = vect_analyze_loop_form (loop);
  if (simple_loop_info == NULL)
    return;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* Simple induction variables are handled by canonicalize_loop_ivs,
	 not as reductions.  */
      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      gimple reduc_stmt
	= vect_force_simple_reduction (simple_loop_info, phi, true,
				       &double_reduc, true);
      if (!reduc_stmt)
	continue;

      if (double_reduc)
	{
	  /* DOUBLE_REDUC implies LOOP has an inner loop; only handle
	     nests that are exactly two deep.  */
	  if (!allow_double_reduc
	      || loop->inner->inner != NULL)
	    continue;

	  if (!simple_inner_loop_info)
	    {
	      simple_inner_loop_info = vect_analyze_loop_form (loop->inner);
	      if (!simple_inner_loop_info)
		{
		  allow_double_reduc = false;
		  continue;
		}
	    }

	  /* The outer phi result feeds exactly one phi of the inner
	     loop, which must itself be a simple reduction there.  */
	  use_operand_p use_p;
	  gimple inner_stmt;
	  bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	  gcc_assert (single_use_p);
	  gphi *inner_phi = as_a <gphi *> (inner_stmt);
	  if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			 &iv, true))
	    continue;

	  gimple inner_reduc_stmt
	    = vect_force_simple_reduction (simple_inner_loop_info, inner_phi,
					   true, &double_reduc, true);
	  gcc_assert (!double_reduc);
	  if (inner_reduc_stmt == NULL)
	    continue;
	}

      build_new_reduction (reduction_list, reduc_stmt, phi);
    }
  destroy_loop_vec_info (simple_loop_info, true);
  /* NOTE(review): simple_inner_loop_info may still be NULL here;
     destroy_loop_vec_info presumably tolerates NULL -- confirm.  */
  destroy_loop_vec_info (simple_inner_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}
2468 /* Try to initialize NITER for code generation part. */
2470 static bool
2471 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2473 edge exit = single_dom_exit (loop);
2475 gcc_assert (exit);
2477 /* We need to know # of iterations, and there should be no uses of values
2478 defined inside loop outside of it, unless the values are invariants of
2479 the loop. */
2480 if (!number_of_iterations_exit (loop, exit, niter, false))
2482 if (dump_file && (dump_flags & TDF_DETAILS))
2483 fprintf (dump_file, " FAILED: number of iterations not known\n");
2484 return false;
2487 return true;
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  /* Every non-virtual value live after the loop (i.e. appearing in a phi
     of the exit block) must be the result of a detected reduction;
     otherwise the loop cannot be parallelized.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       " checking if it a part of reduction pattern: \n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the in-loop use of VAL; that statement should be the
	     reduction phi recorded by gather_scalar_reductions.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  /* NOTE(review): reduc_phi may still be NULL here; reduction_phi
	     presumably handles a NULL lookup by returning NULL -- confirm.  */
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

static bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  /* When an outer loop is parallelized, its (former) inner loop is
     recorded here so the walk skips it on the next iteration.  */
  struct loop *skip_loop = NULL;
  struct tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  source_location loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Skipping loop %d as inner loop of parallelized loop\n",
		     loop->num);

	  skip_loop = loop->inner;
	  continue;
	}
      else
	skip_loop = NULL;

      /* The table is shared across candidates; clear leftovers from the
	 previous loop.  */
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n",loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Profitability: skip loops whose estimated work would not give each
	 thread at least MIN_PER_THREAD iterations.  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      skip_loop = loop->inner;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOCATION)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
	}
      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
/* Parallelization.  */

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Pass wrapper around parallelize_loops.  Enabled only when more than
   one thread is requested via -ftree-parallelize-loops=N.  */

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
  virtual unsigned int execute (function *);

}; // class pass_parallelize_loops

unsigned
pass_parallelize_loops::execute (function *fun)
{
  /* Loop 0 is the function body itself; with <= 1 there are no real
     loops to consider.  */
  if (number_of_loops (fun) <= 1)
    return 0;

  if (parallelize_loops ())
    {
      /* New OMP regions were emitted; they still need expansion.  */
      fun->curr_properties &= ~(PROP_gimple_eomp);

#ifdef ENABLE_CHECKING
      verify_loop_structure ();
#endif

      return TODO_update_ssa;
    }

  return 0;
}

} // anon namespace
/* Allocate and return a new instance of the parloops pass for the pass
   manager, bound to context CTXT.  */

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}