2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-parloops.c
blob3495ac19f26f9966d1da7f41e94bc046cfecf80b
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "input.h"
26 #include "alias.h"
27 #include "symtab.h"
28 #include "options.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "predict.h"
32 #include "tm.h"
33 #include "hard-reg-set.h"
34 #include "input.h"
35 #include "function.h"
36 #include "dominance.h"
37 #include "cfg.h"
38 #include "basic-block.h"
39 #include "tree-ssa-alias.h"
40 #include "internal-fn.h"
41 #include "gimple-expr.h"
42 #include "is-a.h"
43 #include "gimple.h"
44 #include "gimplify.h"
45 #include "gimple-iterator.h"
46 #include "gimplify-me.h"
47 #include "gimple-walk.h"
48 #include "stor-layout.h"
49 #include "tree-nested.h"
50 #include "gimple-ssa.h"
51 #include "tree-cfg.h"
52 #include "tree-phinodes.h"
53 #include "ssa-iterators.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-ssa-loop-ivopts.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "tree-ssa-loop-niter.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
61 #include "cfgloop.h"
62 #include "tree-data-ref.h"
63 #include "tree-scalar-evolution.h"
64 #include "gimple-pretty-print.h"
65 #include "tree-pass.h"
66 #include "langhooks.h"
67 #include "tree-vectorizer.h"
68 #include "tree-hasher.h"
69 #include "tree-parloops.h"
70 #include "omp-low.h"
71 #include "tree-nested.h"
72 #include "plugin-api.h"
73 #include "ipa-ref.h"
74 #include "cgraph.h"
75 #include "tree-ssa.h"
77 /* This pass tries to distribute iterations of loops into several threads.
78 The implementation is straightforward -- for each loop we test whether its
79 iterations are independent, and if it is the case (and some additional
80 conditions regarding profitability and correctness are satisfied), we
81 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
82 machinery do its job.
84 The most of the complexity is in bringing the code into shape expected
85 by the omp expanders:
86 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
87 variable and that the exit test is at the start of the loop body
88 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
89 variables by accesses through pointers, and breaking up ssa chains
90 by storing the values incoming to the parallelized loop to a structure
91 passed to the new function as an argument (something similar is done
92 in omp gimplification, unfortunately only a small part of the code
93 can be shared).
95 TODO:
96 -- if there are several parallelizable loops in a function, it may be
97 possible to generate the threads just once (using synchronization to
98 ensure that cross-loop dependences are obeyed).
99 -- handling of common reduction patterns for outer loops.
101 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
103 Reduction handling:
104 currently we use vect_force_simple_reduction() to detect reduction patterns.
105 The code transformation will be introduced by an example.
108 parloop
110 int sum=1;
112 for (i = 0; i < N; i++)
114 x[i] = i + 3;
115 sum+=x[i];
119 gimple-like code:
120 header_bb:
122 # sum_29 = PHI <sum_11(5), 1(3)>
123 # i_28 = PHI <i_12(5), 0(3)>
124 D.1795_8 = i_28 + 3;
125 x[i_28] = D.1795_8;
126 sum_11 = D.1795_8 + sum_29;
127 i_12 = i_28 + 1;
128 if (N_6(D) > i_12)
129 goto header_bb;
132 exit_bb:
134 # sum_21 = PHI <sum_11(4)>
135 printf (&"%d"[0], sum_21);
138 after reduction transformation (only relevant parts):
140 parloop
143 ....
146 # Storing the initial value given by the user. #
148 .paral_data_store.32.sum.27 = 1;
150 #pragma omp parallel num_threads(4)
152 #pragma omp for schedule(static)
154 # The neutral element corresponding to the particular
155 reduction's operation, e.g. 0 for PLUS_EXPR,
156 1 for MULT_EXPR, etc. replaces the user's initial value. #
158 # sum.27_29 = PHI <sum.27_11, 0>
160 sum.27_11 = D.1827_8 + sum.27_29;
162 GIMPLE_OMP_CONTINUE
164 # Adding this reduction phi is done at create_phi_for_local_result() #
165 # sum.27_56 = PHI <sum.27_11, 0>
166 GIMPLE_OMP_RETURN
168 # Creating the atomic operation is done at
169 create_call_for_reduction_1() #
171 #pragma omp atomic_load
172 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
173 D.1840_60 = sum.27_56 + D.1839_59;
174 #pragma omp atomic_store (D.1840_60);
176 GIMPLE_OMP_RETURN
178 # collecting the result after the join of the threads is done at
179 create_loads_for_reductions().
180 The value computed by the threads is loaded from the
181 shared struct. #
184 .paral_data_load.33_52 = &.paral_data_store.32;
185 sum_37 = .paral_data_load.33_52->sum.27;
186 sum_43 = D.1795_41 + sum_37;
188 exit bb:
189 # sum_21 = PHI <sum_43, sum_26>
190 printf (&"%d"[0], sum_21);
198 /* Minimal number of iterations of a loop that should be executed in each
199 thread. */
200 #define MIN_PER_THREAD 100
202 /* Element of the hashtable, representing a
203 reduction in the current loop. */
204 struct reduction_info
206 gimple reduc_stmt; /* reduction statement. */
207 gimple reduc_phi; /* The phi node defining the reduction. */
208 enum tree_code reduction_code;/* code for the reduction operation. */
209 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
210 result. */
211 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
212 of the reduction variable when existing the loop. */
213 tree initial_value; /* The initial value of the reduction var before entering the loop. */
214 tree field; /* the name of the field in the parloop data structure intended for reduction. */
215 tree init; /* reduction initialization value. */
216 gphi *new_phi; /* (helper field) Newly created phi node whose result
217 will be passed to the atomic operation. Represents
218 the local result each thread computed for the reduction
219 operation. */
222 /* Reduction info hashtable helpers. */
224 struct reduction_hasher : typed_free_remove <reduction_info>
226 typedef reduction_info *value_type;
227 typedef reduction_info *compare_type;
228 static inline hashval_t hash (const reduction_info *);
229 static inline bool equal (const reduction_info *, const reduction_info *);
232 /* Equality and hash functions for hashtab code. */
234 inline bool
235 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
237 return (a->reduc_phi == b->reduc_phi);
240 inline hashval_t
241 reduction_hasher::hash (const reduction_info *a)
243 return a->reduc_version;
246 typedef hash_table<reduction_hasher> reduction_info_table_type;
249 static struct reduction_info *
250 reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
252 struct reduction_info tmpred, *red;
254 if (reduction_list->elements () == 0 || phi == NULL)
255 return NULL;
257 tmpred.reduc_phi = phi;
258 tmpred.reduc_version = gimple_uid (phi);
259 red = reduction_list->find (&tmpred);
261 return red;
264 /* Element of hashtable of names to copy. */
266 struct name_to_copy_elt
268 unsigned version; /* The version of the name to copy. */
269 tree new_name; /* The new name used in the copy. */
270 tree field; /* The field of the structure used to pass the
271 value. */
274 /* Name copies hashtable helpers. */
276 struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
278 typedef name_to_copy_elt *value_type;
279 typedef name_to_copy_elt *compare_type;
280 static inline hashval_t hash (const name_to_copy_elt *);
281 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
284 /* Equality and hash functions for hashtab code. */
286 inline bool
287 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
289 return a->version == b->version;
292 inline hashval_t
293 name_to_copy_hasher::hash (const name_to_copy_elt *a)
295 return (hashval_t) a->version;
298 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
300 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
301 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
302 represents the denominator for every element in the matrix. */
303 typedef struct lambda_trans_matrix_s
305 lambda_matrix matrix;
306 int rowsize;
307 int colsize;
308 int denominator;
309 } *lambda_trans_matrix;
310 #define LTM_MATRIX(T) ((T)->matrix)
311 #define LTM_ROWSIZE(T) ((T)->rowsize)
312 #define LTM_COLSIZE(T) ((T)->colsize)
313 #define LTM_DENOMINATOR(T) ((T)->denominator)
315 /* Allocate a new transformation matrix. */
317 static lambda_trans_matrix
318 lambda_trans_matrix_new (int colsize, int rowsize,
319 struct obstack * lambda_obstack)
321 lambda_trans_matrix ret;
323 ret = (lambda_trans_matrix)
324 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
325 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
326 LTM_ROWSIZE (ret) = rowsize;
327 LTM_COLSIZE (ret) = colsize;
328 LTM_DENOMINATOR (ret) = 1;
329 return ret;
332 /* Multiply a vector VEC by a matrix MAT.
333 MAT is an M*N matrix, and VEC is a vector with length N. The result
334 is stored in DEST which must be a vector of length M. */
336 static void
337 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
338 lambda_vector vec, lambda_vector dest)
340 int i, j;
342 lambda_vector_clear (dest, m);
343 for (i = 0; i < m; i++)
344 for (j = 0; j < n; j++)
345 dest[i] += matrix[i][j] * vec[j];
348 /* Return true if TRANS is a legal transformation matrix that respects
349 the dependence vectors in DISTS and DIRS. The conservative answer
350 is false.
352 "Wolfe proves that a unimodular transformation represented by the
353 matrix T is legal when applied to a loop nest with a set of
354 lexicographically non-negative distance vectors RDG if and only if
355 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
356 i.e.: if and only if it transforms the lexicographically positive
357 distance vectors to lexicographically positive vectors. Note that
358 a unimodular matrix must transform the zero vector (and only it) to
359 the zero vector." S.Muchnick. */
361 static bool
362 lambda_transform_legal_p (lambda_trans_matrix trans,
363 int nb_loops,
364 vec<ddr_p> dependence_relations)
366 unsigned int i, j;
367 lambda_vector distres;
368 struct data_dependence_relation *ddr;
370 gcc_assert (LTM_COLSIZE (trans) == nb_loops
371 && LTM_ROWSIZE (trans) == nb_loops);
373 /* When there are no dependences, the transformation is correct. */
374 if (dependence_relations.length () == 0)
375 return true;
377 ddr = dependence_relations[0];
378 if (ddr == NULL)
379 return true;
381 /* When there is an unknown relation in the dependence_relations, we
382 know that it is no worth looking at this loop nest: give up. */
383 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
384 return false;
386 distres = lambda_vector_new (nb_loops);
388 /* For each distance vector in the dependence graph. */
389 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
391 /* Don't care about relations for which we know that there is no
392 dependence, nor about read-read (aka. output-dependences):
393 these data accesses can happen in any order. */
394 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
395 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
396 continue;
398 /* Conservatively answer: "this transformation is not valid". */
399 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
400 return false;
402 /* If the dependence could not be captured by a distance vector,
403 conservatively answer that the transform is not valid. */
404 if (DDR_NUM_DIST_VECTS (ddr) == 0)
405 return false;
407 /* Compute trans.dist_vect */
408 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
410 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
411 DDR_DIST_VECT (ddr, j), distres);
413 if (!lambda_vector_lexico_pos (distres, nb_loops))
414 return false;
417 return true;
420 /* Data dependency analysis. Returns true if the iterations of LOOP
421 are independent on each other (that is, if we can execute them
422 in parallel). */
424 static bool
425 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
427 vec<ddr_p> dependence_relations;
428 vec<data_reference_p> datarefs;
429 lambda_trans_matrix trans;
430 bool ret = false;
432 if (dump_file && (dump_flags & TDF_DETAILS))
434 fprintf (dump_file, "Considering loop %d\n", loop->num);
435 if (!loop->inner)
436 fprintf (dump_file, "loop is innermost\n");
437 else
438 fprintf (dump_file, "loop NOT innermost\n");
441 /* Check for problems with dependences. If the loop can be reversed,
442 the iterations are independent. */
443 auto_vec<loop_p, 3> loop_nest;
444 datarefs.create (10);
445 dependence_relations.create (100);
446 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
447 &dependence_relations))
449 if (dump_file && (dump_flags & TDF_DETAILS))
450 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
451 ret = false;
452 goto end;
454 if (dump_file && (dump_flags & TDF_DETAILS))
455 dump_data_dependence_relations (dump_file, dependence_relations);
457 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
458 LTM_MATRIX (trans)[0][0] = -1;
460 if (lambda_transform_legal_p (trans, 1, dependence_relations))
462 ret = true;
463 if (dump_file && (dump_flags & TDF_DETAILS))
464 fprintf (dump_file, " SUCCESS: may be parallelized\n");
466 else if (dump_file && (dump_flags & TDF_DETAILS))
467 fprintf (dump_file,
468 " FAILED: data dependencies exist across iterations\n");
470 end:
471 free_dependence_relations (dependence_relations);
472 free_data_refs (datarefs);
474 return ret;
477 /* Return true when LOOP contains basic blocks marked with the
478 BB_IRREDUCIBLE_LOOP flag. */
480 static inline bool
481 loop_has_blocks_with_irreducible_flag (struct loop *loop)
483 unsigned i;
484 basic_block *bbs = get_loop_body_in_dom_order (loop);
485 bool res = true;
487 for (i = 0; i < loop->num_nodes; i++)
488 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
489 goto end;
491 res = false;
492 end:
493 free (bbs);
494 return res;
497 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
498 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
499 to their addresses that can be reused. The address of OBJ is known to
500 be invariant in the whole function. Other needed statements are placed
501 right before GSI. */
503 static tree
504 take_address_of (tree obj, tree type, edge entry,
505 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
507 int uid;
508 tree *var_p, name, addr;
509 gassign *stmt;
510 gimple_seq stmts;
512 /* Since the address of OBJ is invariant, the trees may be shared.
513 Avoid rewriting unrelated parts of the code. */
514 obj = unshare_expr (obj);
515 for (var_p = &obj;
516 handled_component_p (*var_p);
517 var_p = &TREE_OPERAND (*var_p, 0))
518 continue;
520 /* Canonicalize the access to base on a MEM_REF. */
521 if (DECL_P (*var_p))
522 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
524 /* Assign a canonical SSA name to the address of the base decl used
525 in the address and share it for all accesses and addresses based
526 on it. */
527 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
528 int_tree_map elt;
529 elt.uid = uid;
530 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
531 if (!slot->to)
533 if (gsi == NULL)
534 return NULL;
535 addr = TREE_OPERAND (*var_p, 0);
536 const char *obj_name
537 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
538 if (obj_name)
539 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
540 else
541 name = make_ssa_name (TREE_TYPE (addr));
542 stmt = gimple_build_assign (name, addr);
543 gsi_insert_on_edge_immediate (entry, stmt);
545 slot->uid = uid;
546 slot->to = name;
548 else
549 name = slot->to;
551 /* Express the address in terms of the canonical SSA name. */
552 TREE_OPERAND (*var_p, 0) = name;
553 if (gsi == NULL)
554 return build_fold_addr_expr_with_type (obj, type);
556 name = force_gimple_operand (build_addr (obj, current_function_decl),
557 &stmts, true, NULL_TREE);
558 if (!gimple_seq_empty_p (stmts))
559 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
561 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
563 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
564 NULL_TREE);
565 if (!gimple_seq_empty_p (stmts))
566 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
569 return name;
572 /* Callback for htab_traverse. Create the initialization statement
573 for reduction described in SLOT, and place it at the preheader of
574 the loop described in DATA. */
577 initialize_reductions (reduction_info **slot, struct loop *loop)
579 tree init, c;
580 tree bvar, type, arg;
581 edge e;
583 struct reduction_info *const reduc = *slot;
585 /* Create initialization in preheader:
586 reduction_variable = initialization value of reduction. */
588 /* In the phi node at the header, replace the argument coming
589 from the preheader with the reduction initialization value. */
591 /* Create a new variable to initialize the reduction. */
592 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
593 bvar = create_tmp_var (type, "reduction");
595 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
596 OMP_CLAUSE_REDUCTION);
597 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
598 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
600 init = omp_reduction_init (c, TREE_TYPE (bvar));
601 reduc->init = init;
603 /* Replace the argument representing the initialization value
604 with the initialization value for the reduction (neutral
605 element for the particular operation, e.g. 0 for PLUS_EXPR,
606 1 for MULT_EXPR, etc).
607 Keep the old value in a new variable "reduction_initial",
608 that will be taken in consideration after the parallel
609 computing is done. */
611 e = loop_preheader_edge (loop);
612 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
613 /* Create new variable to hold the initial value. */
615 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
616 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
617 reduc->initial_value = arg;
618 return 1;
621 struct elv_data
623 struct walk_stmt_info info;
624 edge entry;
625 int_tree_htab_type *decl_address;
626 gimple_stmt_iterator *gsi;
627 bool changed;
628 bool reset;
631 /* Eliminates references to local variables in *TP out of the single
632 entry single exit region starting at DTA->ENTRY.
633 DECL_ADDRESS contains addresses of the references that had their
634 address taken already. If the expression is changed, CHANGED is
635 set to true. Callback for walk_tree. */
637 static tree
638 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
640 struct elv_data *const dta = (struct elv_data *) data;
641 tree t = *tp, var, addr, addr_type, type, obj;
643 if (DECL_P (t))
645 *walk_subtrees = 0;
647 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
648 return NULL_TREE;
650 type = TREE_TYPE (t);
651 addr_type = build_pointer_type (type);
652 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
653 dta->gsi);
654 if (dta->gsi == NULL && addr == NULL_TREE)
656 dta->reset = true;
657 return NULL_TREE;
660 *tp = build_simple_mem_ref (addr);
662 dta->changed = true;
663 return NULL_TREE;
666 if (TREE_CODE (t) == ADDR_EXPR)
668 /* ADDR_EXPR may appear in two contexts:
669 -- as a gimple operand, when the address taken is a function invariant
670 -- as gimple rhs, when the resulting address in not a function
671 invariant
672 We do not need to do anything special in the latter case (the base of
673 the memory reference whose address is taken may be replaced in the
674 DECL_P case). The former case is more complicated, as we need to
675 ensure that the new address is still a gimple operand. Thus, it
676 is not sufficient to replace just the base of the memory reference --
677 we need to move the whole computation of the address out of the
678 loop. */
679 if (!is_gimple_val (t))
680 return NULL_TREE;
682 *walk_subtrees = 0;
683 obj = TREE_OPERAND (t, 0);
684 var = get_base_address (obj);
685 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
686 return NULL_TREE;
688 addr_type = TREE_TYPE (t);
689 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
690 dta->gsi);
691 if (dta->gsi == NULL && addr == NULL_TREE)
693 dta->reset = true;
694 return NULL_TREE;
696 *tp = addr;
698 dta->changed = true;
699 return NULL_TREE;
702 if (!EXPR_P (t))
703 *walk_subtrees = 0;
705 return NULL_TREE;
708 /* Moves the references to local variables in STMT at *GSI out of the single
709 entry single exit region starting at ENTRY. DECL_ADDRESS contains
710 addresses of the references that had their address taken
711 already. */
713 static void
714 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
715 int_tree_htab_type *decl_address)
717 struct elv_data dta;
718 gimple stmt = gsi_stmt (*gsi);
720 memset (&dta.info, '\0', sizeof (dta.info));
721 dta.entry = entry;
722 dta.decl_address = decl_address;
723 dta.changed = false;
724 dta.reset = false;
726 if (gimple_debug_bind_p (stmt))
728 dta.gsi = NULL;
729 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
730 eliminate_local_variables_1, &dta.info, NULL);
731 if (dta.reset)
733 gimple_debug_bind_reset_value (stmt);
734 dta.changed = true;
737 else if (gimple_clobber_p (stmt))
739 stmt = gimple_build_nop ();
740 gsi_replace (gsi, stmt, false);
741 dta.changed = true;
743 else
745 dta.gsi = gsi;
746 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
749 if (dta.changed)
750 update_stmt (stmt);
753 /* Eliminates the references to local variables from the single entry
754 single exit region between the ENTRY and EXIT edges.
756 This includes:
757 1) Taking address of a local variable -- these are moved out of the
758 region (and temporary variable is created to hold the address if
759 necessary).
761 2) Dereferencing a local variable -- these are replaced with indirect
762 references. */
764 static void
765 eliminate_local_variables (edge entry, edge exit)
767 basic_block bb;
768 auto_vec<basic_block, 3> body;
769 unsigned i;
770 gimple_stmt_iterator gsi;
771 bool has_debug_stmt = false;
772 int_tree_htab_type decl_address (10);
773 basic_block entry_bb = entry->src;
774 basic_block exit_bb = exit->dest;
776 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
778 FOR_EACH_VEC_ELT (body, i, bb)
779 if (bb != entry_bb && bb != exit_bb)
780 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
781 if (is_gimple_debug (gsi_stmt (gsi)))
783 if (gimple_debug_bind_p (gsi_stmt (gsi)))
784 has_debug_stmt = true;
786 else
787 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
789 if (has_debug_stmt)
790 FOR_EACH_VEC_ELT (body, i, bb)
791 if (bb != entry_bb && bb != exit_bb)
792 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
793 if (gimple_debug_bind_p (gsi_stmt (gsi)))
794 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
797 /* Returns true if expression EXPR is not defined between ENTRY and
798 EXIT, i.e. if all its operands are defined outside of the region. */
800 static bool
801 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
803 basic_block entry_bb = entry->src;
804 basic_block exit_bb = exit->dest;
805 basic_block def_bb;
807 if (is_gimple_min_invariant (expr))
808 return true;
810 if (TREE_CODE (expr) == SSA_NAME)
812 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
813 if (def_bb
814 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
815 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
816 return false;
818 return true;
821 return false;
824 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
825 The copies are stored to NAME_COPIES, if NAME was already duplicated,
826 its duplicate stored in NAME_COPIES is returned.
828 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
829 duplicated, storing the copies in DECL_COPIES. */
831 static tree
832 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
833 int_tree_htab_type *decl_copies,
834 bool copy_name_p)
836 tree copy, var, var_copy;
837 unsigned idx, uid, nuid;
838 struct int_tree_map ielt;
839 struct name_to_copy_elt elt, *nelt;
840 name_to_copy_elt **slot;
841 int_tree_map *dslot;
843 if (TREE_CODE (name) != SSA_NAME)
844 return name;
846 idx = SSA_NAME_VERSION (name);
847 elt.version = idx;
848 slot = name_copies->find_slot_with_hash (&elt, idx,
849 copy_name_p ? INSERT : NO_INSERT);
850 if (slot && *slot)
851 return (*slot)->new_name;
853 if (copy_name_p)
855 copy = duplicate_ssa_name (name, NULL);
856 nelt = XNEW (struct name_to_copy_elt);
857 nelt->version = idx;
858 nelt->new_name = copy;
859 nelt->field = NULL_TREE;
860 *slot = nelt;
862 else
864 gcc_assert (!slot);
865 copy = name;
868 var = SSA_NAME_VAR (name);
869 if (!var)
870 return copy;
872 uid = DECL_UID (var);
873 ielt.uid = uid;
874 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
875 if (!dslot->to)
877 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
878 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
879 dslot->uid = uid;
880 dslot->to = var_copy;
882 /* Ensure that when we meet this decl next time, we won't duplicate
883 it again. */
884 nuid = DECL_UID (var_copy);
885 ielt.uid = nuid;
886 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
887 gcc_assert (!dslot->to);
888 dslot->uid = nuid;
889 dslot->to = var_copy;
891 else
892 var_copy = dslot->to;
894 replace_ssa_name_symbol (copy, var_copy);
895 return copy;
898 /* Finds the ssa names used in STMT that are defined outside the
899 region between ENTRY and EXIT and replaces such ssa names with
900 their duplicates. The duplicates are stored to NAME_COPIES. Base
901 decls of all ssa names used in STMT (including those defined in
902 LOOP) are replaced with the new temporary variables; the
903 replacement decls are stored in DECL_COPIES. */
905 static void
906 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
907 name_to_copy_table_type *name_copies,
908 int_tree_htab_type *decl_copies)
910 use_operand_p use;
911 def_operand_p def;
912 ssa_op_iter oi;
913 tree name, copy;
914 bool copy_name_p;
916 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
918 name = DEF_FROM_PTR (def);
919 gcc_assert (TREE_CODE (name) == SSA_NAME);
920 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
921 false);
922 gcc_assert (copy == name);
925 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
927 name = USE_FROM_PTR (use);
928 if (TREE_CODE (name) != SSA_NAME)
929 continue;
931 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
932 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
933 copy_name_p);
934 SET_USE (use, copy);
938 /* Finds the ssa names used in STMT that are defined outside the
939 region between ENTRY and EXIT and replaces such ssa names with
940 their duplicates. The duplicates are stored to NAME_COPIES. Base
941 decls of all ssa names used in STMT (including those defined in
942 LOOP) are replaced with the new temporary variables; the
943 replacement decls are stored in DECL_COPIES. */
945 static bool
946 separate_decls_in_region_debug (gimple stmt,
947 name_to_copy_table_type *name_copies,
948 int_tree_htab_type *decl_copies)
950 use_operand_p use;
951 ssa_op_iter oi;
952 tree var, name;
953 struct int_tree_map ielt;
954 struct name_to_copy_elt elt;
955 name_to_copy_elt **slot;
956 int_tree_map *dslot;
958 if (gimple_debug_bind_p (stmt))
959 var = gimple_debug_bind_get_var (stmt);
960 else if (gimple_debug_source_bind_p (stmt))
961 var = gimple_debug_source_bind_get_var (stmt);
962 else
963 return true;
964 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
965 return true;
966 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
967 ielt.uid = DECL_UID (var);
968 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
969 if (!dslot)
970 return true;
971 if (gimple_debug_bind_p (stmt))
972 gimple_debug_bind_set_var (stmt, dslot->to);
973 else if (gimple_debug_source_bind_p (stmt))
974 gimple_debug_source_bind_set_var (stmt, dslot->to);
976 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
978 name = USE_FROM_PTR (use);
979 if (TREE_CODE (name) != SSA_NAME)
980 continue;
982 elt.version = SSA_NAME_VERSION (name);
983 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
984 if (!slot)
986 gimple_debug_bind_reset_value (stmt);
987 update_stmt (stmt);
988 break;
991 SET_USE (use, (*slot)->new_name);
994 return false;
997 /* Callback for htab_traverse. Adds a field corresponding to the reduction
998 specified in SLOT. The type is passed in DATA. */
1001 add_field_for_reduction (reduction_info **slot, tree type)
1004 struct reduction_info *const red = *slot;
1005 tree var = gimple_assign_lhs (red->reduc_stmt);
1006 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1007 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1009 insert_field_into_struct (type, field);
1011 red->field = field;
1013 return 1;
1016 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1017 described in SLOT. The type is passed in DATA. */
1020 add_field_for_name (name_to_copy_elt **slot, tree type)
1022 struct name_to_copy_elt *const elt = *slot;
1023 tree name = ssa_name (elt->version);
1024 tree field = build_decl (UNKNOWN_LOCATION,
1025 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1026 TREE_TYPE (name));
1028 insert_field_into_struct (type, field);
1029 elt->field = field;
1031 return 1;
1034 /* Callback for htab_traverse. A local result is the intermediate result
1035 computed by a single
1036 thread, or the initial value in case no iteration was executed.
1037 This function creates a phi node reflecting these values.
1038 The phi's result will be stored in NEW_PHI field of the
1039 reduction's data structure. */
1042 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1044 struct reduction_info *const reduc = *slot;
1045 edge e;
1046 gphi *new_phi;
1047 basic_block store_bb;
1048 tree local_res;
1049 source_location locus;
1051 /* STORE_BB is the block where the phi
1052 should be stored. It is the destination of the loop exit.
1053 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1054 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1056 /* STORE_BB has two predecessors. One coming from the loop
1057 (the reduction's result is computed at the loop),
1058 and another coming from a block preceding the loop,
1059 when no iterations
1060 are executed (the initial value should be taken). */
1061 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1062 e = EDGE_PRED (store_bb, 1);
1063 else
1064 e = EDGE_PRED (store_bb, 0);
1065 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
1066 locus = gimple_location (reduc->reduc_stmt);
1067 new_phi = create_phi_node (local_res, store_bb);
1068 add_phi_arg (new_phi, reduc->init, e, locus);
1069 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1070 FALLTHRU_EDGE (loop->latch), locus);
1071 reduc->new_phi = new_phi;
1073 return 1;
1076 struct clsn_data
1078 tree store;
1079 tree load;
1081 basic_block store_bb;
1082 basic_block load_bb;
1085 /* Callback for htab_traverse. Create an atomic instruction for the
1086 reduction described in SLOT.
1087 DATA annotates the place in memory the atomic operation relates to,
1088 and the basic block it needs to be generated in. */
1091 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1093 struct reduction_info *const reduc = *slot;
1094 gimple_stmt_iterator gsi;
1095 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1096 tree load_struct;
1097 basic_block bb;
1098 basic_block new_bb;
1099 edge e;
1100 tree t, addr, ref, x;
1101 tree tmp_load, name;
1102 gimple load;
1104 load_struct = build_simple_mem_ref (clsn_data->load);
1105 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1107 addr = build_addr (t, current_function_decl);
1109 /* Create phi node. */
1110 bb = clsn_data->load_bb;
1112 gsi = gsi_last_bb (bb);
1113 e = split_block (bb, gsi_stmt (gsi));
1114 new_bb = e->dest;
1116 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1117 tmp_load = make_ssa_name (tmp_load);
1118 load = gimple_build_omp_atomic_load (tmp_load, addr);
1119 SSA_NAME_DEF_STMT (tmp_load) = load;
1120 gsi = gsi_start_bb (new_bb);
1121 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1123 e = split_block (new_bb, load);
1124 new_bb = e->dest;
1125 gsi = gsi_start_bb (new_bb);
1126 ref = tmp_load;
1127 x = fold_build2 (reduc->reduction_code,
1128 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1129 PHI_RESULT (reduc->new_phi));
1131 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1132 GSI_CONTINUE_LINKING);
1134 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1135 return 1;
1138 /* Create the atomic operation at the join point of the threads.
1139 REDUCTION_LIST describes the reductions in the LOOP.
1140 LD_ST_DATA describes the shared data structure where
1141 shared data is stored in and loaded from. */
1142 static void
1143 create_call_for_reduction (struct loop *loop,
1144 reduction_info_table_type *reduction_list,
1145 struct clsn_data *ld_st_data)
1147 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1148 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1149 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1150 reduction_list
1151 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1154 /* Callback for htab_traverse. Loads the final reduction value at the
1155 join point of all threads, and inserts it in the right place. */
1158 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1160 struct reduction_info *const red = *slot;
1161 gimple stmt;
1162 gimple_stmt_iterator gsi;
1163 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1164 tree load_struct;
1165 tree name;
1166 tree x;
1168 gsi = gsi_after_labels (clsn_data->load_bb);
1169 load_struct = build_simple_mem_ref (clsn_data->load);
1170 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1171 NULL_TREE);
1173 x = load_struct;
1174 name = PHI_RESULT (red->keep_res);
1175 stmt = gimple_build_assign (name, x);
1177 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1179 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1180 !gsi_end_p (gsi); gsi_next (&gsi))
1181 if (gsi_stmt (gsi) == red->keep_res)
1183 remove_phi_node (&gsi, false);
1184 return 1;
1186 gcc_unreachable ();
1189 /* Load the reduction result that was stored in LD_ST_DATA.
1190 REDUCTION_LIST describes the list of reductions that the
1191 loads should be generated for. */
1192 static void
1193 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1194 struct clsn_data *ld_st_data)
1196 gimple_stmt_iterator gsi;
1197 tree t;
1198 gimple stmt;
1200 gsi = gsi_after_labels (ld_st_data->load_bb);
1201 t = build_fold_addr_expr (ld_st_data->store);
1202 stmt = gimple_build_assign (ld_st_data->load, t);
1204 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1206 reduction_list
1207 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1211 /* Callback for htab_traverse. Store the neutral value for the
1212 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1213 1 for MULT_EXPR, etc. into the reduction field.
1214 The reduction is specified in SLOT. The store information is
1215 passed in DATA. */
1218 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1220 struct reduction_info *const red = *slot;
1221 tree t;
1222 gimple stmt;
1223 gimple_stmt_iterator gsi;
1224 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1226 gsi = gsi_last_bb (clsn_data->store_bb);
1227 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1228 stmt = gimple_build_assign (t, red->initial_value);
1229 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1231 return 1;
1234 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1235 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1236 specified in SLOT. */
1239 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1240 struct clsn_data *clsn_data)
1242 struct name_to_copy_elt *const elt = *slot;
1243 tree t;
1244 gimple stmt;
1245 gimple_stmt_iterator gsi;
1246 tree type = TREE_TYPE (elt->new_name);
1247 tree load_struct;
1249 gsi = gsi_last_bb (clsn_data->store_bb);
1250 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1251 stmt = gimple_build_assign (t, ssa_name (elt->version));
1252 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1254 gsi = gsi_last_bb (clsn_data->load_bb);
1255 load_struct = build_simple_mem_ref (clsn_data->load);
1256 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1257 stmt = gimple_build_assign (elt->new_name, t);
1258 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1260 return 1;
1263 /* Moves all the variables used in LOOP and defined outside of it (including
1264 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1265 name) to a structure created for this purpose. The code
1267 while (1)
1269 use (a);
1270 use (b);
1273 is transformed this way:
1275 bb0:
1276 old.a = a;
1277 old.b = b;
1279 bb1:
1280 a' = new->a;
1281 b' = new->b;
1282 while (1)
1284 use (a');
1285 use (b');
1288 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1289 pointer `new' is intentionally not initialized (the loop will be split to a
1290 separate function later, and `new' will be initialized from its arguments).
1291 LD_ST_DATA holds information about the shared data structure used to pass
1292 information among the threads. It is initialized here, and
1293 gen_parallel_loop will pass it to create_call_for_reduction that
1294 needs this information. REDUCTION_LIST describes the reductions
1295 in LOOP. */
1297 static void
1298 separate_decls_in_region (edge entry, edge exit,
1299 reduction_info_table_type *reduction_list,
1300 tree *arg_struct, tree *new_arg_struct,
1301 struct clsn_data *ld_st_data)
1304 basic_block bb1 = split_edge (entry);
1305 basic_block bb0 = single_pred (bb1);
1306 name_to_copy_table_type name_copies (10);
1307 int_tree_htab_type decl_copies (10);
1308 unsigned i;
1309 tree type, type_name, nvar;
1310 gimple_stmt_iterator gsi;
1311 struct clsn_data clsn_data;
1312 auto_vec<basic_block, 3> body;
1313 basic_block bb;
1314 basic_block entry_bb = bb1;
1315 basic_block exit_bb = exit->dest;
1316 bool has_debug_stmt = false;
1318 entry = single_succ_edge (entry_bb);
1319 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1321 FOR_EACH_VEC_ELT (body, i, bb)
1323 if (bb != entry_bb && bb != exit_bb)
1325 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1326 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1327 &name_copies, &decl_copies);
1329 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1331 gimple stmt = gsi_stmt (gsi);
1333 if (is_gimple_debug (stmt))
1334 has_debug_stmt = true;
1335 else
1336 separate_decls_in_region_stmt (entry, exit, stmt,
1337 &name_copies, &decl_copies);
1342 /* Now process debug bind stmts. We must not create decls while
1343 processing debug stmts, so we defer their processing so as to
1344 make sure we will have debug info for as many variables as
1345 possible (all of those that were dealt with in the loop above),
1346 and discard those for which we know there's nothing we can
1347 do. */
1348 if (has_debug_stmt)
1349 FOR_EACH_VEC_ELT (body, i, bb)
1350 if (bb != entry_bb && bb != exit_bb)
1352 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1354 gimple stmt = gsi_stmt (gsi);
1356 if (is_gimple_debug (stmt))
1358 if (separate_decls_in_region_debug (stmt, &name_copies,
1359 &decl_copies))
1361 gsi_remove (&gsi, true);
1362 continue;
1366 gsi_next (&gsi);
1370 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
1372 /* It may happen that there is nothing to copy (if there are only
1373 loop carried and external variables in the loop). */
1374 *arg_struct = NULL;
1375 *new_arg_struct = NULL;
1377 else
1379 /* Create the type for the structure to store the ssa names to. */
1380 type = lang_hooks.types.make_type (RECORD_TYPE);
1381 type_name = build_decl (UNKNOWN_LOCATION,
1382 TYPE_DECL, create_tmp_var_name (".paral_data"),
1383 type);
1384 TYPE_NAME (type) = type_name;
1386 name_copies.traverse <tree, add_field_for_name> (type);
1387 if (reduction_list && reduction_list->elements () > 0)
1389 /* Create the fields for reductions. */
1390 reduction_list->traverse <tree, add_field_for_reduction> (type);
1392 layout_type (type);
1394 /* Create the loads and stores. */
1395 *arg_struct = create_tmp_var (type, ".paral_data_store");
1396 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1397 *new_arg_struct = make_ssa_name (nvar);
1399 ld_st_data->store = *arg_struct;
1400 ld_st_data->load = *new_arg_struct;
1401 ld_st_data->store_bb = bb0;
1402 ld_st_data->load_bb = bb1;
1404 name_copies
1405 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1406 (ld_st_data);
1408 /* Load the calculation from memory (after the join of the threads). */
1410 if (reduction_list && reduction_list->elements () > 0)
1412 reduction_list
1413 ->traverse <struct clsn_data *, create_stores_for_reduction>
1414 (ld_st_data);
1415 clsn_data.load = make_ssa_name (nvar);
1416 clsn_data.load_bb = exit->dest;
1417 clsn_data.store = ld_st_data->store;
1418 create_final_loads_for_reduction (reduction_list, &clsn_data);
1423 /* Returns true if FN was created to run in parallel. */
1425 bool
1426 parallelized_function_p (tree fndecl)
1428 cgraph_node *node = cgraph_node::get (fndecl);
1429 gcc_assert (node != NULL);
1430 return node->parallelized_function;
1433 /* Creates and returns an empty function that will receive the body of
1434 a parallelized loop. */
1436 static tree
1437 create_loop_fn (location_t loc)
1439 char buf[100];
1440 char *tname;
1441 tree decl, type, name, t;
1442 struct function *act_cfun = cfun;
1443 static unsigned loopfn_num;
1445 loc = LOCATION_LOCUS (loc);
1446 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1447 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1448 clean_symbol_name (tname);
1449 name = get_identifier (tname);
1450 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1452 decl = build_decl (loc, FUNCTION_DECL, name, type);
1453 TREE_STATIC (decl) = 1;
1454 TREE_USED (decl) = 1;
1455 DECL_ARTIFICIAL (decl) = 1;
1456 DECL_IGNORED_P (decl) = 0;
1457 TREE_PUBLIC (decl) = 0;
1458 DECL_UNINLINABLE (decl) = 1;
1459 DECL_EXTERNAL (decl) = 0;
1460 DECL_CONTEXT (decl) = NULL_TREE;
1461 DECL_INITIAL (decl) = make_node (BLOCK);
1463 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1464 DECL_ARTIFICIAL (t) = 1;
1465 DECL_IGNORED_P (t) = 1;
1466 DECL_RESULT (decl) = t;
1468 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1469 ptr_type_node);
1470 DECL_ARTIFICIAL (t) = 1;
1471 DECL_ARG_TYPE (t) = ptr_type_node;
1472 DECL_CONTEXT (t) = decl;
1473 TREE_USED (t) = 1;
1474 DECL_ARGUMENTS (decl) = t;
1476 allocate_struct_function (decl, false);
1478 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1479 it. */
1480 set_cfun (act_cfun);
1482 return decl;
1485 /* Replace uses of NAME by VAL in block BB. */
1487 static void
1488 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1490 gimple use_stmt;
1491 imm_use_iterator imm_iter;
1493 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1495 if (gimple_bb (use_stmt) != bb)
1496 continue;
1498 use_operand_p use_p;
1499 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1500 SET_USE (use_p, val);
1504 /* Replace uses of NAME by VAL in blocks BBS. */
1506 static void
1507 replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
1509 gimple use_stmt;
1510 imm_use_iterator imm_iter;
1512 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1514 if (!bitmap_bit_p (bbs, gimple_bb (use_stmt)->index))
1515 continue;
1517 use_operand_p use_p;
1518 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1519 SET_USE (use_p, val);
1523 /* Do transformation from:
1525 <bb preheader>:
1527 goto <bb header>
1529 <bb header>:
1530 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1531 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1533 use (ivtmp_a)
1535 sum_b = sum_a + sum_update
1537 if (ivtmp_a < n)
1538 goto <bb latch>;
1539 else
1540 goto <bb exit>;
1542 <bb latch>:
1543 ivtmp_b = ivtmp_a + 1;
1544 goto <bb header>
1546 <bb exit>:
1547 sum_z = PHI <sum_b (cond[1])>
1549 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1550 that's <bb header>.
1554 <bb preheader>:
1556 goto <bb newheader>
1558 <bb header>:
1559 ivtmp_a = PHI <ivtmp_c (latch)>
1560 sum_a = PHI <sum_c (latch)>
1562 use (ivtmp_a)
1564 sum_b = sum_a + sum_update
1566 goto <bb latch>;
1568 <bb newheader>:
1569 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571 if (ivtmp_c < n + 1)
1572 goto <bb header>;
1573 else
1574 goto <bb exit>;
1576 <bb latch>:
1577 ivtmp_b = ivtmp_a + 1;
1578 goto <bb newheader>
1580 <bb exit>:
1581 sum_z = PHI <sum_c (newheader)>
1584 In unified diff format:
1586 <bb preheader>:
1588 - goto <bb header>
1589 + goto <bb newheader>
1591 <bb header>:
1592 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1593 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1594 + ivtmp_a = PHI <ivtmp_c (latch)>
1595 + sum_a = PHI <sum_c (latch)>
1597 use (ivtmp_a)
1599 sum_b = sum_a + sum_update
1601 - if (ivtmp_a < n)
1602 - goto <bb latch>;
1603 + goto <bb latch>;
1605 + <bb newheader>:
1606 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1607 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1608 + if (ivtmp_c < n + 1)
1609 + goto <bb header>;
1610 else
1611 goto <bb exit>;
1613 <bb latch>:
1614 ivtmp_b = ivtmp_a + 1;
1615 - goto <bb header>
1616 + goto <bb newheader>
1618 <bb exit>:
1619 - sum_z = PHI <sum_b (cond[1])>
1620 + sum_z = PHI <sum_c (newheader)>
1622 Note: the example does not show any virtual phis, but these are handled more
1623 or less as reductions.
1626 Moves the exit condition of LOOP to the beginning of its header.
1627 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1628 bound. */
1630 static void
1631 transform_to_exit_first_loop_alt (struct loop *loop,
1632 reduction_info_table_type *reduction_list,
1633 tree bound)
1635 basic_block header = loop->header;
1636 basic_block latch = loop->latch;
1637 edge exit = single_dom_exit (loop);
1638 basic_block exit_block = exit->dest;
1639 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1640 tree control = gimple_cond_lhs (cond_stmt);
1641 edge e;
1643 /* Gather the bbs dominated by the exit block. */
1644 bitmap exit_dominated = BITMAP_ALLOC (NULL);
1645 bitmap_set_bit (exit_dominated, exit_block->index);
1646 vec<basic_block> exit_dominated_vec
1647 = get_dominated_by (CDI_DOMINATORS, exit_block);
1649 int i;
1650 basic_block dom_bb;
1651 FOR_EACH_VEC_ELT (exit_dominated_vec, i, dom_bb)
1652 bitmap_set_bit (exit_dominated, dom_bb->index);
1654 exit_dominated_vec.release ();
1656 /* Create the new_header block. */
1657 basic_block new_header = split_block_before_cond_jump (exit->src);
1658 edge split_edge = single_pred_edge (new_header);
1660 /* Redirect entry edge to new_header. */
1661 edge entry = loop_preheader_edge (loop);
1662 e = redirect_edge_and_branch (entry, new_header);
1663 gcc_assert (e == entry);
1665 /* Redirect post_inc_edge to new_header. */
1666 edge post_inc_edge = single_succ_edge (latch);
1667 e = redirect_edge_and_branch (post_inc_edge, new_header);
1668 gcc_assert (e == post_inc_edge);
1670 /* Redirect post_cond_edge to header. */
1671 edge post_cond_edge = single_pred_edge (latch);
1672 e = redirect_edge_and_branch (post_cond_edge, header);
1673 gcc_assert (e == post_cond_edge);
1675 /* Redirect split_edge to latch. */
1676 e = redirect_edge_and_branch (split_edge, latch);
1677 gcc_assert (e == split_edge);
1679 /* Set the new loop bound. */
1680 gimple_cond_set_rhs (cond_stmt, bound);
1682 /* Repair the ssa. */
1683 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1684 edge_var_map *vm;
1685 gphi_iterator gsi;
1686 for (gsi = gsi_start_phis (header), i = 0;
1687 !gsi_end_p (gsi) && v->iterate (i, &vm);
1688 gsi_next (&gsi), i++)
1690 gphi *phi = gsi.phi ();
1691 tree res_a = PHI_RESULT (phi);
1693 /* Create new phi. */
1694 tree res_c = copy_ssa_name (res_a, phi);
1695 gphi *nphi = create_phi_node (res_c, new_header);
1697 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1698 replace_uses_in_bb_by (res_a, res_c, new_header);
1700 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1701 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1703 /* Replace sum_b with sum_c in exit phi. Loop-closed ssa does not hold
1704 for virtuals, so we cannot get away with exit_block only. */
1705 tree res_b = redirect_edge_var_map_def (vm);
1706 replace_uses_in_bbs_by (res_b, res_c, exit_dominated);
1708 struct reduction_info *red = reduction_phi (reduction_list, phi);
1709 gcc_assert (virtual_operand_p (res_a)
1710 || res_a == control
1711 || red != NULL);
1713 if (red)
1715 /* Register the new reduction phi. */
1716 red->reduc_phi = nphi;
1717 gimple_set_uid (red->reduc_phi, red->reduc_version);
1720 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1721 BITMAP_FREE (exit_dominated);
1723 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1724 flush_pending_stmts (entry);
1726 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1727 flush_pending_stmts (post_inc_edge);
1729 /* Register the reduction exit phis. */
1730 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1731 !gsi_end_p (gsi);
1732 gsi_next (&gsi))
1734 gphi *phi = gsi.phi ();
1735 tree res_z = PHI_RESULT (phi);
1736 if (virtual_operand_p (res_z))
1737 continue;
1739 tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1740 gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
1741 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1742 if (red != NULL)
1743 red->keep_res = phi;
1746 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1747 then we're still using some fields, so only bother about fields that are
1748 still used: header and latch.
1749 The loop has a new header bb, so we update it. The latch bb stays the
1750 same. */
1751 loop->header = new_header;
1753 /* Recalculate dominance info. */
1754 free_dominance_info (CDI_DOMINATORS);
1755 calculate_dominance_info (CDI_DOMINATORS);
1758 /* Tries to moves the exit condition of LOOP to the beginning of its header
1759 without duplication of the loop body. NIT is the number of iterations of the
1760 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1761 transformation is successful. */
1763 static bool
1764 try_transform_to_exit_first_loop_alt (struct loop *loop,
1765 reduction_info_table_type *reduction_list,
1766 tree nit)
1768 /* Check whether the latch contains a single statement. */
1769 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1770 return false;
1772 /* Check whether the latch contains the loop iv increment. */
1773 edge back = single_succ_edge (loop->latch);
1774 edge exit = single_dom_exit (loop);
1775 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1776 tree control = gimple_cond_lhs (cond_stmt);
1777 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1778 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1779 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1780 return false;
1782 /* Check whether there's no code between the loop condition and the latch. */
1783 if (!single_pred_p (loop->latch)
1784 || single_pred (loop->latch) != exit->src)
1785 return false;
1787 tree alt_bound = NULL_TREE;
1788 tree nit_type = TREE_TYPE (nit);
1790 /* Figure out whether nit + 1 overflows. */
1791 if (TREE_CODE (nit) == INTEGER_CST)
1793 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1795 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1796 nit, build_one_cst (nit_type));
1798 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1800 else
1802 /* Todo: Figure out if we can trigger this, if it's worth to handle
1803 optimally, and if we can handle it optimally. */
1806 else
1808 gcc_assert (TREE_CODE (nit) == SSA_NAME);
1810 gimple def = SSA_NAME_DEF_STMT (nit);
1812 if (def
1813 && is_gimple_assign (def)
1814 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1816 tree op1 = gimple_assign_rhs1 (def);
1817 tree op2 = gimple_assign_rhs2 (def);
1818 if (integer_minus_onep (op1))
1819 alt_bound = op2;
1820 else if (integer_minus_onep (op2))
1821 alt_bound = op1;
1824 /* There is a number of test-cases for which we don't get an alt_bound
1825 here: they're listed here, with the lhs of the last stmt as the nit:
1827 libgomp.graphite/force-parallel-1.c:
1828 _21 = (signed long) N_6(D);
1829 _19 = _21 + -1;
1830 _7 = (unsigned long) _19;
1832 libgomp.graphite/force-parallel-2.c:
1833 _33 = (signed long) N_9(D);
1834 _16 = _33 + -1;
1835 _37 = (unsigned long) _16;
1837 libgomp.graphite/force-parallel-5.c:
1838 <bb 6>:
1839 # graphite_IV.5_46 = PHI <0(5), graphite_IV.5_47(11)>
1840 <bb 7>:
1841 _33 = (unsigned long) graphite_IV.5_46;
1843 g++.dg/tree-ssa/pr34355.C:
1844 _2 = (unsigned int) i_9;
1845 _3 = 4 - _2;
1847 gcc.dg/pr53849.c:
1848 _5 = d.0_11 + -2;
1849 _18 = (unsigned int) _5;
1851 We will be able to handle some of these cases, if we can determine when
1852 it's safe to look past casts. */
1855 if (alt_bound == NULL_TREE)
1856 return false;
1858 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1859 return true;
1862 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1863 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1864 LOOP. */
1866 static void
1867 transform_to_exit_first_loop (struct loop *loop,
1868 reduction_info_table_type *reduction_list,
1869 tree nit)
1871 basic_block *bbs, *nbbs, ex_bb, orig_header;
1872 unsigned n;
1873 bool ok;
1874 edge exit = single_dom_exit (loop), hpred;
1875 tree control, control_name, res, t;
1876 gphi *phi, *nphi;
1877 gassign *stmt;
1878 gcond *cond_stmt, *cond_nit;
1879 tree nit_1;
1881 split_block_after_labels (loop->header);
1882 orig_header = single_succ (loop->header);
1883 hpred = single_succ_edge (loop->header);
1885 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1886 control = gimple_cond_lhs (cond_stmt);
1887 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1889 /* Make sure that we have phi nodes on exit for all loop header phis
1890 (create_parallel_loop requires that). */
1891 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1892 !gsi_end_p (gsi);
1893 gsi_next (&gsi))
1895 phi = gsi.phi ();
1896 res = PHI_RESULT (phi);
1897 t = copy_ssa_name (res, phi);
1898 SET_PHI_RESULT (phi, t);
1899 nphi = create_phi_node (res, orig_header);
1900 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1902 if (res == control)
1904 gimple_cond_set_lhs (cond_stmt, t);
1905 update_stmt (cond_stmt);
1906 control = t;
1910 bbs = get_loop_body_in_dom_order (loop);
1912 for (n = 0; bbs[n] != exit->src; n++)
1913 continue;
1914 nbbs = XNEWVEC (basic_block, n);
1915 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1916 bbs + 1, n, nbbs);
1917 gcc_assert (ok);
1918 free (bbs);
1919 ex_bb = nbbs[0];
1920 free (nbbs);
1922 /* Other than reductions, the only gimple reg that should be copied
1923 out of the loop is the control variable. */
1924 exit = single_dom_exit (loop);
1925 control_name = NULL_TREE;
1926 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1927 !gsi_end_p (gsi); )
1929 phi = gsi.phi ();
1930 res = PHI_RESULT (phi);
1931 if (virtual_operand_p (res))
1933 gsi_next (&gsi);
1934 continue;
1937 /* Check if it is a part of reduction. If it is,
1938 keep the phi at the reduction's keep_res field. The
1939 PHI_RESULT of this phi is the resulting value of the reduction
1940 variable when exiting the loop. */
1942 if (reduction_list->elements () > 0)
1944 struct reduction_info *red;
1946 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1947 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1948 if (red)
1950 red->keep_res = phi;
1951 gsi_next (&gsi);
1952 continue;
1955 gcc_assert (control_name == NULL_TREE
1956 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1957 control_name = res;
1958 remove_phi_node (&gsi, false);
1960 gcc_assert (control_name != NULL_TREE);
1962 /* Initialize the control variable to number of iterations
1963 according to the rhs of the exit condition. */
1964 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1965 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1966 nit_1 = gimple_cond_rhs (cond_nit);
1967 nit_1 = force_gimple_operand_gsi (&gsi,
1968 fold_convert (TREE_TYPE (control_name), nit_1),
1969 false, NULL_TREE, false, GSI_SAME_STMT);
1970 stmt = gimple_build_assign (control_name, nit_1);
1971 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1974 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1975 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1976 NEW_DATA is the variable that should be initialized from the argument
1977 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1978 basic block containing GIMPLE_OMP_PARALLEL tree. */
1980 static basic_block
1981 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1982 tree new_data, unsigned n_threads, location_t loc)
1984 gimple_stmt_iterator gsi;
1985 basic_block bb, paral_bb, for_bb, ex_bb;
1986 tree t, param;
1987 gomp_parallel *omp_par_stmt;
1988 gimple omp_return_stmt1, omp_return_stmt2;
1989 gimple phi;
1990 gcond *cond_stmt;
1991 gomp_for *for_stmt;
1992 gomp_continue *omp_cont_stmt;
1993 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1994 edge exit, nexit, guard, end, e;
1996 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1997 bb = loop_preheader_edge (loop)->src;
1998 paral_bb = single_pred (bb);
1999 gsi = gsi_last_bb (paral_bb);
2001 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2002 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2003 = build_int_cst (integer_type_node, n_threads);
2004 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2005 gimple_set_location (omp_par_stmt, loc);
2007 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2009 /* Initialize NEW_DATA. */
2010 if (data)
2012 gassign *assign_stmt;
2014 gsi = gsi_after_labels (bb);
2016 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2017 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2018 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2020 assign_stmt = gimple_build_assign (new_data,
2021 fold_convert (TREE_TYPE (new_data), param));
2022 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2025 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2026 bb = split_loop_exit_edge (single_dom_exit (loop));
2027 gsi = gsi_last_bb (bb);
2028 omp_return_stmt1 = gimple_build_omp_return (false);
2029 gimple_set_location (omp_return_stmt1, loc);
2030 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2032 /* Extract data for GIMPLE_OMP_FOR. */
2033 gcc_assert (loop->header == single_dom_exit (loop)->src);
2034 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2036 cvar = gimple_cond_lhs (cond_stmt);
2037 cvar_base = SSA_NAME_VAR (cvar);
2038 phi = SSA_NAME_DEF_STMT (cvar);
2039 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2040 initvar = copy_ssa_name (cvar);
2041 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2042 initvar);
2043 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2045 gsi = gsi_last_nondebug_bb (loop->latch);
2046 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2047 gsi_remove (&gsi, true);
2049 /* Prepare cfg. */
2050 for_bb = split_edge (loop_preheader_edge (loop));
2051 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2052 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2053 gcc_assert (exit == single_dom_exit (loop));
2055 guard = make_edge (for_bb, ex_bb, 0);
2056 single_succ_edge (loop->latch)->flags = 0;
2057 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
2058 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2059 !gsi_end_p (gpi); gsi_next (&gpi))
2061 source_location locus;
2062 tree def;
2063 gphi *phi = gpi.phi ();
2064 gphi *stmt;
2066 stmt = as_a <gphi *> (
2067 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
2069 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2070 locus = gimple_phi_arg_location_from_edge (stmt,
2071 loop_preheader_edge (loop));
2072 add_phi_arg (phi, def, guard, locus);
2074 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2075 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2076 add_phi_arg (phi, def, end, locus);
2078 e = redirect_edge_and_branch (exit, nexit->dest);
2079 PENDING_STMT (e) = NULL;
2081 /* Emit GIMPLE_OMP_FOR. */
2082 gimple_cond_set_lhs (cond_stmt, cvar_base);
2083 type = TREE_TYPE (cvar);
2084 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2085 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2087 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2088 gimple_set_location (for_stmt, loc);
2089 gimple_omp_for_set_index (for_stmt, 0, initvar);
2090 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2091 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2092 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2093 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2094 cvar_base,
2095 build_int_cst (type, 1)));
2097 gsi = gsi_last_bb (for_bb);
2098 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2099 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2101 /* Emit GIMPLE_OMP_CONTINUE. */
2102 gsi = gsi_last_bb (loop->latch);
2103 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2104 gimple_set_location (omp_cont_stmt, loc);
2105 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2106 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2108 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2109 gsi = gsi_last_bb (ex_bb);
2110 omp_return_stmt2 = gimple_build_omp_return (true);
2111 gimple_set_location (omp_return_stmt2, loc);
2112 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2114 /* After the above dom info is hosed. Re-compute it. */
2115 free_dominance_info (CDI_DOMINATORS);
2116 calculate_dominance_info (CDI_DOMINATORS);
2118 return paral_bb;
2121 /* Generates code to execute the iterations of LOOP in N_THREADS
2122 threads in parallel.
2124 NITER describes number of iterations of LOOP.
2125 REDUCTION_LIST describes the reductions existent in the LOOP. */
2127 static void
2128 gen_parallel_loop (struct loop *loop,
2129 reduction_info_table_type *reduction_list,
2130 unsigned n_threads, struct tree_niter_desc *niter)
2132 tree many_iterations_cond, type, nit;
2133 tree arg_struct, new_arg_struct;
2134 gimple_seq stmts;
2135 edge entry, exit;
2136 struct clsn_data clsn_data;
2137 unsigned prob;
2138 location_t loc;
2139 gimple cond_stmt;
2140 unsigned int m_p_thread=2;
2142 /* From
2144 ---------------------------------------------------------------------
2145 loop
2147 IV = phi (INIT, IV + STEP)
2148 BODY1;
2149 if (COND)
2150 break;
2151 BODY2;
2153 ---------------------------------------------------------------------
2155 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2156 we generate the following code:
2158 ---------------------------------------------------------------------
2160 if (MAY_BE_ZERO
2161 || NITER < MIN_PER_THREAD * N_THREADS)
2162 goto original;
2164 BODY1;
2165 store all local loop-invariant variables used in body of the loop to DATA.
2166 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2167 load the variables from DATA.
2168 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2169 BODY2;
2170 BODY1;
2171 GIMPLE_OMP_CONTINUE;
2172 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2173 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2174 goto end;
2176 original:
2177 loop
2179 IV = phi (INIT, IV + STEP)
2180 BODY1;
2181 if (COND)
2182 break;
2183 BODY2;
2186 end:
2190 /* Create two versions of the loop -- in the old one, we know that the
2191 number of iterations is large enough, and we will transform it into the
2192 loop that will be split to loop_fn, the new one will be used for the
2193 remaining iterations. */
2195 /* We should compute a better number-of-iterations value for outer loops.
2196 That is, if we have
2198 for (i = 0; i < n; ++i)
2199 for (j = 0; j < m; ++j)
2202 we should compute nit = n * m, not nit = n.
2203 Also may_be_zero handling would need to be adjusted. */
2205 type = TREE_TYPE (niter->niter);
2206 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2207 NULL_TREE);
2208 if (stmts)
2209 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2211 if (loop->inner)
2212 m_p_thread=2;
2213 else
2214 m_p_thread=MIN_PER_THREAD;
2216 many_iterations_cond =
2217 fold_build2 (GE_EXPR, boolean_type_node,
2218 nit, build_int_cst (type, m_p_thread * n_threads));
2220 many_iterations_cond
2221 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2222 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2223 many_iterations_cond);
2224 many_iterations_cond
2225 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2226 if (stmts)
2227 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2228 if (!is_gimple_condexpr (many_iterations_cond))
2230 many_iterations_cond
2231 = force_gimple_operand (many_iterations_cond, &stmts,
2232 true, NULL_TREE);
2233 if (stmts)
2234 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2237 initialize_original_copy_tables ();
2239 /* We assume that the loop usually iterates a lot. */
2240 prob = 4 * REG_BR_PROB_BASE / 5;
2241 loop_version (loop, many_iterations_cond, NULL,
2242 prob, prob, REG_BR_PROB_BASE - prob, true);
2243 update_ssa (TODO_update_ssa);
2244 free_original_copy_tables ();
2246 /* Base all the induction variables in LOOP on a single control one. */
2247 canonicalize_loop_ivs (loop, &nit, true);
2249 /* Ensure that the exit condition is the first statement in the loop.
2250 The common case is that latch of the loop is empty (apart from the
2251 increment) and immediately follows the loop exit test. Attempt to move the
2252 entry of the loop directly before the exit check and increase the number of
2253 iterations of the loop by one. */
2254 if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2256 /* Fall back on the method that handles more cases, but duplicates the
2257 loop body: move the exit condition of LOOP to the beginning of its
2258 header, and duplicate the part of the last iteration that gets disabled
2259 to the exit of the loop. */
2260 transform_to_exit_first_loop (loop, reduction_list, nit);
2263 /* Generate initializations for reductions. */
2264 if (reduction_list->elements () > 0)
2265 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2267 /* Eliminate the references to local variables from the loop. */
2268 gcc_assert (single_exit (loop));
2269 entry = loop_preheader_edge (loop);
2270 exit = single_dom_exit (loop);
2272 eliminate_local_variables (entry, exit);
2273 /* In the old loop, move all variables non-local to the loop to a structure
2274 and back, and create separate decls for the variables used in loop. */
2275 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2276 &new_arg_struct, &clsn_data);
2278 /* Create the parallel constructs. */
2279 loc = UNKNOWN_LOCATION;
2280 cond_stmt = last_stmt (loop->header);
2281 if (cond_stmt)
2282 loc = gimple_location (cond_stmt);
2283 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2284 new_arg_struct, n_threads, loc);
2285 if (reduction_list->elements () > 0)
2286 create_call_for_reduction (loop, reduction_list, &clsn_data);
2288 scev_reset ();
2290 /* Cancel the loop (it is simpler to do it here rather than to teach the
2291 expander to do it). */
2292 cancel_loop_tree (loop);
2294 /* Free loop bound estimations that could contain references to
2295 removed statements. */
2296 FOR_EACH_LOOP (loop, 0)
2297 free_numbers_of_iterations_estimates_loop (loop);
2300 /* Returns true when LOOP contains vector phi nodes. */
2302 static bool
2303 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2305 unsigned i;
2306 basic_block *bbs = get_loop_body_in_dom_order (loop);
2307 gphi_iterator gsi;
2308 bool res = true;
2310 for (i = 0; i < loop->num_nodes; i++)
2311 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2312 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2313 goto end;
2315 res = false;
2316 end:
2317 free (bbs);
2318 return res;
2321 /* Create a reduction_info struct, initialize it with REDUC_STMT
2322 and PHI, insert it to the REDUCTION_LIST. */
2324 static void
2325 build_new_reduction (reduction_info_table_type *reduction_list,
2326 gimple reduc_stmt, gphi *phi)
2328 reduction_info **slot;
2329 struct reduction_info *new_reduction;
2331 gcc_assert (reduc_stmt);
2333 if (dump_file && (dump_flags & TDF_DETAILS))
2335 fprintf (dump_file,
2336 "Detected reduction. reduction stmt is: \n");
2337 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2338 fprintf (dump_file, "\n");
2341 new_reduction = XCNEW (struct reduction_info);
2343 new_reduction->reduc_stmt = reduc_stmt;
2344 new_reduction->reduc_phi = phi;
2345 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2346 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
2347 slot = reduction_list->find_slot (new_reduction, INSERT);
2348 *slot = new_reduction;
2351 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2354 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2356 struct reduction_info *const red = *slot;
2357 gimple_set_uid (red->reduc_phi, red->reduc_version);
2358 return 1;
2361 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2363 static void
2364 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2366 gphi_iterator gsi;
2367 loop_vec_info simple_loop_info;
2369 simple_loop_info = vect_analyze_loop_form (loop);
2371 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2373 gphi *phi = gsi.phi ();
2374 affine_iv iv;
2375 tree res = PHI_RESULT (phi);
2376 bool double_reduc;
2378 if (virtual_operand_p (res))
2379 continue;
2381 if (!simple_iv (loop, loop, res, &iv, true)
2382 && simple_loop_info)
2384 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2385 phi, true,
2386 &double_reduc);
2387 if (reduc_stmt && !double_reduc)
2388 build_new_reduction (reduction_list, reduc_stmt, phi);
2391 destroy_loop_vec_info (simple_loop_info, true);
2393 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2394 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2395 only now. */
2396 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2399 /* Try to initialize NITER for code generation part. */
2401 static bool
2402 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2404 edge exit = single_dom_exit (loop);
2406 gcc_assert (exit);
2408 /* We need to know # of iterations, and there should be no uses of values
2409 defined inside loop outside of it, unless the values are invariants of
2410 the loop. */
2411 if (!number_of_iterations_exit (loop, exit, niter, false))
2413 if (dump_file && (dump_flags & TDF_DETAILS))
2414 fprintf (dump_file, " FAILED: number of iterations not known\n");
2415 return false;
2418 return true;
2421 /* Try to initialize REDUCTION_LIST for code generation part.
2422 REDUCTION_LIST describes the reductions. */
2424 static bool
2425 try_create_reduction_list (loop_p loop,
2426 reduction_info_table_type *reduction_list)
2428 edge exit = single_dom_exit (loop);
2429 gphi_iterator gsi;
2431 gcc_assert (exit);
2433 gather_scalar_reductions (loop, reduction_list);
2436 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2438 gphi *phi = gsi.phi ();
2439 struct reduction_info *red;
2440 imm_use_iterator imm_iter;
2441 use_operand_p use_p;
2442 gimple reduc_phi;
2443 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2445 if (!virtual_operand_p (val))
2447 if (dump_file && (dump_flags & TDF_DETAILS))
2449 fprintf (dump_file, "phi is ");
2450 print_gimple_stmt (dump_file, phi, 0, 0);
2451 fprintf (dump_file, "arg of phi to exit: value ");
2452 print_generic_expr (dump_file, val, 0);
2453 fprintf (dump_file, " used outside loop\n");
2454 fprintf (dump_file,
2455 " checking if it a part of reduction pattern: \n");
2457 if (reduction_list->elements () == 0)
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2460 fprintf (dump_file,
2461 " FAILED: it is not a part of reduction.\n");
2462 return false;
2464 reduc_phi = NULL;
2465 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2467 if (!gimple_debug_bind_p (USE_STMT (use_p))
2468 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2470 reduc_phi = USE_STMT (use_p);
2471 break;
2474 red = reduction_phi (reduction_list, reduc_phi);
2475 if (red == NULL)
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file,
2479 " FAILED: it is not a part of reduction.\n");
2480 return false;
2482 if (dump_file && (dump_flags & TDF_DETAILS))
2484 fprintf (dump_file, "reduction phi is ");
2485 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2486 fprintf (dump_file, "reduction stmt is ");
2487 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2492 /* The iterations of the loop may communicate only through bivs whose
2493 iteration space can be distributed efficiently. */
2494 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2496 gphi *phi = gsi.phi ();
2497 tree def = PHI_RESULT (phi);
2498 affine_iv iv;
2500 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2502 struct reduction_info *red;
2504 red = reduction_phi (reduction_list, phi);
2505 if (red == NULL)
2507 if (dump_file && (dump_flags & TDF_DETAILS))
2508 fprintf (dump_file,
2509 " FAILED: scalar dependency between iterations\n");
2510 return false;
2516 return true;
2519 /* Detect parallel loops and generate parallel code using libgomp
2520 primitives. Returns true if some loop was parallelized, false
2521 otherwise. */
2523 static bool
2524 parallelize_loops (void)
2526 unsigned n_threads = flag_tree_parallelize_loops;
2527 bool changed = false;
2528 struct loop *loop;
2529 struct tree_niter_desc niter_desc;
2530 struct obstack parloop_obstack;
2531 HOST_WIDE_INT estimated;
2532 source_location loop_loc;
2534 /* Do not parallelize loops in the functions created by parallelization. */
2535 if (parallelized_function_p (cfun->decl))
2536 return false;
2537 if (cfun->has_nonlocal_label)
2538 return false;
2540 gcc_obstack_init (&parloop_obstack);
2541 reduction_info_table_type reduction_list (10);
2542 init_stmt_vec_info_vec ();
2544 FOR_EACH_LOOP (loop, 0)
2546 reduction_list.empty ();
2547 if (dump_file && (dump_flags & TDF_DETAILS))
2549 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2550 if (loop->inner)
2551 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2552 else
2553 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2556 /* If we use autopar in graphite pass, we use its marked dependency
2557 checking results. */
2558 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2560 if (dump_file && (dump_flags & TDF_DETAILS))
2561 fprintf (dump_file, "loop is not parallel according to graphite\n");
2562 continue;
2565 if (!single_dom_exit (loop))
2568 if (dump_file && (dump_flags & TDF_DETAILS))
2569 fprintf (dump_file, "loop is !single_dom_exit\n");
2571 continue;
2574 if (/* And of course, the loop must be parallelizable. */
2575 !can_duplicate_loop_p (loop)
2576 || loop_has_blocks_with_irreducible_flag (loop)
2577 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2578 /* FIXME: the check for vector phi nodes could be removed. */
2579 || loop_has_vector_phi_nodes (loop))
2580 continue;
2582 estimated = estimated_stmt_executions_int (loop);
2583 if (estimated == -1)
2584 estimated = max_stmt_executions_int (loop);
2585 /* FIXME: Bypass this check as graphite doesn't update the
2586 count and frequency correctly now. */
2587 if (!flag_loop_parallelize_all
2588 && ((estimated != -1
2589 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2590 /* Do not bother with loops in cold areas. */
2591 || optimize_loop_nest_for_size_p (loop)))
2592 continue;
2594 if (!try_get_loop_niter (loop, &niter_desc))
2595 continue;
2597 if (!try_create_reduction_list (loop, &reduction_list))
2598 continue;
2600 if (!flag_loop_parallelize_all
2601 && !loop_parallel_p (loop, &parloop_obstack))
2602 continue;
2604 changed = true;
2605 if (dump_file && (dump_flags & TDF_DETAILS))
2607 if (loop->inner)
2608 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2609 else
2610 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2611 loop_loc = find_loop_location (loop);
2612 if (loop_loc != UNKNOWN_LOCATION)
2613 fprintf (dump_file, "\nloop at %s:%d: ",
2614 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2616 gen_parallel_loop (loop, &reduction_list,
2617 n_threads, &niter_desc);
2620 free_stmt_vec_info_vec ();
2621 obstack_free (&parloop_obstack, NULL);
2623 /* Parallelization will cause new function calls to be inserted through
2624 which local variables will escape. Reset the points-to solution
2625 for ESCAPED. */
2626 if (changed)
2627 pt_solution_reset (&cfun->gimple_df->escaped);
2629 return changed;
2632 /* Parallelization. */
2634 namespace {
2636 const pass_data pass_data_parallelize_loops =
2638 GIMPLE_PASS, /* type */
2639 "parloops", /* name */
2640 OPTGROUP_LOOP, /* optinfo_flags */
2641 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2642 ( PROP_cfg | PROP_ssa ), /* properties_required */
2643 0, /* properties_provided */
2644 0, /* properties_destroyed */
2645 0, /* todo_flags_start */
2646 0, /* todo_flags_finish */
2649 class pass_parallelize_loops : public gimple_opt_pass
2651 public:
2652 pass_parallelize_loops (gcc::context *ctxt)
2653 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2656 /* opt_pass methods: */
2657 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2658 virtual unsigned int execute (function *);
2660 }; // class pass_parallelize_loops
2662 unsigned
2663 pass_parallelize_loops::execute (function *fun)
2665 if (number_of_loops (fun) <= 1)
2666 return 0;
2668 if (parallelize_loops ())
2670 fun->curr_properties &= ~(PROP_gimple_eomp);
2671 return TODO_update_ssa;
2674 return 0;
2677 } // anon namespace
2679 gimple_opt_pass *
2680 make_pass_parallelize_loops (gcc::context *ctxt)
2682 return new pass_parallelize_loops (ctxt);