PR middle-end/59175
[official-gcc.git] / gcc / tree-parloops.c
blob648331cc767a3b0b1dc76d52a485c2a4bc9aade3
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "gimplify.h"
28 #include "gimple-iterator.h"
29 #include "gimplify-me.h"
30 #include "gimple-walk.h"
31 #include "gimple-ssa.h"
32 #include "tree-cfg.h"
33 #include "tree-phinodes.h"
34 #include "ssa-iterators.h"
35 #include "tree-ssanames.h"
36 #include "tree-ssa-loop-ivopts.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-ssa-loop-niter.h"
39 #include "tree-ssa-loop.h"
40 #include "tree-into-ssa.h"
41 #include "cfgloop.h"
42 #include "tree-data-ref.h"
43 #include "tree-scalar-evolution.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-pass.h"
46 #include "langhooks.h"
47 #include "tree-vectorizer.h"
48 #include "tree-hasher.h"
49 #include "tree-parloops.h"
50 #include "omp-low.h"
51 #include "tree-nested.h"
53 /* This pass tries to distribute iterations of loops into several threads.
54 The implementation is straightforward -- for each loop we test whether its
55 iterations are independent, and if it is the case (and some additional
56 conditions regarding profitability and correctness are satisfied), we
57 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
58 machinery do its job.
60 The most of the complexity is in bringing the code into shape expected
61 by the omp expanders:
62 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
63 variable and that the exit test is at the start of the loop body
64 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
65 variables by accesses through pointers, and breaking up ssa chains
66 by storing the values incoming to the parallelized loop to a structure
67 passed to the new function as an argument (something similar is done
68 in omp gimplification, unfortunately only a small part of the code
69 can be shared).
71 TODO:
72 -- if there are several parallelizable loops in a function, it may be
73 possible to generate the threads just once (using synchronization to
74 ensure that cross-loop dependences are obeyed).
75 -- handling of common reduction patterns for outer loops.
77 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
79 Reduction handling:
80 currently we use vect_force_simple_reduction() to detect reduction patterns.
81 The code transformation will be introduced by an example.
84 parloop
86 int sum=1;
88 for (i = 0; i < N; i++)
90 x[i] = i + 3;
91 sum+=x[i];
95 gimple-like code:
96 header_bb:
98 # sum_29 = PHI <sum_11(5), 1(3)>
99 # i_28 = PHI <i_12(5), 0(3)>
100 D.1795_8 = i_28 + 3;
101 x[i_28] = D.1795_8;
102 sum_11 = D.1795_8 + sum_29;
103 i_12 = i_28 + 1;
104 if (N_6(D) > i_12)
105 goto header_bb;
108 exit_bb:
110 # sum_21 = PHI <sum_11(4)>
111 printf (&"%d"[0], sum_21);
114 after reduction transformation (only relevant parts):
116 parloop
119 ....
122 # Storing the initial value given by the user. #
124 .paral_data_store.32.sum.27 = 1;
126 #pragma omp parallel num_threads(4)
128 #pragma omp for schedule(static)
130 # The neutral element corresponding to the particular
131 reduction's operation, e.g. 0 for PLUS_EXPR,
132 1 for MULT_EXPR, etc. replaces the user's initial value. #
134 # sum.27_29 = PHI <sum.27_11, 0>
136 sum.27_11 = D.1827_8 + sum.27_29;
138 GIMPLE_OMP_CONTINUE
140 # Adding this reduction phi is done at create_phi_for_local_result() #
141 # sum.27_56 = PHI <sum.27_11, 0>
142 GIMPLE_OMP_RETURN
144 # Creating the atomic operation is done at
145 create_call_for_reduction_1() #
147 #pragma omp atomic_load
148 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
149 D.1840_60 = sum.27_56 + D.1839_59;
150 #pragma omp atomic_store (D.1840_60);
152 GIMPLE_OMP_RETURN
154 # collecting the result after the join of the threads is done at
155 create_loads_for_reductions().
156 The value computed by the threads is loaded from the
157 shared struct. #
160 .paral_data_load.33_52 = &.paral_data_store.32;
161 sum_37 = .paral_data_load.33_52->sum.27;
162 sum_43 = D.1795_41 + sum_37;
164 exit bb:
165 # sum_21 = PHI <sum_43, sum_26>
166 printf (&"%d"[0], sum_21);
174 /* Minimal number of iterations of a loop that should be executed in each
175 thread. */
176 #define MIN_PER_THREAD 100
178 /* Element of the hashtable, representing a
179 reduction in the current loop. */
180 struct reduction_info
182 gimple reduc_stmt; /* reduction statement. */
183 gimple reduc_phi; /* The phi node defining the reduction. */
184 enum tree_code reduction_code;/* code for the reduction operation. */
185 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
186 result. */
187 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
188 of the reduction variable when existing the loop. */
189 tree initial_value; /* The initial value of the reduction var before entering the loop. */
190 tree field; /* the name of the field in the parloop data structure intended for reduction. */
191 tree init; /* reduction initialization value. */
192 gimple new_phi; /* (helper field) Newly created phi node whose result
193 will be passed to the atomic operation. Represents
194 the local result each thread computed for the reduction
195 operation. */
198 /* Reduction info hashtable helpers. */
200 struct reduction_hasher : typed_free_remove <reduction_info>
202 typedef reduction_info value_type;
203 typedef reduction_info compare_type;
204 static inline hashval_t hash (const value_type *);
205 static inline bool equal (const value_type *, const compare_type *);
208 /* Equality and hash functions for hashtab code. */
210 inline bool
211 reduction_hasher::equal (const value_type *a, const compare_type *b)
213 return (a->reduc_phi == b->reduc_phi);
216 inline hashval_t
217 reduction_hasher::hash (const value_type *a)
219 return a->reduc_version;
222 typedef hash_table <reduction_hasher> reduction_info_table_type;
225 static struct reduction_info *
226 reduction_phi (reduction_info_table_type reduction_list, gimple phi)
228 struct reduction_info tmpred, *red;
230 if (reduction_list.elements () == 0 || phi == NULL)
231 return NULL;
233 tmpred.reduc_phi = phi;
234 tmpred.reduc_version = gimple_uid (phi);
235 red = reduction_list.find (&tmpred);
237 return red;
240 /* Element of hashtable of names to copy. */
242 struct name_to_copy_elt
244 unsigned version; /* The version of the name to copy. */
245 tree new_name; /* The new name used in the copy. */
246 tree field; /* The field of the structure used to pass the
247 value. */
250 /* Name copies hashtable helpers. */
252 struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
254 typedef name_to_copy_elt value_type;
255 typedef name_to_copy_elt compare_type;
256 static inline hashval_t hash (const value_type *);
257 static inline bool equal (const value_type *, const compare_type *);
260 /* Equality and hash functions for hashtab code. */
262 inline bool
263 name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
265 return a->version == b->version;
268 inline hashval_t
269 name_to_copy_hasher::hash (const value_type *a)
271 return (hashval_t) a->version;
274 typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
276 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
277 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
278 represents the denominator for every element in the matrix. */
279 typedef struct lambda_trans_matrix_s
281 lambda_matrix matrix;
282 int rowsize;
283 int colsize;
284 int denominator;
285 } *lambda_trans_matrix;
286 #define LTM_MATRIX(T) ((T)->matrix)
287 #define LTM_ROWSIZE(T) ((T)->rowsize)
288 #define LTM_COLSIZE(T) ((T)->colsize)
289 #define LTM_DENOMINATOR(T) ((T)->denominator)
291 /* Allocate a new transformation matrix. */
293 static lambda_trans_matrix
294 lambda_trans_matrix_new (int colsize, int rowsize,
295 struct obstack * lambda_obstack)
297 lambda_trans_matrix ret;
299 ret = (lambda_trans_matrix)
300 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
301 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
302 LTM_ROWSIZE (ret) = rowsize;
303 LTM_COLSIZE (ret) = colsize;
304 LTM_DENOMINATOR (ret) = 1;
305 return ret;
308 /* Multiply a vector VEC by a matrix MAT.
309 MAT is an M*N matrix, and VEC is a vector with length N. The result
310 is stored in DEST which must be a vector of length M. */
312 static void
313 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
314 lambda_vector vec, lambda_vector dest)
316 int i, j;
318 lambda_vector_clear (dest, m);
319 for (i = 0; i < m; i++)
320 for (j = 0; j < n; j++)
321 dest[i] += matrix[i][j] * vec[j];
324 /* Return true if TRANS is a legal transformation matrix that respects
325 the dependence vectors in DISTS and DIRS. The conservative answer
326 is false.
328 "Wolfe proves that a unimodular transformation represented by the
329 matrix T is legal when applied to a loop nest with a set of
330 lexicographically non-negative distance vectors RDG if and only if
331 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
332 i.e.: if and only if it transforms the lexicographically positive
333 distance vectors to lexicographically positive vectors. Note that
334 a unimodular matrix must transform the zero vector (and only it) to
335 the zero vector." S.Muchnick. */
337 static bool
338 lambda_transform_legal_p (lambda_trans_matrix trans,
339 int nb_loops,
340 vec<ddr_p> dependence_relations)
342 unsigned int i, j;
343 lambda_vector distres;
344 struct data_dependence_relation *ddr;
346 gcc_assert (LTM_COLSIZE (trans) == nb_loops
347 && LTM_ROWSIZE (trans) == nb_loops);
349 /* When there are no dependences, the transformation is correct. */
350 if (dependence_relations.length () == 0)
351 return true;
353 ddr = dependence_relations[0];
354 if (ddr == NULL)
355 return true;
357 /* When there is an unknown relation in the dependence_relations, we
358 know that it is no worth looking at this loop nest: give up. */
359 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360 return false;
362 distres = lambda_vector_new (nb_loops);
364 /* For each distance vector in the dependence graph. */
365 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
367 /* Don't care about relations for which we know that there is no
368 dependence, nor about read-read (aka. output-dependences):
369 these data accesses can happen in any order. */
370 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
371 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
372 continue;
374 /* Conservatively answer: "this transformation is not valid". */
375 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
376 return false;
378 /* If the dependence could not be captured by a distance vector,
379 conservatively answer that the transform is not valid. */
380 if (DDR_NUM_DIST_VECTS (ddr) == 0)
381 return false;
383 /* Compute trans.dist_vect */
384 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
386 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
387 DDR_DIST_VECT (ddr, j), distres);
389 if (!lambda_vector_lexico_pos (distres, nb_loops))
390 return false;
393 return true;
396 /* Data dependency analysis. Returns true if the iterations of LOOP
397 are independent on each other (that is, if we can execute them
398 in parallel). */
400 static bool
401 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
403 vec<ddr_p> dependence_relations;
404 vec<data_reference_p> datarefs;
405 lambda_trans_matrix trans;
406 bool ret = false;
408 if (dump_file && (dump_flags & TDF_DETAILS))
410 fprintf (dump_file, "Considering loop %d\n", loop->num);
411 if (!loop->inner)
412 fprintf (dump_file, "loop is innermost\n");
413 else
414 fprintf (dump_file, "loop NOT innermost\n");
417 /* Check for problems with dependences. If the loop can be reversed,
418 the iterations are independent. */
419 stack_vec<loop_p, 3> loop_nest;
420 datarefs.create (10);
421 dependence_relations.create (100);
422 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
423 &dependence_relations))
425 if (dump_file && (dump_flags & TDF_DETAILS))
426 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
427 ret = false;
428 goto end;
430 if (dump_file && (dump_flags & TDF_DETAILS))
431 dump_data_dependence_relations (dump_file, dependence_relations);
433 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
434 LTM_MATRIX (trans)[0][0] = -1;
436 if (lambda_transform_legal_p (trans, 1, dependence_relations))
438 ret = true;
439 if (dump_file && (dump_flags & TDF_DETAILS))
440 fprintf (dump_file, " SUCCESS: may be parallelized\n");
442 else if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file,
444 " FAILED: data dependencies exist across iterations\n");
446 end:
447 free_dependence_relations (dependence_relations);
448 free_data_refs (datarefs);
450 return ret;
453 /* Return true when LOOP contains basic blocks marked with the
454 BB_IRREDUCIBLE_LOOP flag. */
456 static inline bool
457 loop_has_blocks_with_irreducible_flag (struct loop *loop)
459 unsigned i;
460 basic_block *bbs = get_loop_body_in_dom_order (loop);
461 bool res = true;
463 for (i = 0; i < loop->num_nodes; i++)
464 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
465 goto end;
467 res = false;
468 end:
469 free (bbs);
470 return res;
473 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
474 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
475 to their addresses that can be reused. The address of OBJ is known to
476 be invariant in the whole function. Other needed statements are placed
477 right before GSI. */
479 static tree
480 take_address_of (tree obj, tree type, edge entry,
481 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
483 int uid;
484 int_tree_map **dslot;
485 struct int_tree_map ielt, *nielt;
486 tree *var_p, name, addr;
487 gimple stmt;
488 gimple_seq stmts;
490 /* Since the address of OBJ is invariant, the trees may be shared.
491 Avoid rewriting unrelated parts of the code. */
492 obj = unshare_expr (obj);
493 for (var_p = &obj;
494 handled_component_p (*var_p);
495 var_p = &TREE_OPERAND (*var_p, 0))
496 continue;
498 /* Canonicalize the access to base on a MEM_REF. */
499 if (DECL_P (*var_p))
500 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
502 /* Assign a canonical SSA name to the address of the base decl used
503 in the address and share it for all accesses and addresses based
504 on it. */
505 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
506 ielt.uid = uid;
507 dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
508 if (!*dslot)
510 if (gsi == NULL)
511 return NULL;
512 addr = TREE_OPERAND (*var_p, 0);
513 const char *obj_name
514 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
515 if (obj_name)
516 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
517 else
518 name = make_ssa_name (TREE_TYPE (addr), NULL);
519 stmt = gimple_build_assign (name, addr);
520 gsi_insert_on_edge_immediate (entry, stmt);
522 nielt = XNEW (struct int_tree_map);
523 nielt->uid = uid;
524 nielt->to = name;
525 *dslot = nielt;
527 else
528 name = (*dslot)->to;
530 /* Express the address in terms of the canonical SSA name. */
531 TREE_OPERAND (*var_p, 0) = name;
532 if (gsi == NULL)
533 return build_fold_addr_expr_with_type (obj, type);
535 name = force_gimple_operand (build_addr (obj, current_function_decl),
536 &stmts, true, NULL_TREE);
537 if (!gimple_seq_empty_p (stmts))
538 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
540 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
542 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
543 NULL_TREE);
544 if (!gimple_seq_empty_p (stmts))
545 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
548 return name;
551 /* Callback for htab_traverse. Create the initialization statement
552 for reduction described in SLOT, and place it at the preheader of
553 the loop described in DATA. */
556 initialize_reductions (reduction_info **slot, struct loop *loop)
558 tree init, c;
559 tree bvar, type, arg;
560 edge e;
562 struct reduction_info *const reduc = *slot;
564 /* Create initialization in preheader:
565 reduction_variable = initialization value of reduction. */
567 /* In the phi node at the header, replace the argument coming
568 from the preheader with the reduction initialization value. */
570 /* Create a new variable to initialize the reduction. */
571 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
572 bvar = create_tmp_var (type, "reduction");
574 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
575 OMP_CLAUSE_REDUCTION);
576 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
577 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
579 init = omp_reduction_init (c, TREE_TYPE (bvar));
580 reduc->init = init;
582 /* Replace the argument representing the initialization value
583 with the initialization value for the reduction (neutral
584 element for the particular operation, e.g. 0 for PLUS_EXPR,
585 1 for MULT_EXPR, etc).
586 Keep the old value in a new variable "reduction_initial",
587 that will be taken in consideration after the parallel
588 computing is done. */
590 e = loop_preheader_edge (loop);
591 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
592 /* Create new variable to hold the initial value. */
594 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
595 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
596 reduc->initial_value = arg;
597 return 1;
600 struct elv_data
602 struct walk_stmt_info info;
603 edge entry;
604 int_tree_htab_type decl_address;
605 gimple_stmt_iterator *gsi;
606 bool changed;
607 bool reset;
610 /* Eliminates references to local variables in *TP out of the single
611 entry single exit region starting at DTA->ENTRY.
612 DECL_ADDRESS contains addresses of the references that had their
613 address taken already. If the expression is changed, CHANGED is
614 set to true. Callback for walk_tree. */
616 static tree
617 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
619 struct elv_data *const dta = (struct elv_data *) data;
620 tree t = *tp, var, addr, addr_type, type, obj;
622 if (DECL_P (t))
624 *walk_subtrees = 0;
626 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
627 return NULL_TREE;
629 type = TREE_TYPE (t);
630 addr_type = build_pointer_type (type);
631 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
632 dta->gsi);
633 if (dta->gsi == NULL && addr == NULL_TREE)
635 dta->reset = true;
636 return NULL_TREE;
639 *tp = build_simple_mem_ref (addr);
641 dta->changed = true;
642 return NULL_TREE;
645 if (TREE_CODE (t) == ADDR_EXPR)
647 /* ADDR_EXPR may appear in two contexts:
648 -- as a gimple operand, when the address taken is a function invariant
649 -- as gimple rhs, when the resulting address in not a function
650 invariant
651 We do not need to do anything special in the latter case (the base of
652 the memory reference whose address is taken may be replaced in the
653 DECL_P case). The former case is more complicated, as we need to
654 ensure that the new address is still a gimple operand. Thus, it
655 is not sufficient to replace just the base of the memory reference --
656 we need to move the whole computation of the address out of the
657 loop. */
658 if (!is_gimple_val (t))
659 return NULL_TREE;
661 *walk_subtrees = 0;
662 obj = TREE_OPERAND (t, 0);
663 var = get_base_address (obj);
664 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
665 return NULL_TREE;
667 addr_type = TREE_TYPE (t);
668 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
669 dta->gsi);
670 if (dta->gsi == NULL && addr == NULL_TREE)
672 dta->reset = true;
673 return NULL_TREE;
675 *tp = addr;
677 dta->changed = true;
678 return NULL_TREE;
681 if (!EXPR_P (t))
682 *walk_subtrees = 0;
684 return NULL_TREE;
687 /* Moves the references to local variables in STMT at *GSI out of the single
688 entry single exit region starting at ENTRY. DECL_ADDRESS contains
689 addresses of the references that had their address taken
690 already. */
692 static void
693 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
694 int_tree_htab_type decl_address)
696 struct elv_data dta;
697 gimple stmt = gsi_stmt (*gsi);
699 memset (&dta.info, '\0', sizeof (dta.info));
700 dta.entry = entry;
701 dta.decl_address = decl_address;
702 dta.changed = false;
703 dta.reset = false;
705 if (gimple_debug_bind_p (stmt))
707 dta.gsi = NULL;
708 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
709 eliminate_local_variables_1, &dta.info, NULL);
710 if (dta.reset)
712 gimple_debug_bind_reset_value (stmt);
713 dta.changed = true;
716 else if (gimple_clobber_p (stmt))
718 stmt = gimple_build_nop ();
719 gsi_replace (gsi, stmt, false);
720 dta.changed = true;
722 else
724 dta.gsi = gsi;
725 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
728 if (dta.changed)
729 update_stmt (stmt);
732 /* Eliminates the references to local variables from the single entry
733 single exit region between the ENTRY and EXIT edges.
735 This includes:
736 1) Taking address of a local variable -- these are moved out of the
737 region (and temporary variable is created to hold the address if
738 necessary).
740 2) Dereferencing a local variable -- these are replaced with indirect
741 references. */
743 static void
744 eliminate_local_variables (edge entry, edge exit)
746 basic_block bb;
747 stack_vec<basic_block, 3> body;
748 unsigned i;
749 gimple_stmt_iterator gsi;
750 bool has_debug_stmt = false;
751 int_tree_htab_type decl_address;
752 decl_address.create (10);
753 basic_block entry_bb = entry->src;
754 basic_block exit_bb = exit->dest;
756 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
758 FOR_EACH_VEC_ELT (body, i, bb)
759 if (bb != entry_bb && bb != exit_bb)
760 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
761 if (is_gimple_debug (gsi_stmt (gsi)))
763 if (gimple_debug_bind_p (gsi_stmt (gsi)))
764 has_debug_stmt = true;
766 else
767 eliminate_local_variables_stmt (entry, &gsi, decl_address);
769 if (has_debug_stmt)
770 FOR_EACH_VEC_ELT (body, i, bb)
771 if (bb != entry_bb && bb != exit_bb)
772 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
773 if (gimple_debug_bind_p (gsi_stmt (gsi)))
774 eliminate_local_variables_stmt (entry, &gsi, decl_address);
776 decl_address.dispose ();
779 /* Returns true if expression EXPR is not defined between ENTRY and
780 EXIT, i.e. if all its operands are defined outside of the region. */
782 static bool
783 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
785 basic_block entry_bb = entry->src;
786 basic_block exit_bb = exit->dest;
787 basic_block def_bb;
789 if (is_gimple_min_invariant (expr))
790 return true;
792 if (TREE_CODE (expr) == SSA_NAME)
794 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
795 if (def_bb
796 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
797 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
798 return false;
800 return true;
803 return false;
806 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
807 The copies are stored to NAME_COPIES, if NAME was already duplicated,
808 its duplicate stored in NAME_COPIES is returned.
810 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
811 duplicated, storing the copies in DECL_COPIES. */
813 static tree
814 separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
815 int_tree_htab_type decl_copies, bool copy_name_p)
817 tree copy, var, var_copy;
818 unsigned idx, uid, nuid;
819 struct int_tree_map ielt, *nielt;
820 struct name_to_copy_elt elt, *nelt;
821 name_to_copy_elt **slot;
822 int_tree_map **dslot;
824 if (TREE_CODE (name) != SSA_NAME)
825 return name;
827 idx = SSA_NAME_VERSION (name);
828 elt.version = idx;
829 slot = name_copies.find_slot_with_hash (&elt, idx,
830 copy_name_p ? INSERT : NO_INSERT);
831 if (slot && *slot)
832 return (*slot)->new_name;
834 if (copy_name_p)
836 copy = duplicate_ssa_name (name, NULL);
837 nelt = XNEW (struct name_to_copy_elt);
838 nelt->version = idx;
839 nelt->new_name = copy;
840 nelt->field = NULL_TREE;
841 *slot = nelt;
843 else
845 gcc_assert (!slot);
846 copy = name;
849 var = SSA_NAME_VAR (name);
850 if (!var)
851 return copy;
853 uid = DECL_UID (var);
854 ielt.uid = uid;
855 dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
856 if (!*dslot)
858 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
859 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
860 nielt = XNEW (struct int_tree_map);
861 nielt->uid = uid;
862 nielt->to = var_copy;
863 *dslot = nielt;
865 /* Ensure that when we meet this decl next time, we won't duplicate
866 it again. */
867 nuid = DECL_UID (var_copy);
868 ielt.uid = nuid;
869 dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
870 gcc_assert (!*dslot);
871 nielt = XNEW (struct int_tree_map);
872 nielt->uid = nuid;
873 nielt->to = var_copy;
874 *dslot = nielt;
876 else
877 var_copy = ((struct int_tree_map *) *dslot)->to;
879 replace_ssa_name_symbol (copy, var_copy);
880 return copy;
883 /* Finds the ssa names used in STMT that are defined outside the
884 region between ENTRY and EXIT and replaces such ssa names with
885 their duplicates. The duplicates are stored to NAME_COPIES. Base
886 decls of all ssa names used in STMT (including those defined in
887 LOOP) are replaced with the new temporary variables; the
888 replacement decls are stored in DECL_COPIES. */
890 static void
891 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
892 name_to_copy_table_type name_copies,
893 int_tree_htab_type decl_copies)
895 use_operand_p use;
896 def_operand_p def;
897 ssa_op_iter oi;
898 tree name, copy;
899 bool copy_name_p;
901 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
903 name = DEF_FROM_PTR (def);
904 gcc_assert (TREE_CODE (name) == SSA_NAME);
905 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
906 false);
907 gcc_assert (copy == name);
910 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
912 name = USE_FROM_PTR (use);
913 if (TREE_CODE (name) != SSA_NAME)
914 continue;
916 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
917 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
918 copy_name_p);
919 SET_USE (use, copy);
923 /* Finds the ssa names used in STMT that are defined outside the
924 region between ENTRY and EXIT and replaces such ssa names with
925 their duplicates. The duplicates are stored to NAME_COPIES. Base
926 decls of all ssa names used in STMT (including those defined in
927 LOOP) are replaced with the new temporary variables; the
928 replacement decls are stored in DECL_COPIES. */
930 static bool
931 separate_decls_in_region_debug (gimple stmt,
932 name_to_copy_table_type name_copies,
933 int_tree_htab_type decl_copies)
935 use_operand_p use;
936 ssa_op_iter oi;
937 tree var, name;
938 struct int_tree_map ielt;
939 struct name_to_copy_elt elt;
940 name_to_copy_elt **slot;
941 int_tree_map **dslot;
943 if (gimple_debug_bind_p (stmt))
944 var = gimple_debug_bind_get_var (stmt);
945 else if (gimple_debug_source_bind_p (stmt))
946 var = gimple_debug_source_bind_get_var (stmt);
947 else
948 return true;
949 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
950 return true;
951 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
952 ielt.uid = DECL_UID (var);
953 dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
954 if (!dslot)
955 return true;
956 if (gimple_debug_bind_p (stmt))
957 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
958 else if (gimple_debug_source_bind_p (stmt))
959 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
961 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
963 name = USE_FROM_PTR (use);
964 if (TREE_CODE (name) != SSA_NAME)
965 continue;
967 elt.version = SSA_NAME_VERSION (name);
968 slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
969 if (!slot)
971 gimple_debug_bind_reset_value (stmt);
972 update_stmt (stmt);
973 break;
976 SET_USE (use, (*slot)->new_name);
979 return false;
982 /* Callback for htab_traverse. Adds a field corresponding to the reduction
983 specified in SLOT. The type is passed in DATA. */
986 add_field_for_reduction (reduction_info **slot, tree type)
989 struct reduction_info *const red = *slot;
990 tree var = gimple_assign_lhs (red->reduc_stmt);
991 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
992 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
994 insert_field_into_struct (type, field);
996 red->field = field;
998 return 1;
1001 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1002 described in SLOT. The type is passed in DATA. */
1005 add_field_for_name (name_to_copy_elt **slot, tree type)
1007 struct name_to_copy_elt *const elt = *slot;
1008 tree name = ssa_name (elt->version);
1009 tree field = build_decl (UNKNOWN_LOCATION,
1010 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1011 TREE_TYPE (name));
1013 insert_field_into_struct (type, field);
1014 elt->field = field;
1016 return 1;
1019 /* Callback for htab_traverse. A local result is the intermediate result
1020 computed by a single
1021 thread, or the initial value in case no iteration was executed.
1022 This function creates a phi node reflecting these values.
1023 The phi's result will be stored in NEW_PHI field of the
1024 reduction's data structure. */
1027 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1029 struct reduction_info *const reduc = *slot;
1030 edge e;
1031 gimple new_phi;
1032 basic_block store_bb;
1033 tree local_res;
1034 source_location locus;
1036 /* STORE_BB is the block where the phi
1037 should be stored. It is the destination of the loop exit.
1038 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1039 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1041 /* STORE_BB has two predecessors. One coming from the loop
1042 (the reduction's result is computed at the loop),
1043 and another coming from a block preceding the loop,
1044 when no iterations
1045 are executed (the initial value should be taken). */
1046 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1047 e = EDGE_PRED (store_bb, 1);
1048 else
1049 e = EDGE_PRED (store_bb, 0);
1050 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
1051 locus = gimple_location (reduc->reduc_stmt);
1052 new_phi = create_phi_node (local_res, store_bb);
1053 add_phi_arg (new_phi, reduc->init, e, locus);
1054 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1055 FALLTHRU_EDGE (loop->latch), locus);
1056 reduc->new_phi = new_phi;
1058 return 1;
1061 struct clsn_data
1063 tree store;
1064 tree load;
1066 basic_block store_bb;
1067 basic_block load_bb;
1070 /* Callback for htab_traverse. Create an atomic instruction for the
1071 reduction described in SLOT.
1072 DATA annotates the place in memory the atomic operation relates to,
1073 and the basic block it needs to be generated in. */
1076 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1078 struct reduction_info *const reduc = *slot;
1079 gimple_stmt_iterator gsi;
1080 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1081 tree load_struct;
1082 basic_block bb;
1083 basic_block new_bb;
1084 edge e;
1085 tree t, addr, ref, x;
1086 tree tmp_load, name;
1087 gimple load;
1089 load_struct = build_simple_mem_ref (clsn_data->load);
1090 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1092 addr = build_addr (t, current_function_decl);
1094 /* Create phi node. */
1095 bb = clsn_data->load_bb;
1097 e = split_block (bb, t);
1098 new_bb = e->dest;
1100 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1101 tmp_load = make_ssa_name (tmp_load, NULL);
1102 load = gimple_build_omp_atomic_load (tmp_load, addr);
1103 SSA_NAME_DEF_STMT (tmp_load) = load;
1104 gsi = gsi_start_bb (new_bb);
1105 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1107 e = split_block (new_bb, load);
1108 new_bb = e->dest;
1109 gsi = gsi_start_bb (new_bb);
1110 ref = tmp_load;
1111 x = fold_build2 (reduc->reduction_code,
1112 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1113 PHI_RESULT (reduc->new_phi));
1115 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1116 GSI_CONTINUE_LINKING);
1118 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1119 return 1;
1122 /* Create the atomic operation at the join point of the threads.
1123 REDUCTION_LIST describes the reductions in the LOOP.
1124 LD_ST_DATA describes the shared data structure where
1125 shared data is stored in and loaded from. */
1126 static void
1127 create_call_for_reduction (struct loop *loop,
1128 reduction_info_table_type reduction_list,
1129 struct clsn_data *ld_st_data)
1131 reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
1132 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1133 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1134 reduction_list
1135 .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1138 /* Callback for htab_traverse. Loads the final reduction value at the
1139 join point of all threads, and inserts it in the right place. */
1142 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1144 struct reduction_info *const red = *slot;
1145 gimple stmt;
1146 gimple_stmt_iterator gsi;
1147 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1148 tree load_struct;
1149 tree name;
1150 tree x;
1152 gsi = gsi_after_labels (clsn_data->load_bb);
1153 load_struct = build_simple_mem_ref (clsn_data->load);
1154 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1155 NULL_TREE);
1157 x = load_struct;
1158 name = PHI_RESULT (red->keep_res);
1159 stmt = gimple_build_assign (name, x);
1161 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1163 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1164 !gsi_end_p (gsi); gsi_next (&gsi))
1165 if (gsi_stmt (gsi) == red->keep_res)
1167 remove_phi_node (&gsi, false);
1168 return 1;
1170 gcc_unreachable ();
1173 /* Load the reduction result that was stored in LD_ST_DATA.
1174 REDUCTION_LIST describes the list of reductions that the
1175 loads should be generated for. */
1176 static void
1177 create_final_loads_for_reduction (reduction_info_table_type reduction_list,
1178 struct clsn_data *ld_st_data)
1180 gimple_stmt_iterator gsi;
1181 tree t;
1182 gimple stmt;
1184 gsi = gsi_after_labels (ld_st_data->load_bb);
1185 t = build_fold_addr_expr (ld_st_data->store);
1186 stmt = gimple_build_assign (ld_st_data->load, t);
1188 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1190 reduction_list
1191 .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1195 /* Callback for htab_traverse. Store the neutral value for the
1196 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1197 1 for MULT_EXPR, etc. into the reduction field.
1198 The reduction is specified in SLOT. The store information is
1199 passed in DATA. */
1202 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1204 struct reduction_info *const red = *slot;
1205 tree t;
1206 gimple stmt;
1207 gimple_stmt_iterator gsi;
1208 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1210 gsi = gsi_last_bb (clsn_data->store_bb);
1211 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1212 stmt = gimple_build_assign (t, red->initial_value);
1213 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1215 return 1;
1218 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1219 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1220 specified in SLOT. */
1223 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1224 struct clsn_data *clsn_data)
1226 struct name_to_copy_elt *const elt = *slot;
1227 tree t;
1228 gimple stmt;
1229 gimple_stmt_iterator gsi;
1230 tree type = TREE_TYPE (elt->new_name);
1231 tree load_struct;
1233 gsi = gsi_last_bb (clsn_data->store_bb);
1234 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1235 stmt = gimple_build_assign (t, ssa_name (elt->version));
1236 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1238 gsi = gsi_last_bb (clsn_data->load_bb);
1239 load_struct = build_simple_mem_ref (clsn_data->load);
1240 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1241 stmt = gimple_build_assign (elt->new_name, t);
1242 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1244 return 1;
1247 /* Moves all the variables used in LOOP and defined outside of it (including
1248 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1249 name) to a structure created for this purpose. The code
1251 while (1)
1253 use (a);
1254 use (b);
1257 is transformed this way:
1259 bb0:
1260 old.a = a;
1261 old.b = b;
1263 bb1:
1264 a' = new->a;
1265 b' = new->b;
1266 while (1)
1268 use (a');
1269 use (b');
1272 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1273 pointer `new' is intentionally not initialized (the loop will be split to a
1274 separate function later, and `new' will be initialized from its arguments).
1275 LD_ST_DATA holds information about the shared data structure used to pass
1276 information among the threads. It is initialized here, and
1277 gen_parallel_loop will pass it to create_call_for_reduction that
1278 needs this information. REDUCTION_LIST describes the reductions
1279 in LOOP. */
1281 static void
1282 separate_decls_in_region (edge entry, edge exit,
1283 reduction_info_table_type reduction_list,
1284 tree *arg_struct, tree *new_arg_struct,
1285 struct clsn_data *ld_st_data)
1288 basic_block bb1 = split_edge (entry);
1289 basic_block bb0 = single_pred (bb1);
1290 name_to_copy_table_type name_copies;
1291 name_copies.create (10);
1292 int_tree_htab_type decl_copies;
1293 decl_copies.create (10);
1294 unsigned i;
1295 tree type, type_name, nvar;
1296 gimple_stmt_iterator gsi;
1297 struct clsn_data clsn_data;
1298 stack_vec<basic_block, 3> body;
1299 basic_block bb;
1300 basic_block entry_bb = bb1;
1301 basic_block exit_bb = exit->dest;
1302 bool has_debug_stmt = false;
1304 entry = single_succ_edge (entry_bb);
1305 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1307 FOR_EACH_VEC_ELT (body, i, bb)
1309 if (bb != entry_bb && bb != exit_bb)
1311 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1312 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1313 name_copies, decl_copies);
1315 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1317 gimple stmt = gsi_stmt (gsi);
1319 if (is_gimple_debug (stmt))
1320 has_debug_stmt = true;
1321 else
1322 separate_decls_in_region_stmt (entry, exit, stmt,
1323 name_copies, decl_copies);
1328 /* Now process debug bind stmts. We must not create decls while
1329 processing debug stmts, so we defer their processing so as to
1330 make sure we will have debug info for as many variables as
1331 possible (all of those that were dealt with in the loop above),
1332 and discard those for which we know there's nothing we can
1333 do. */
1334 if (has_debug_stmt)
1335 FOR_EACH_VEC_ELT (body, i, bb)
1336 if (bb != entry_bb && bb != exit_bb)
1338 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1340 gimple stmt = gsi_stmt (gsi);
1342 if (is_gimple_debug (stmt))
1344 if (separate_decls_in_region_debug (stmt, name_copies,
1345 decl_copies))
1347 gsi_remove (&gsi, true);
1348 continue;
1352 gsi_next (&gsi);
1356 if (name_copies.elements () == 0 && reduction_list.elements () == 0)
1358 /* It may happen that there is nothing to copy (if there are only
1359 loop carried and external variables in the loop). */
1360 *arg_struct = NULL;
1361 *new_arg_struct = NULL;
1363 else
1365 /* Create the type for the structure to store the ssa names to. */
1366 type = lang_hooks.types.make_type (RECORD_TYPE);
1367 type_name = build_decl (UNKNOWN_LOCATION,
1368 TYPE_DECL, create_tmp_var_name (".paral_data"),
1369 type);
1370 TYPE_NAME (type) = type_name;
1372 name_copies.traverse <tree, add_field_for_name> (type);
1373 if (reduction_list.is_created () && reduction_list.elements () > 0)
1375 /* Create the fields for reductions. */
1376 reduction_list.traverse <tree, add_field_for_reduction> (type);
1378 layout_type (type);
1380 /* Create the loads and stores. */
1381 *arg_struct = create_tmp_var (type, ".paral_data_store");
1382 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1383 *new_arg_struct = make_ssa_name (nvar, NULL);
1385 ld_st_data->store = *arg_struct;
1386 ld_st_data->load = *new_arg_struct;
1387 ld_st_data->store_bb = bb0;
1388 ld_st_data->load_bb = bb1;
1390 name_copies
1391 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1392 (ld_st_data);
1394 /* Load the calculation from memory (after the join of the threads). */
1396 if (reduction_list.is_created () && reduction_list.elements () > 0)
1398 reduction_list
1399 .traverse <struct clsn_data *, create_stores_for_reduction>
1400 (ld_st_data);
1401 clsn_data.load = make_ssa_name (nvar, NULL);
1402 clsn_data.load_bb = exit->dest;
1403 clsn_data.store = ld_st_data->store;
1404 create_final_loads_for_reduction (reduction_list, &clsn_data);
1408 decl_copies.dispose ();
1409 name_copies.dispose ();
1412 /* Bitmap containing uids of functions created by parallelization. We cannot
1413 allocate it from the default obstack, as it must live across compilation
1414 of several functions; we make it gc allocated instead. */
1416 static GTY(()) bitmap parallelized_functions;
1418 /* Returns true if FN was created by create_loop_fn. */
1420 bool
1421 parallelized_function_p (tree fn)
1423 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1424 return false;
1426 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1429 /* Creates and returns an empty function that will receive the body of
1430 a parallelized loop. */
1432 static tree
1433 create_loop_fn (location_t loc)
1435 char buf[100];
1436 char *tname;
1437 tree decl, type, name, t;
1438 struct function *act_cfun = cfun;
1439 static unsigned loopfn_num;
1441 loc = LOCATION_LOCUS (loc);
1442 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1443 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1444 clean_symbol_name (tname);
1445 name = get_identifier (tname);
1446 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1448 decl = build_decl (loc, FUNCTION_DECL, name, type);
1449 if (!parallelized_functions)
1450 parallelized_functions = BITMAP_GGC_ALLOC ();
1451 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1453 TREE_STATIC (decl) = 1;
1454 TREE_USED (decl) = 1;
1455 DECL_ARTIFICIAL (decl) = 1;
1456 DECL_IGNORED_P (decl) = 0;
1457 TREE_PUBLIC (decl) = 0;
1458 DECL_UNINLINABLE (decl) = 1;
1459 DECL_EXTERNAL (decl) = 0;
1460 DECL_CONTEXT (decl) = NULL_TREE;
1461 DECL_INITIAL (decl) = make_node (BLOCK);
1463 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1464 DECL_ARTIFICIAL (t) = 1;
1465 DECL_IGNORED_P (t) = 1;
1466 DECL_RESULT (decl) = t;
1468 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1469 ptr_type_node);
1470 DECL_ARTIFICIAL (t) = 1;
1471 DECL_ARG_TYPE (t) = ptr_type_node;
1472 DECL_CONTEXT (t) = decl;
1473 TREE_USED (t) = 1;
1474 DECL_ARGUMENTS (decl) = t;
1476 allocate_struct_function (decl, false);
1478 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1479 it. */
1480 set_cfun (act_cfun);
1482 return decl;
1485 /* Moves the exit condition of LOOP to the beginning of its header, and
1486 duplicates the part of the last iteration that gets disabled to the
1487 exit of the loop. NIT is the number of iterations of the loop
1488 (used to initialize the variables in the duplicated part).
1490 TODO: the common case is that latch of the loop is empty and immediately
1491 follows the loop exit. In this case, it would be better not to copy the
1492 body of the loop, but only move the entry of the loop directly before the
1493 exit check and increase the number of iterations of the loop by one.
1494 This may need some additional preconditioning in case NIT = ~0.
1495 REDUCTION_LIST describes the reductions in LOOP. */
1497 static void
1498 transform_to_exit_first_loop (struct loop *loop,
1499 reduction_info_table_type reduction_list,
1500 tree nit)
1502 basic_block *bbs, *nbbs, ex_bb, orig_header;
1503 unsigned n;
1504 bool ok;
1505 edge exit = single_dom_exit (loop), hpred;
1506 tree control, control_name, res, t;
1507 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1508 gimple_stmt_iterator gsi;
1509 tree nit_1;
1511 split_block_after_labels (loop->header);
1512 orig_header = single_succ (loop->header);
1513 hpred = single_succ_edge (loop->header);
1515 cond_stmt = last_stmt (exit->src);
1516 control = gimple_cond_lhs (cond_stmt);
1517 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1519 /* Make sure that we have phi nodes on exit for all loop header phis
1520 (create_parallel_loop requires that). */
1521 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1523 phi = gsi_stmt (gsi);
1524 res = PHI_RESULT (phi);
1525 t = copy_ssa_name (res, phi);
1526 SET_PHI_RESULT (phi, t);
1527 nphi = create_phi_node (res, orig_header);
1528 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1530 if (res == control)
1532 gimple_cond_set_lhs (cond_stmt, t);
1533 update_stmt (cond_stmt);
1534 control = t;
1538 bbs = get_loop_body_in_dom_order (loop);
1540 for (n = 0; bbs[n] != exit->src; n++)
1541 continue;
1542 nbbs = XNEWVEC (basic_block, n);
1543 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1544 bbs + 1, n, nbbs);
1545 gcc_assert (ok);
1546 free (bbs);
1547 ex_bb = nbbs[0];
1548 free (nbbs);
1550 /* Other than reductions, the only gimple reg that should be copied
1551 out of the loop is the control variable. */
1552 exit = single_dom_exit (loop);
1553 control_name = NULL_TREE;
1554 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1556 phi = gsi_stmt (gsi);
1557 res = PHI_RESULT (phi);
1558 if (virtual_operand_p (res))
1560 gsi_next (&gsi);
1561 continue;
1564 /* Check if it is a part of reduction. If it is,
1565 keep the phi at the reduction's keep_res field. The
1566 PHI_RESULT of this phi is the resulting value of the reduction
1567 variable when exiting the loop. */
1569 if (reduction_list.elements () > 0)
1571 struct reduction_info *red;
1573 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1574 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1575 if (red)
1577 red->keep_res = phi;
1578 gsi_next (&gsi);
1579 continue;
1582 gcc_assert (control_name == NULL_TREE
1583 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1584 control_name = res;
1585 remove_phi_node (&gsi, false);
1587 gcc_assert (control_name != NULL_TREE);
1589 /* Initialize the control variable to number of iterations
1590 according to the rhs of the exit condition. */
1591 gsi = gsi_after_labels (ex_bb);
1592 cond_nit = last_stmt (exit->src);
1593 nit_1 = gimple_cond_rhs (cond_nit);
1594 nit_1 = force_gimple_operand_gsi (&gsi,
1595 fold_convert (TREE_TYPE (control_name), nit_1),
1596 false, NULL_TREE, false, GSI_SAME_STMT);
1597 stmt = gimple_build_assign (control_name, nit_1);
1598 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1601 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1602 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1603 NEW_DATA is the variable that should be initialized from the argument
1604 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1605 basic block containing GIMPLE_OMP_PARALLEL tree. */
1607 static basic_block
1608 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1609 tree new_data, unsigned n_threads, location_t loc)
1611 gimple_stmt_iterator gsi;
1612 basic_block bb, paral_bb, for_bb, ex_bb;
1613 tree t, param;
1614 gimple stmt, for_stmt, phi, cond_stmt;
1615 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1616 edge exit, nexit, guard, end, e;
1618 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1619 bb = loop_preheader_edge (loop)->src;
1620 paral_bb = single_pred (bb);
1621 gsi = gsi_last_bb (paral_bb);
1623 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1624 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1625 = build_int_cst (integer_type_node, n_threads);
1626 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1627 gimple_set_location (stmt, loc);
1629 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1631 /* Initialize NEW_DATA. */
1632 if (data)
1634 gsi = gsi_after_labels (bb);
1636 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1637 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1638 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1640 stmt = gimple_build_assign (new_data,
1641 fold_convert (TREE_TYPE (new_data), param));
1642 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1645 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1646 bb = split_loop_exit_edge (single_dom_exit (loop));
1647 gsi = gsi_last_bb (bb);
1648 stmt = gimple_build_omp_return (false);
1649 gimple_set_location (stmt, loc);
1650 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1652 /* Extract data for GIMPLE_OMP_FOR. */
1653 gcc_assert (loop->header == single_dom_exit (loop)->src);
1654 cond_stmt = last_stmt (loop->header);
1656 cvar = gimple_cond_lhs (cond_stmt);
1657 cvar_base = SSA_NAME_VAR (cvar);
1658 phi = SSA_NAME_DEF_STMT (cvar);
1659 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1660 initvar = copy_ssa_name (cvar, NULL);
1661 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1662 initvar);
1663 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1665 gsi = gsi_last_nondebug_bb (loop->latch);
1666 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1667 gsi_remove (&gsi, true);
1669 /* Prepare cfg. */
1670 for_bb = split_edge (loop_preheader_edge (loop));
1671 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1672 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1673 gcc_assert (exit == single_dom_exit (loop));
1675 guard = make_edge (for_bb, ex_bb, 0);
1676 single_succ_edge (loop->latch)->flags = 0;
1677 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1678 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1680 source_location locus;
1681 tree def;
1682 phi = gsi_stmt (gsi);
1683 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1685 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1686 locus = gimple_phi_arg_location_from_edge (stmt,
1687 loop_preheader_edge (loop));
1688 add_phi_arg (phi, def, guard, locus);
1690 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1691 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1692 add_phi_arg (phi, def, end, locus);
1694 e = redirect_edge_and_branch (exit, nexit->dest);
1695 PENDING_STMT (e) = NULL;
1697 /* Emit GIMPLE_OMP_FOR. */
1698 gimple_cond_set_lhs (cond_stmt, cvar_base);
1699 type = TREE_TYPE (cvar);
1700 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1701 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1703 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
1704 gimple_set_location (for_stmt, loc);
1705 gimple_omp_for_set_index (for_stmt, 0, initvar);
1706 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1707 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1708 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1709 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1710 cvar_base,
1711 build_int_cst (type, 1)));
1713 gsi = gsi_last_bb (for_bb);
1714 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1715 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1717 /* Emit GIMPLE_OMP_CONTINUE. */
1718 gsi = gsi_last_bb (loop->latch);
1719 stmt = gimple_build_omp_continue (cvar_next, cvar);
1720 gimple_set_location (stmt, loc);
1721 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1722 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1724 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1725 gsi = gsi_last_bb (ex_bb);
1726 stmt = gimple_build_omp_return (true);
1727 gimple_set_location (stmt, loc);
1728 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1730 /* After the above dom info is hosed. Re-compute it. */
1731 free_dominance_info (CDI_DOMINATORS);
1732 calculate_dominance_info (CDI_DOMINATORS);
1734 return paral_bb;
1737 /* Generates code to execute the iterations of LOOP in N_THREADS
1738 threads in parallel.
1740 NITER describes number of iterations of LOOP.
1741 REDUCTION_LIST describes the reductions existent in the LOOP. */
1743 static void
1744 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1745 unsigned n_threads, struct tree_niter_desc *niter)
1747 loop_iterator li;
1748 tree many_iterations_cond, type, nit;
1749 tree arg_struct, new_arg_struct;
1750 gimple_seq stmts;
1751 basic_block parallel_head;
1752 edge entry, exit;
1753 struct clsn_data clsn_data;
1754 unsigned prob;
1755 location_t loc;
1756 gimple cond_stmt;
1757 unsigned int m_p_thread=2;
1759 /* From
1761 ---------------------------------------------------------------------
1762 loop
1764 IV = phi (INIT, IV + STEP)
1765 BODY1;
1766 if (COND)
1767 break;
1768 BODY2;
1770 ---------------------------------------------------------------------
1772 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1773 we generate the following code:
1775 ---------------------------------------------------------------------
1777 if (MAY_BE_ZERO
1778 || NITER < MIN_PER_THREAD * N_THREADS)
1779 goto original;
1781 BODY1;
1782 store all local loop-invariant variables used in body of the loop to DATA.
1783 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1784 load the variables from DATA.
1785 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1786 BODY2;
1787 BODY1;
1788 GIMPLE_OMP_CONTINUE;
1789 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1790 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1791 goto end;
1793 original:
1794 loop
1796 IV = phi (INIT, IV + STEP)
1797 BODY1;
1798 if (COND)
1799 break;
1800 BODY2;
1803 end:
1807 /* Create two versions of the loop -- in the old one, we know that the
1808 number of iterations is large enough, and we will transform it into the
1809 loop that will be split to loop_fn, the new one will be used for the
1810 remaining iterations. */
1812 /* We should compute a better number-of-iterations value for outer loops.
1813 That is, if we have
1815 for (i = 0; i < n; ++i)
1816 for (j = 0; j < m; ++j)
1819 we should compute nit = n * m, not nit = n.
1820 Also may_be_zero handling would need to be adjusted. */
1822 type = TREE_TYPE (niter->niter);
1823 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1824 NULL_TREE);
1825 if (stmts)
1826 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1828 if (loop->inner)
1829 m_p_thread=2;
1830 else
1831 m_p_thread=MIN_PER_THREAD;
1833 many_iterations_cond =
1834 fold_build2 (GE_EXPR, boolean_type_node,
1835 nit, build_int_cst (type, m_p_thread * n_threads));
1837 many_iterations_cond
1838 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1839 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1840 many_iterations_cond);
1841 many_iterations_cond
1842 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1843 if (stmts)
1844 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1845 if (!is_gimple_condexpr (many_iterations_cond))
1847 many_iterations_cond
1848 = force_gimple_operand (many_iterations_cond, &stmts,
1849 true, NULL_TREE);
1850 if (stmts)
1851 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1854 initialize_original_copy_tables ();
1856 /* We assume that the loop usually iterates a lot. */
1857 prob = 4 * REG_BR_PROB_BASE / 5;
1858 loop_version (loop, many_iterations_cond, NULL,
1859 prob, prob, REG_BR_PROB_BASE - prob, true);
1860 update_ssa (TODO_update_ssa);
1861 free_original_copy_tables ();
1863 /* Base all the induction variables in LOOP on a single control one. */
1864 canonicalize_loop_ivs (loop, &nit, true);
1866 /* Ensure that the exit condition is the first statement in the loop. */
1867 transform_to_exit_first_loop (loop, reduction_list, nit);
1869 /* Generate initializations for reductions. */
1870 if (reduction_list.elements () > 0)
1871 reduction_list.traverse <struct loop *, initialize_reductions> (loop);
1873 /* Eliminate the references to local variables from the loop. */
1874 gcc_assert (single_exit (loop));
1875 entry = loop_preheader_edge (loop);
1876 exit = single_dom_exit (loop);
1878 eliminate_local_variables (entry, exit);
1879 /* In the old loop, move all variables non-local to the loop to a structure
1880 and back, and create separate decls for the variables used in loop. */
1881 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1882 &new_arg_struct, &clsn_data);
1884 /* Create the parallel constructs. */
1885 loc = UNKNOWN_LOCATION;
1886 cond_stmt = last_stmt (loop->header);
1887 if (cond_stmt)
1888 loc = gimple_location (cond_stmt);
1889 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1890 new_arg_struct, n_threads, loc);
1891 if (reduction_list.elements () > 0)
1892 create_call_for_reduction (loop, reduction_list, &clsn_data);
1894 scev_reset ();
1896 /* Cancel the loop (it is simpler to do it here rather than to teach the
1897 expander to do it). */
1898 cancel_loop_tree (loop);
1900 /* Free loop bound estimations that could contain references to
1901 removed statements. */
1902 FOR_EACH_LOOP (li, loop, 0)
1903 free_numbers_of_iterations_estimates_loop (loop);
1905 /* Expand the parallel constructs. We do it directly here instead of running
1906 a separate expand_omp pass, since it is more efficient, and less likely to
1907 cause troubles with further analyses not being able to deal with the
1908 OMP trees. */
1910 omp_expand_local (parallel_head);
1913 /* Returns true when LOOP contains vector phi nodes. */
1915 static bool
1916 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1918 unsigned i;
1919 basic_block *bbs = get_loop_body_in_dom_order (loop);
1920 gimple_stmt_iterator gsi;
1921 bool res = true;
1923 for (i = 0; i < loop->num_nodes; i++)
1924 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1925 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1926 goto end;
1928 res = false;
1929 end:
1930 free (bbs);
1931 return res;
1934 /* Create a reduction_info struct, initialize it with REDUC_STMT
1935 and PHI, insert it to the REDUCTION_LIST. */
1937 static void
1938 build_new_reduction (reduction_info_table_type reduction_list,
1939 gimple reduc_stmt, gimple phi)
1941 reduction_info **slot;
1942 struct reduction_info *new_reduction;
1944 gcc_assert (reduc_stmt);
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1948 fprintf (dump_file,
1949 "Detected reduction. reduction stmt is: \n");
1950 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1951 fprintf (dump_file, "\n");
1954 new_reduction = XCNEW (struct reduction_info);
1956 new_reduction->reduc_stmt = reduc_stmt;
1957 new_reduction->reduc_phi = phi;
1958 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1959 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1960 slot = reduction_list.find_slot (new_reduction, INSERT);
1961 *slot = new_reduction;
1964 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1967 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
1969 struct reduction_info *const red = *slot;
1970 gimple_set_uid (red->reduc_phi, red->reduc_version);
1971 return 1;
1974 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1976 static void
1977 gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
1979 gimple_stmt_iterator gsi;
1980 loop_vec_info simple_loop_info;
1982 simple_loop_info = vect_analyze_loop_form (loop);
1984 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1986 gimple phi = gsi_stmt (gsi);
1987 affine_iv iv;
1988 tree res = PHI_RESULT (phi);
1989 bool double_reduc;
1991 if (virtual_operand_p (res))
1992 continue;
1994 if (!simple_iv (loop, loop, res, &iv, true)
1995 && simple_loop_info)
1997 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1998 phi, true,
1999 &double_reduc);
2000 if (reduc_stmt && !double_reduc)
2001 build_new_reduction (reduction_list, reduc_stmt, phi);
2004 destroy_loop_vec_info (simple_loop_info, true);
2006 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2007 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2008 only now. */
2009 reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
2012 /* Try to initialize NITER for code generation part. */
2014 static bool
2015 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2017 edge exit = single_dom_exit (loop);
2019 gcc_assert (exit);
2021 /* We need to know # of iterations, and there should be no uses of values
2022 defined inside loop outside of it, unless the values are invariants of
2023 the loop. */
2024 if (!number_of_iterations_exit (loop, exit, niter, false))
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file, " FAILED: number of iterations not known\n");
2028 return false;
2031 return true;
2034 /* Try to initialize REDUCTION_LIST for code generation part.
2035 REDUCTION_LIST describes the reductions. */
2037 static bool
2038 try_create_reduction_list (loop_p loop,
2039 reduction_info_table_type reduction_list)
2041 edge exit = single_dom_exit (loop);
2042 gimple_stmt_iterator gsi;
2044 gcc_assert (exit);
2046 gather_scalar_reductions (loop, reduction_list);
2049 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2051 gimple phi = gsi_stmt (gsi);
2052 struct reduction_info *red;
2053 imm_use_iterator imm_iter;
2054 use_operand_p use_p;
2055 gimple reduc_phi;
2056 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2058 if (!virtual_operand_p (val))
2060 if (dump_file && (dump_flags & TDF_DETAILS))
2062 fprintf (dump_file, "phi is ");
2063 print_gimple_stmt (dump_file, phi, 0, 0);
2064 fprintf (dump_file, "arg of phi to exit: value ");
2065 print_generic_expr (dump_file, val, 0);
2066 fprintf (dump_file, " used outside loop\n");
2067 fprintf (dump_file,
2068 " checking if it a part of reduction pattern: \n");
2070 if (reduction_list.elements () == 0)
2072 if (dump_file && (dump_flags & TDF_DETAILS))
2073 fprintf (dump_file,
2074 " FAILED: it is not a part of reduction.\n");
2075 return false;
2077 reduc_phi = NULL;
2078 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2080 if (!gimple_debug_bind_p (USE_STMT (use_p))
2081 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2083 reduc_phi = USE_STMT (use_p);
2084 break;
2087 red = reduction_phi (reduction_list, reduc_phi);
2088 if (red == NULL)
2090 if (dump_file && (dump_flags & TDF_DETAILS))
2091 fprintf (dump_file,
2092 " FAILED: it is not a part of reduction.\n");
2093 return false;
2095 if (dump_file && (dump_flags & TDF_DETAILS))
2097 fprintf (dump_file, "reduction phi is ");
2098 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2099 fprintf (dump_file, "reduction stmt is ");
2100 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2105 /* The iterations of the loop may communicate only through bivs whose
2106 iteration space can be distributed efficiently. */
2107 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2109 gimple phi = gsi_stmt (gsi);
2110 tree def = PHI_RESULT (phi);
2111 affine_iv iv;
2113 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2115 struct reduction_info *red;
2117 red = reduction_phi (reduction_list, phi);
2118 if (red == NULL)
2120 if (dump_file && (dump_flags & TDF_DETAILS))
2121 fprintf (dump_file,
2122 " FAILED: scalar dependency between iterations\n");
2123 return false;
2129 return true;
2132 /* Detect parallel loops and generate parallel code using libgomp
2133 primitives. Returns true if some loop was parallelized, false
2134 otherwise. */
2136 bool
2137 parallelize_loops (void)
2139 unsigned n_threads = flag_tree_parallelize_loops;
2140 bool changed = false;
2141 struct loop *loop;
2142 struct tree_niter_desc niter_desc;
2143 loop_iterator li;
2144 reduction_info_table_type reduction_list;
2145 struct obstack parloop_obstack;
2146 HOST_WIDE_INT estimated;
2147 LOC loop_loc;
2149 /* Do not parallelize loops in the functions created by parallelization. */
2150 if (parallelized_function_p (cfun->decl))
2151 return false;
2152 if (cfun->has_nonlocal_label)
2153 return false;
2155 gcc_obstack_init (&parloop_obstack);
2156 reduction_list.create (10);
2157 init_stmt_vec_info_vec ();
2159 FOR_EACH_LOOP (li, loop, 0)
2161 reduction_list.empty ();
2162 if (dump_file && (dump_flags & TDF_DETAILS))
2164 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2165 if (loop->inner)
2166 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2167 else
2168 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2171 /* If we use autopar in graphite pass, we use its marked dependency
2172 checking results. */
2173 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2175 if (dump_file && (dump_flags & TDF_DETAILS))
2176 fprintf (dump_file, "loop is not parallel according to graphite\n");
2177 continue;
2180 if (!single_dom_exit (loop))
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2184 fprintf (dump_file, "loop is !single_dom_exit\n");
2186 continue;
2189 if (/* And of course, the loop must be parallelizable. */
2190 !can_duplicate_loop_p (loop)
2191 || loop_has_blocks_with_irreducible_flag (loop)
2192 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2193 /* FIXME: the check for vector phi nodes could be removed. */
2194 || loop_has_vector_phi_nodes (loop))
2195 continue;
2197 estimated = estimated_stmt_executions_int (loop);
2198 if (estimated == -1)
2199 estimated = max_stmt_executions_int (loop);
2200 /* FIXME: Bypass this check as graphite doesn't update the
2201 count and frequency correctly now. */
2202 if (!flag_loop_parallelize_all
2203 && ((estimated != -1
2204 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2205 /* Do not bother with loops in cold areas. */
2206 || optimize_loop_nest_for_size_p (loop)))
2207 continue;
2209 if (!try_get_loop_niter (loop, &niter_desc))
2210 continue;
2212 if (!try_create_reduction_list (loop, reduction_list))
2213 continue;
2215 if (!flag_loop_parallelize_all
2216 && !loop_parallel_p (loop, &parloop_obstack))
2217 continue;
2219 changed = true;
2220 if (dump_file && (dump_flags & TDF_DETAILS))
2222 if (loop->inner)
2223 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2224 else
2225 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2226 loop_loc = find_loop_location (loop);
2227 if (loop_loc != UNKNOWN_LOC)
2228 fprintf (dump_file, "\nloop at %s:%d: ",
2229 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2231 gen_parallel_loop (loop, reduction_list,
2232 n_threads, &niter_desc);
2235 free_stmt_vec_info_vec ();
2236 reduction_list.dispose ();
2237 obstack_free (&parloop_obstack, NULL);
2239 /* Parallelization will cause new function calls to be inserted through
2240 which local variables will escape. Reset the points-to solution
2241 for ESCAPED. */
2242 if (changed)
2243 pt_solution_reset (&cfun->gimple_df->escaped);
2245 return changed;
2248 /* Parallelization. */
2250 static bool
2251 gate_tree_parallelize_loops (void)
2253 return flag_tree_parallelize_loops > 1;
2256 static unsigned
2257 tree_parallelize_loops (void)
2259 if (number_of_loops (cfun) <= 1)
2260 return 0;
2262 if (parallelize_loops ())
2263 return TODO_cleanup_cfg | TODO_rebuild_alias;
2264 return 0;
2267 namespace {
2269 const pass_data pass_data_parallelize_loops =
2271 GIMPLE_PASS, /* type */
2272 "parloops", /* name */
2273 OPTGROUP_LOOP, /* optinfo_flags */
2274 true, /* has_gate */
2275 true, /* has_execute */
2276 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2277 ( PROP_cfg | PROP_ssa ), /* properties_required */
2278 0, /* properties_provided */
2279 0, /* properties_destroyed */
2280 0, /* todo_flags_start */
2281 TODO_verify_flow, /* todo_flags_finish */
2284 class pass_parallelize_loops : public gimple_opt_pass
2286 public:
2287 pass_parallelize_loops (gcc::context *ctxt)
2288 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2291 /* opt_pass methods: */
2292 bool gate () { return gate_tree_parallelize_loops (); }
2293 unsigned int execute () { return tree_parallelize_loops (); }
2295 }; // class pass_parallelize_loops
2297 } // anon namespace
2299 gimple_opt_pass *
2300 make_pass_parallelize_loops (gcc::context *ctxt)
2302 return new pass_parallelize_loops (ctxt);
2306 #include "gt-tree-parloops.h"