2012-08-01 Richard Guenther <rguenther@suse.de>
[official-gcc.git] / gcc / tree-parloops.c
blob92faa996a8c0764a133875d15bb5dfc10e4acb54
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree-flow.h"
27 #include "cfgloop.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-pass.h"
32 #include "langhooks.h"
33 #include "tree-vectorizer.h"
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
42 The most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common reduction patterns for outer loops.
59 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.

  parloop
  {
    int sum = 1;

    for (i = 0; i < N; i++)
      {
	x[i] = i + 3;
	sum += x[i];
      }
  }

  gimple-like code:
  header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;

  exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);

  after reduction transformation (only relevant parts):

  parloop
  {
    ....

    # Storing the initial value given by the user.  #
    .paral_data_store.32.sum.27 = 1;

    #pragma omp parallel num_threads(4)

    #pragma omp for schedule(static)

    # The neutral element corresponding to the particular
    reduction's operation, e.g. 0 for PLUS_EXPR,
    1 for MULT_EXPR, etc. replaces the user's initial value.  #
    # sum.27_29 = PHI <sum.27_11, 0>

    sum.27_11 = D.1827_8 + sum.27_29;

    GIMPLE_OMP_CONTINUE

    # Adding this reduction phi is done at create_phi_for_local_result() #
    # sum.27_56 = PHI <sum.27_11, 0>
    GIMPLE_OMP_RETURN

    # Creating the atomic operation is done at
    create_call_for_reduction_1() #

    #pragma omp atomic_load
    D.1839_59 = *&.paral_data_load.33_51->reduction.23;
    D.1840_60 = sum.27_56 + D.1839_59;
    #pragma omp atomic_store (D.1840_60);

    GIMPLE_OMP_RETURN

    # collecting the result after the join of the threads is done at
      create_loads_for_reductions().
      The value computed by the threads is loaded from the
      shared struct.  #

    .paral_data_load.33_52 = &.paral_data_store.32;
    sum_37 = .paral_data_load.33_52->sum.27;
    sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);
  }

*/
/* A loop is only parallelized when each thread would get at least this
   many iterations to execute.  */
#define MIN_PER_THREAD 100
160 /* Element of the hashtable, representing a
161 reduction in the current loop. */
162 struct reduction_info
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
170 of the reduction variable when existing the loop. */
171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
173 tree init; /* reduction initialization value. */
174 gimple new_phi; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
180 /* Equality and hash functions for hashtab code. */
182 static int
183 reduction_info_eq (const void *aa, const void *bb)
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
188 return (a->reduc_phi == b->reduc_phi);
191 static hashval_t
192 reduction_info_hash (const void *aa)
194 const struct reduction_info *a = (const struct reduction_info *) aa;
196 return a->reduc_version;
199 static struct reduction_info *
200 reduction_phi (htab_t reduction_list, gimple phi)
202 struct reduction_info tmpred, *red;
204 if (htab_elements (reduction_list) == 0 || phi == NULL)
205 return NULL;
207 tmpred.reduc_phi = phi;
208 tmpred.reduc_version = gimple_uid (phi);
209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
211 return red;
214 /* Element of hashtable of names to copy. */
216 struct name_to_copy_elt
218 unsigned version; /* The version of the name to copy. */
219 tree new_name; /* The new name used in the copy. */
220 tree field; /* The field of the structure used to pass the
221 value. */
224 /* Equality and hash functions for hashtab code. */
226 static int
227 name_to_copy_elt_eq (const void *aa, const void *bb)
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
232 return a->version == b->version;
235 static hashval_t
236 name_to_copy_elt_hash (const void *aa)
238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
240 return (hashval_t) a->version;
243 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246 typedef struct lambda_trans_matrix_s
248 lambda_matrix matrix;
249 int rowsize;
250 int colsize;
251 int denominator;
252 } *lambda_trans_matrix;
253 #define LTM_MATRIX(T) ((T)->matrix)
254 #define LTM_ROWSIZE(T) ((T)->rowsize)
255 #define LTM_COLSIZE(T) ((T)->colsize)
256 #define LTM_DENOMINATOR(T) ((T)->denominator)
258 /* Allocate a new transformation matrix. */
260 static lambda_trans_matrix
261 lambda_trans_matrix_new (int colsize, int rowsize,
262 struct obstack * lambda_obstack)
264 lambda_trans_matrix ret;
266 ret = (lambda_trans_matrix)
267 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269 LTM_ROWSIZE (ret) = rowsize;
270 LTM_COLSIZE (ret) = colsize;
271 LTM_DENOMINATOR (ret) = 1;
272 return ret;
275 /* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
279 static void
280 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281 lambda_vector vec, lambda_vector dest)
283 int i, j;
285 lambda_vector_clear (dest, m);
286 for (i = 0; i < m; i++)
287 for (j = 0; j < n; j++)
288 dest[i] += matrix[i][j] * vec[j];
291 /* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
293 is false.
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
304 static bool
305 lambda_transform_legal_p (lambda_trans_matrix trans,
306 int nb_loops,
307 VEC (ddr_p, heap) *dependence_relations)
309 unsigned int i, j;
310 lambda_vector distres;
311 struct data_dependence_relation *ddr;
313 gcc_assert (LTM_COLSIZE (trans) == nb_loops
314 && LTM_ROWSIZE (trans) == nb_loops);
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p, dependence_relations) == 0)
318 return true;
320 ddr = VEC_index (ddr_p, dependence_relations, 0);
321 if (ddr == NULL)
322 return true;
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is no worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327 return false;
329 distres = lambda_vector_new (nb_loops);
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (aka. output-dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339 continue;
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343 return false;
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr) == 0)
348 return false;
350 /* Compute trans.dist_vect */
351 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
353 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354 DDR_DIST_VECT (ddr, j), distres);
356 if (!lambda_vector_lexico_pos (distres, nb_loops))
357 return false;
360 return true;
363 /* Data dependency analysis. Returns true if the iterations of LOOP
364 are independent on each other (that is, if we can execute them
365 in parallel). */
367 static bool
368 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
370 VEC (loop_p, heap) *loop_nest;
371 VEC (ddr_p, heap) *dependence_relations;
372 VEC (data_reference_p, heap) *datarefs;
373 lambda_trans_matrix trans;
374 bool ret = false;
376 if (dump_file && (dump_flags & TDF_DETAILS))
378 fprintf (dump_file, "Considering loop %d\n", loop->num);
379 if (!loop->inner)
380 fprintf (dump_file, "loop is innermost\n");
381 else
382 fprintf (dump_file, "loop NOT innermost\n");
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs = VEC_alloc (data_reference_p, heap, 10);
388 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
389 loop_nest = VEC_alloc (loop_p, heap, 3);
390 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
391 &dependence_relations))
393 if (dump_file && (dump_flags & TDF_DETAILS))
394 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
395 ret = false;
396 goto end;
398 if (dump_file && (dump_flags & TDF_DETAILS))
399 dump_data_dependence_relations (dump_file, dependence_relations);
401 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
402 LTM_MATRIX (trans)[0][0] = -1;
404 if (lambda_transform_legal_p (trans, 1, dependence_relations))
406 ret = true;
407 if (dump_file && (dump_flags & TDF_DETAILS))
408 fprintf (dump_file, " SUCCESS: may be parallelized\n");
410 else if (dump_file && (dump_flags & TDF_DETAILS))
411 fprintf (dump_file,
412 " FAILED: data dependencies exist across iterations\n");
414 end:
415 VEC_free (loop_p, heap, loop_nest);
416 free_dependence_relations (dependence_relations);
417 free_data_refs (datarefs);
419 return ret;
422 /* Return true when LOOP contains basic blocks marked with the
423 BB_IRREDUCIBLE_LOOP flag. */
425 static inline bool
426 loop_has_blocks_with_irreducible_flag (struct loop *loop)
428 unsigned i;
429 basic_block *bbs = get_loop_body_in_dom_order (loop);
430 bool res = true;
432 for (i = 0; i < loop->num_nodes; i++)
433 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434 goto end;
436 res = false;
437 end:
438 free (bbs);
439 return res;
442 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
443 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
444 to their addresses that can be reused. The address of OBJ is known to
445 be invariant in the whole function. Other needed statements are placed
446 right before GSI. */
448 static tree
449 take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
450 gimple_stmt_iterator *gsi)
452 int uid;
453 void **dslot;
454 struct int_tree_map ielt, *nielt;
455 tree *var_p, name, bvar, addr;
456 gimple stmt;
457 gimple_seq stmts;
459 /* Since the address of OBJ is invariant, the trees may be shared.
460 Avoid rewriting unrelated parts of the code. */
461 obj = unshare_expr (obj);
462 for (var_p = &obj;
463 handled_component_p (*var_p);
464 var_p = &TREE_OPERAND (*var_p, 0))
465 continue;
467 /* Canonicalize the access to base on a MEM_REF. */
468 if (DECL_P (*var_p))
469 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
471 /* Assign a canonical SSA name to the address of the base decl used
472 in the address and share it for all accesses and addresses based
473 on it. */
474 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
475 ielt.uid = uid;
476 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
477 if (!*dslot)
479 if (gsi == NULL)
480 return NULL;
481 addr = TREE_OPERAND (*var_p, 0);
482 bvar = create_tmp_var (TREE_TYPE (addr),
483 get_name (TREE_OPERAND
484 (TREE_OPERAND (*var_p, 0), 0)));
485 add_referenced_var (bvar);
486 stmt = gimple_build_assign (bvar, addr);
487 name = make_ssa_name (bvar, stmt);
488 gimple_assign_set_lhs (stmt, name);
489 gsi_insert_on_edge_immediate (entry, stmt);
491 nielt = XNEW (struct int_tree_map);
492 nielt->uid = uid;
493 nielt->to = name;
494 *dslot = nielt;
496 else
497 name = ((struct int_tree_map *) *dslot)->to;
499 /* Express the address in terms of the canonical SSA name. */
500 TREE_OPERAND (*var_p, 0) = name;
501 if (gsi == NULL)
502 return build_fold_addr_expr_with_type (obj, type);
504 name = force_gimple_operand (build_addr (obj, current_function_decl),
505 &stmts, true, NULL_TREE);
506 if (!gimple_seq_empty_p (stmts))
507 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
509 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
511 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
512 NULL_TREE);
513 if (!gimple_seq_empty_p (stmts))
514 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
517 return name;
520 /* Callback for htab_traverse. Create the initialization statement
521 for reduction described in SLOT, and place it at the preheader of
522 the loop described in DATA. */
524 static int
525 initialize_reductions (void **slot, void *data)
527 tree init, c;
528 tree bvar, type, arg;
529 edge e;
531 struct reduction_info *const reduc = (struct reduction_info *) *slot;
532 struct loop *loop = (struct loop *) data;
534 /* Create initialization in preheader:
535 reduction_variable = initialization value of reduction. */
537 /* In the phi node at the header, replace the argument coming
538 from the preheader with the reduction initialization value. */
540 /* Create a new variable to initialize the reduction. */
541 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
542 bvar = create_tmp_var (type, "reduction");
543 add_referenced_var (bvar);
545 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
546 OMP_CLAUSE_REDUCTION);
547 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
548 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
550 init = omp_reduction_init (c, TREE_TYPE (bvar));
551 reduc->init = init;
553 /* Replace the argument representing the initialization value
554 with the initialization value for the reduction (neutral
555 element for the particular operation, e.g. 0 for PLUS_EXPR,
556 1 for MULT_EXPR, etc).
557 Keep the old value in a new variable "reduction_initial",
558 that will be taken in consideration after the parallel
559 computing is done. */
561 e = loop_preheader_edge (loop);
562 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
563 /* Create new variable to hold the initial value. */
565 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
566 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
567 reduc->initial_value = arg;
568 return 1;
571 struct elv_data
573 struct walk_stmt_info info;
574 edge entry;
575 htab_t decl_address;
576 gimple_stmt_iterator *gsi;
577 bool changed;
578 bool reset;
581 /* Eliminates references to local variables in *TP out of the single
582 entry single exit region starting at DTA->ENTRY.
583 DECL_ADDRESS contains addresses of the references that had their
584 address taken already. If the expression is changed, CHANGED is
585 set to true. Callback for walk_tree. */
587 static tree
588 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
590 struct elv_data *const dta = (struct elv_data *) data;
591 tree t = *tp, var, addr, addr_type, type, obj;
593 if (DECL_P (t))
595 *walk_subtrees = 0;
597 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
598 return NULL_TREE;
600 type = TREE_TYPE (t);
601 addr_type = build_pointer_type (type);
602 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
603 dta->gsi);
604 if (dta->gsi == NULL && addr == NULL_TREE)
606 dta->reset = true;
607 return NULL_TREE;
610 *tp = build_simple_mem_ref (addr);
612 dta->changed = true;
613 return NULL_TREE;
616 if (TREE_CODE (t) == ADDR_EXPR)
618 /* ADDR_EXPR may appear in two contexts:
619 -- as a gimple operand, when the address taken is a function invariant
620 -- as gimple rhs, when the resulting address in not a function
621 invariant
622 We do not need to do anything special in the latter case (the base of
623 the memory reference whose address is taken may be replaced in the
624 DECL_P case). The former case is more complicated, as we need to
625 ensure that the new address is still a gimple operand. Thus, it
626 is not sufficient to replace just the base of the memory reference --
627 we need to move the whole computation of the address out of the
628 loop. */
629 if (!is_gimple_val (t))
630 return NULL_TREE;
632 *walk_subtrees = 0;
633 obj = TREE_OPERAND (t, 0);
634 var = get_base_address (obj);
635 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
636 return NULL_TREE;
638 addr_type = TREE_TYPE (t);
639 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
640 dta->gsi);
641 if (dta->gsi == NULL && addr == NULL_TREE)
643 dta->reset = true;
644 return NULL_TREE;
646 *tp = addr;
648 dta->changed = true;
649 return NULL_TREE;
652 if (!EXPR_P (t))
653 *walk_subtrees = 0;
655 return NULL_TREE;
658 /* Moves the references to local variables in STMT at *GSI out of the single
659 entry single exit region starting at ENTRY. DECL_ADDRESS contains
660 addresses of the references that had their address taken
661 already. */
663 static void
664 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
665 htab_t decl_address)
667 struct elv_data dta;
668 gimple stmt = gsi_stmt (*gsi);
670 memset (&dta.info, '\0', sizeof (dta.info));
671 dta.entry = entry;
672 dta.decl_address = decl_address;
673 dta.changed = false;
674 dta.reset = false;
676 if (gimple_debug_bind_p (stmt))
678 dta.gsi = NULL;
679 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
680 eliminate_local_variables_1, &dta.info, NULL);
681 if (dta.reset)
683 gimple_debug_bind_reset_value (stmt);
684 dta.changed = true;
687 else
689 dta.gsi = gsi;
690 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
693 if (dta.changed)
694 update_stmt (stmt);
697 /* Eliminates the references to local variables from the single entry
698 single exit region between the ENTRY and EXIT edges.
700 This includes:
701 1) Taking address of a local variable -- these are moved out of the
702 region (and temporary variable is created to hold the address if
703 necessary).
705 2) Dereferencing a local variable -- these are replaced with indirect
706 references. */
708 static void
709 eliminate_local_variables (edge entry, edge exit)
711 basic_block bb;
712 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
713 unsigned i;
714 gimple_stmt_iterator gsi;
715 bool has_debug_stmt = false;
716 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
717 free);
718 basic_block entry_bb = entry->src;
719 basic_block exit_bb = exit->dest;
721 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
723 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
724 if (bb != entry_bb && bb != exit_bb)
725 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
726 if (is_gimple_debug (gsi_stmt (gsi)))
728 if (gimple_debug_bind_p (gsi_stmt (gsi)))
729 has_debug_stmt = true;
731 else
732 eliminate_local_variables_stmt (entry, &gsi, decl_address);
734 if (has_debug_stmt)
735 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
736 if (bb != entry_bb && bb != exit_bb)
737 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
738 if (gimple_debug_bind_p (gsi_stmt (gsi)))
739 eliminate_local_variables_stmt (entry, &gsi, decl_address);
741 htab_delete (decl_address);
742 VEC_free (basic_block, heap, body);
745 /* Returns true if expression EXPR is not defined between ENTRY and
746 EXIT, i.e. if all its operands are defined outside of the region. */
748 static bool
749 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
751 basic_block entry_bb = entry->src;
752 basic_block exit_bb = exit->dest;
753 basic_block def_bb;
755 if (is_gimple_min_invariant (expr))
756 return true;
758 if (TREE_CODE (expr) == SSA_NAME)
760 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
761 if (def_bb
762 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
763 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
764 return false;
766 return true;
769 return false;
772 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
773 The copies are stored to NAME_COPIES, if NAME was already duplicated,
774 its duplicate stored in NAME_COPIES is returned.
776 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
777 duplicated, storing the copies in DECL_COPIES. */
779 static tree
780 separate_decls_in_region_name (tree name,
781 htab_t name_copies, htab_t decl_copies,
782 bool copy_name_p)
784 tree copy, var, var_copy;
785 unsigned idx, uid, nuid;
786 struct int_tree_map ielt, *nielt;
787 struct name_to_copy_elt elt, *nelt;
788 void **slot, **dslot;
790 if (TREE_CODE (name) != SSA_NAME)
791 return name;
793 idx = SSA_NAME_VERSION (name);
794 elt.version = idx;
795 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
796 copy_name_p ? INSERT : NO_INSERT);
797 if (slot && *slot)
798 return ((struct name_to_copy_elt *) *slot)->new_name;
800 var = SSA_NAME_VAR (name);
801 uid = DECL_UID (var);
802 ielt.uid = uid;
803 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
804 if (!*dslot)
806 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
807 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
808 add_referenced_var (var_copy);
809 nielt = XNEW (struct int_tree_map);
810 nielt->uid = uid;
811 nielt->to = var_copy;
812 *dslot = nielt;
814 /* Ensure that when we meet this decl next time, we won't duplicate
815 it again. */
816 nuid = DECL_UID (var_copy);
817 ielt.uid = nuid;
818 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
819 gcc_assert (!*dslot);
820 nielt = XNEW (struct int_tree_map);
821 nielt->uid = nuid;
822 nielt->to = var_copy;
823 *dslot = nielt;
825 else
826 var_copy = ((struct int_tree_map *) *dslot)->to;
828 if (copy_name_p)
830 copy = duplicate_ssa_name (name, NULL);
831 nelt = XNEW (struct name_to_copy_elt);
832 nelt->version = idx;
833 nelt->new_name = copy;
834 nelt->field = NULL_TREE;
835 *slot = nelt;
837 else
839 gcc_assert (!slot);
840 copy = name;
843 SSA_NAME_VAR (copy) = var_copy;
844 return copy;
847 /* Finds the ssa names used in STMT that are defined outside the
848 region between ENTRY and EXIT and replaces such ssa names with
849 their duplicates. The duplicates are stored to NAME_COPIES. Base
850 decls of all ssa names used in STMT (including those defined in
851 LOOP) are replaced with the new temporary variables; the
852 replacement decls are stored in DECL_COPIES. */
854 static void
855 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
856 htab_t name_copies, htab_t decl_copies)
858 use_operand_p use;
859 def_operand_p def;
860 ssa_op_iter oi;
861 tree name, copy;
862 bool copy_name_p;
864 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
866 name = DEF_FROM_PTR (def);
867 gcc_assert (TREE_CODE (name) == SSA_NAME);
868 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
869 false);
870 gcc_assert (copy == name);
873 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
875 name = USE_FROM_PTR (use);
876 if (TREE_CODE (name) != SSA_NAME)
877 continue;
879 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
880 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
881 copy_name_p);
882 SET_USE (use, copy);
886 /* Finds the ssa names used in STMT that are defined outside the
887 region between ENTRY and EXIT and replaces such ssa names with
888 their duplicates. The duplicates are stored to NAME_COPIES. Base
889 decls of all ssa names used in STMT (including those defined in
890 LOOP) are replaced with the new temporary variables; the
891 replacement decls are stored in DECL_COPIES. */
893 static bool
894 separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
895 htab_t decl_copies)
897 use_operand_p use;
898 ssa_op_iter oi;
899 tree var, name;
900 struct int_tree_map ielt;
901 struct name_to_copy_elt elt;
902 void **slot, **dslot;
904 if (gimple_debug_bind_p (stmt))
905 var = gimple_debug_bind_get_var (stmt);
906 else if (gimple_debug_source_bind_p (stmt))
907 var = gimple_debug_source_bind_get_var (stmt);
908 else
909 return true;
910 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
911 return true;
912 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
913 ielt.uid = DECL_UID (var);
914 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
915 if (!dslot)
916 return true;
917 if (gimple_debug_bind_p (stmt))
918 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
919 else if (gimple_debug_source_bind_p (stmt))
920 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
922 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
924 name = USE_FROM_PTR (use);
925 if (TREE_CODE (name) != SSA_NAME)
926 continue;
928 elt.version = SSA_NAME_VERSION (name);
929 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
930 if (!slot)
932 gimple_debug_bind_reset_value (stmt);
933 update_stmt (stmt);
934 break;
937 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
940 return false;
943 /* Callback for htab_traverse. Adds a field corresponding to the reduction
944 specified in SLOT. The type is passed in DATA. */
946 static int
947 add_field_for_reduction (void **slot, void *data)
950 struct reduction_info *const red = (struct reduction_info *) *slot;
951 tree const type = (tree) data;
952 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
953 tree field = build_decl (gimple_location (red->reduc_stmt),
954 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
956 insert_field_into_struct (type, field);
958 red->field = field;
960 return 1;
963 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
964 described in SLOT. The type is passed in DATA. */
966 static int
967 add_field_for_name (void **slot, void *data)
969 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
970 tree type = (tree) data;
971 tree name = ssa_name (elt->version);
972 tree var = SSA_NAME_VAR (name);
973 tree field = build_decl (DECL_SOURCE_LOCATION (var),
974 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
976 insert_field_into_struct (type, field);
977 elt->field = field;
979 return 1;
982 /* Callback for htab_traverse. A local result is the intermediate result
983 computed by a single
984 thread, or the initial value in case no iteration was executed.
985 This function creates a phi node reflecting these values.
986 The phi's result will be stored in NEW_PHI field of the
987 reduction's data structure. */
989 static int
990 create_phi_for_local_result (void **slot, void *data)
992 struct reduction_info *const reduc = (struct reduction_info *) *slot;
993 const struct loop *const loop = (const struct loop *) data;
994 edge e;
995 gimple new_phi;
996 basic_block store_bb;
997 tree local_res;
998 source_location locus;
1000 /* STORE_BB is the block where the phi
1001 should be stored. It is the destination of the loop exit.
1002 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1003 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1005 /* STORE_BB has two predecessors. One coming from the loop
1006 (the reduction's result is computed at the loop),
1007 and another coming from a block preceding the loop,
1008 when no iterations
1009 are executed (the initial value should be taken). */
1010 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1011 e = EDGE_PRED (store_bb, 1);
1012 else
1013 e = EDGE_PRED (store_bb, 0);
1014 local_res
1015 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
1016 NULL);
1017 locus = gimple_location (reduc->reduc_stmt);
1018 new_phi = create_phi_node (local_res, store_bb);
1019 SSA_NAME_DEF_STMT (local_res) = new_phi;
1020 add_phi_arg (new_phi, reduc->init, e, locus);
1021 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1022 FALLTHRU_EDGE (loop->latch), locus);
1023 reduc->new_phi = new_phi;
1025 return 1;
1028 struct clsn_data
1030 tree store;
1031 tree load;
1033 basic_block store_bb;
1034 basic_block load_bb;
1037 /* Callback for htab_traverse. Create an atomic instruction for the
1038 reduction described in SLOT.
1039 DATA annotates the place in memory the atomic operation relates to,
1040 and the basic block it needs to be generated in. */
1042 static int
1043 create_call_for_reduction_1 (void **slot, void *data)
1045 struct reduction_info *const reduc = (struct reduction_info *) *slot;
1046 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1047 gimple_stmt_iterator gsi;
1048 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1049 tree load_struct;
1050 basic_block bb;
1051 basic_block new_bb;
1052 edge e;
1053 tree t, addr, ref, x;
1054 tree tmp_load, name;
1055 gimple load;
/* ADDR is the address of the shared-struct field this reduction
   accumulates into; it is the target of the atomic update.  */
1057 load_struct = build_simple_mem_ref (clsn_data->load);
1058 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1060 addr = build_addr (t, current_function_decl);
1062 /* Create phi node. */
1063 bb = clsn_data->load_bb;
1065 e = split_block (bb, t);
1066 new_bb = e->dest;
/* Atomically load the current field value into TMP_LOAD.  */
1068 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1069 add_referenced_var (tmp_load);
1070 tmp_load = make_ssa_name (tmp_load, NULL);
1071 load = gimple_build_omp_atomic_load (tmp_load, addr);
1072 SSA_NAME_DEF_STMT (tmp_load) = load;
1073 gsi = gsi_start_bb (new_bb);
1074 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1076 e = split_block (new_bb, load);
1077 new_bb = e->dest;
1078 gsi = gsi_start_bb (new_bb);
1079 ref = tmp_load;
/* Combine the loaded value with this thread's partial result (the phi
   built earlier by create_phi_for_local_result) ...  */
1080 x = fold_build2 (reduc->reduction_code,
1081 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1082 PHI_RESULT (reduc->new_phi));
1084 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1085 GSI_CONTINUE_LINKING);
/* ... and atomically store the combined value back to the field.  */
1087 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1088 return 1;
1091 /* Create the atomic operation at the join point of the threads.
1092 REDUCTION_LIST describes the reductions in the LOOP.
1093 LD_ST_DATA describes the shared data structure where
1094 shared data is stored in and loaded from. */
1095 static void
1096 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1097 struct clsn_data *ld_st_data)
/* The per-thread partial-result phis must exist first: the atomic
   updates emitted in the second traversal read reduc->new_phi.  */
1099 htab_traverse (reduction_list, create_phi_for_local_result, loop);
1100 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1101 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1102 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1105 /* Callback for htab_traverse. Loads the final reduction value at the
1106 join point of all threads, and inserts it in the right place. */
1108 static int
1109 create_loads_for_reductions (void **slot, void *data)
1111 struct reduction_info *const red = (struct reduction_info *) *slot;
1112 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1113 gimple stmt;
1114 gimple_stmt_iterator gsi;
1115 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1116 tree load_struct;
1117 tree name;
1118 tree x;
/* Load the final value of this reduction's field from the shared
   structure and assign it to the name of the kept exit phi.  */
1120 gsi = gsi_after_labels (clsn_data->load_bb);
1121 load_struct = build_simple_mem_ref (clsn_data->load);
1122 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1123 NULL_TREE);
1125 x = load_struct;
1126 name = PHI_RESULT (red->keep_res);
1127 stmt = gimple_build_assign (name, x);
1128 SSA_NAME_DEF_STMT (name) = stmt;
1130 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
/* The kept phi is now redundant -- its result is defined by the load
   above -- so locate it in its block and remove it.  */
1132 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1133 !gsi_end_p (gsi); gsi_next (&gsi))
1134 if (gsi_stmt (gsi) == red->keep_res)
1136 remove_phi_node (&gsi, false);
1137 return 1;
/* red->keep_res must always be found in its own basic block.  */
1139 gcc_unreachable ();
1142 /* Load the reduction result that was stored in LD_ST_DATA.
1143 REDUCTION_LIST describes the list of reductions that the
1144 loads should be generated for. */
1145 static void
1146 create_final_loads_for_reduction (htab_t reduction_list,
1147 struct clsn_data *ld_st_data)
1149 gimple_stmt_iterator gsi;
1150 tree t;
1151 gimple stmt;
/* After the join point the load pointer simply aliases the store
   structure, so initialize it with the structure's address before the
   per-reduction loads (emitted by the traversal below) use it.  */
1153 gsi = gsi_after_labels (ld_st_data->load_bb);
1154 t = build_fold_addr_expr (ld_st_data->store);
1155 stmt = gimple_build_assign (ld_st_data->load, t);
1157 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1158 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1160 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1164 /* Callback for htab_traverse. Store the neutral value for the
1165 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1166 1 for MULT_EXPR, etc. into the reduction field.
1167 The reduction is specified in SLOT. The store information is
1168 passed in DATA. */
1170 static int
1171 create_stores_for_reduction (void **slot, void *data)
1173 struct reduction_info *const red = (struct reduction_info *) *slot;
1174 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1175 tree t;
1176 gimple stmt;
1177 gimple_stmt_iterator gsi;
1178 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1180 gsi = gsi_last_bb (clsn_data->store_bb);
1181 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1182 stmt = gimple_build_assign (t, red->initial_value);
1183 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1185 return 1;
1188 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1189 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1190 specified in SLOT. */
1192 static int
1193 create_loads_and_stores_for_name (void **slot, void *data)
1195 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1196 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1197 tree t;
1198 gimple stmt;
1199 gimple_stmt_iterator gsi;
1200 tree type = TREE_TYPE (elt->new_name);
1201 tree load_struct;
1203 gsi = gsi_last_bb (clsn_data->store_bb);
1204 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1205 stmt = gimple_build_assign (t, ssa_name (elt->version));
1206 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1208 gsi = gsi_last_bb (clsn_data->load_bb);
1209 load_struct = build_simple_mem_ref (clsn_data->load);
1210 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1211 stmt = gimple_build_assign (elt->new_name, t);
1212 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1213 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1215 return 1;
1218 /* Moves all the variables used in LOOP and defined outside of it (including
1219 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1220 name) to a structure created for this purpose. The code
1222 while (1)
1224 use (a);
1225 use (b);
1228 is transformed this way:
1230 bb0:
1231 old.a = a;
1232 old.b = b;
1234 bb1:
1235 a' = new->a;
1236 b' = new->b;
1237 while (1)
1239 use (a');
1240 use (b');
1243 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1244 pointer `new' is intentionally not initialized (the loop will be split to a
1245 separate function later, and `new' will be initialized from its arguments).
1246 LD_ST_DATA holds information about the shared data structure used to pass
1247 information among the threads. It is initialized here, and
1248 gen_parallel_loop will pass it to create_call_for_reduction that
1249 needs this information. REDUCTION_LIST describes the reductions
1250 in LOOP. */
1252 static void
1253 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1254 tree *arg_struct, tree *new_arg_struct,
1255 struct clsn_data *ld_st_data)
/* BB0 (stores) sits before the region entry, BB1 (loads) just after it.  */
1258 basic_block bb1 = split_edge (entry);
1259 basic_block bb0 = single_pred (bb1);
1260 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1261 name_to_copy_elt_eq, free);
1262 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1263 free);
1264 unsigned i;
1265 tree type, type_name, nvar;
1266 gimple_stmt_iterator gsi;
1267 struct clsn_data clsn_data;
1268 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1269 basic_block bb;
1270 basic_block entry_bb = bb1;
1271 basic_block exit_bb = exit->dest;
1272 bool has_debug_stmt = false;
1274 entry = single_succ_edge (entry_bb);
1275 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
/* First pass: collect, for every non-debug stmt in the region, the
   names and decls that need private copies.  */
1277 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1279 if (bb != entry_bb && bb != exit_bb)
1281 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1282 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1283 name_copies, decl_copies);
1285 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1287 gimple stmt = gsi_stmt (gsi);
1289 if (is_gimple_debug (stmt))
1290 has_debug_stmt = true;
1291 else
1292 separate_decls_in_region_stmt (entry, exit, stmt,
1293 name_copies, decl_copies);
1298 /* Now process debug bind stmts. We must not create decls while
1299 processing debug stmts, so we defer their processing so as to
1300 make sure we will have debug info for as many variables as
1301 possible (all of those that were dealt with in the loop above),
1302 and discard those for which we know there's nothing we can
1303 do. */
1304 if (has_debug_stmt)
1305 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1306 if (bb != entry_bb && bb != exit_bb)
1308 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1310 gimple stmt = gsi_stmt (gsi);
1312 if (is_gimple_debug (stmt))
1314 if (separate_decls_in_region_debug (stmt, name_copies,
1315 decl_copies))
1317 gsi_remove (&gsi, true);
1318 continue;
1322 gsi_next (&gsi);
1326 VEC_free (basic_block, heap, body);
1328 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1330 /* It may happen that there is nothing to copy (if there are only
1331 loop carried and external variables in the loop). */
1332 *arg_struct = NULL;
1333 *new_arg_struct = NULL;
1335 else
1337 /* Create the type for the structure to store the ssa names to. */
1338 type = lang_hooks.types.make_type (RECORD_TYPE);
1339 type_name = build_decl (UNKNOWN_LOCATION,
1340 TYPE_DECL, create_tmp_var_name (".paral_data"),
1341 type);
1342 TYPE_NAME (type) = type_name;
1344 htab_traverse (name_copies, add_field_for_name, type);
1345 if (reduction_list && htab_elements (reduction_list) > 0)
1347 /* Create the fields for reductions. */
1348 htab_traverse (reduction_list, add_field_for_reduction,
1349 type);
1351 layout_type (type);
1353 /* Create the loads and stores. */
1354 *arg_struct = create_tmp_var (type, ".paral_data_store");
1355 add_referenced_var (*arg_struct);
1356 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1357 add_referenced_var (nvar);
1358 *new_arg_struct = make_ssa_name (nvar, NULL);
1360 ld_st_data->store = *arg_struct;
1361 ld_st_data->load = *new_arg_struct;
1362 ld_st_data->store_bb = bb0;
1363 ld_st_data->load_bb = bb1;
1365 htab_traverse (name_copies, create_loads_and_stores_for_name,
1366 ld_st_data);
1368 /* Load the calculation from memory (after the join of the threads). */
1370 if (reduction_list && htab_elements (reduction_list) > 0)
1372 htab_traverse (reduction_list, create_stores_for_reduction,
1373 ld_st_data);
/* A separate clsn_data is used here: the final reduction loads go
   after the loop exit, not into the region's load block.  */
1374 clsn_data.load = make_ssa_name (nvar, NULL);
1375 clsn_data.load_bb = exit->dest;
1376 clsn_data.store = ld_st_data->store;
1377 create_final_loads_for_reduction (reduction_list, &clsn_data);
1381 htab_delete (decl_copies);
1382 htab_delete (name_copies);
1385 /* Bitmap containing uids of functions created by parallelization. We cannot
1386 allocate it from the default obstack, as it must live across compilation
1387 of several functions; we make it gc allocated instead. */
/* Lazily allocated in create_loop_fn; queried by parallelized_function_p.  */
1389 static GTY(()) bitmap parallelized_functions;
1391 /* Returns true if FN was created by create_loop_fn. */
1393 bool
1394 parallelized_function_p (tree fn)
1396 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1397 return false;
1399 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1402 /* Creates and returns an empty function that will receive the body of
1403 a parallelized loop. */
1405 static tree
1406 create_loop_fn (location_t loc)
1408 char buf[100];
1409 char *tname;
1410 tree decl, type, name, t;
1411 struct function *act_cfun = cfun;
1412 static unsigned loopfn_num;
1414 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1415 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1416 clean_symbol_name (tname);
1417 name = get_identifier (tname);
1418 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1420 decl = build_decl (loc, FUNCTION_DECL, name, type);
1421 if (!parallelized_functions)
1422 parallelized_functions = BITMAP_GGC_ALLOC ();
1423 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1425 TREE_STATIC (decl) = 1;
1426 TREE_USED (decl) = 1;
1427 DECL_ARTIFICIAL (decl) = 1;
1428 DECL_IGNORED_P (decl) = 0;
1429 TREE_PUBLIC (decl) = 0;
1430 DECL_UNINLINABLE (decl) = 1;
1431 DECL_EXTERNAL (decl) = 0;
1432 DECL_CONTEXT (decl) = NULL_TREE;
1433 DECL_INITIAL (decl) = make_node (BLOCK);
1435 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1436 DECL_ARTIFICIAL (t) = 1;
1437 DECL_IGNORED_P (t) = 1;
1438 DECL_RESULT (decl) = t;
1440 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1441 ptr_type_node);
1442 DECL_ARTIFICIAL (t) = 1;
1443 DECL_ARG_TYPE (t) = ptr_type_node;
1444 DECL_CONTEXT (t) = decl;
1445 TREE_USED (t) = 1;
1446 DECL_ARGUMENTS (decl) = t;
1448 allocate_struct_function (decl, false);
1450 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1451 it. */
1452 set_cfun (act_cfun);
1454 return decl;
1457 /* Moves the exit condition of LOOP to the beginning of its header, and
1458 duplicates the part of the last iteration that gets disabled to the
1459 exit of the loop. NIT is the number of iterations of the loop
1460 (used to initialize the variables in the duplicated part).
1462 TODO: the common case is that latch of the loop is empty and immediately
1463 follows the loop exit. In this case, it would be better not to copy the
1464 body of the loop, but only move the entry of the loop directly before the
1465 exit check and increase the number of iterations of the loop by one.
1466 This may need some additional preconditioning in case NIT = ~0.
1467 REDUCTION_LIST describes the reductions in LOOP. */
1469 static void
1470 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1472 basic_block *bbs, *nbbs, ex_bb, orig_header;
1473 unsigned n;
1474 bool ok;
1475 edge exit = single_dom_exit (loop), hpred;
1476 tree control, control_name, res, t;
1477 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1478 gimple_stmt_iterator gsi;
1479 tree nit_1;
/* Split the header so the (empty) new header can hold the exit test
   and the original header body follows it.  */
1481 split_block_after_labels (loop->header);
1482 orig_header = single_succ (loop->header);
1483 hpred = single_succ_edge (loop->header);
1485 cond_stmt = last_stmt (exit->src);
1486 control = gimple_cond_lhs (cond_stmt);
1487 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1489 /* Make sure that we have phi nodes on exit for all loop header phis
1490 (create_parallel_loop requires that). */
1491 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1493 phi = gsi_stmt (gsi);
1494 res = PHI_RESULT (phi);
1495 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1496 SET_PHI_RESULT (phi, t);
1497 nphi = create_phi_node (res, orig_header);
1498 SSA_NAME_DEF_STMT (res) = nphi;
1499 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1501 if (res == control)
/* Keep the exit condition tied to the header phi's new name.  */
1503 gimple_cond_set_lhs (cond_stmt, t);
1504 update_stmt (cond_stmt);
1505 control = t;
/* Duplicate the part of the last iteration that follows the exit test
   to after the loop, so the test becomes the first statement.  */
1509 bbs = get_loop_body_in_dom_order (loop);
1511 for (n = 0; bbs[n] != exit->src; n++)
1512 continue;
1513 nbbs = XNEWVEC (basic_block, n);
1514 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1515 bbs + 1, n, nbbs);
1516 gcc_assert (ok);
1517 free (bbs);
1518 ex_bb = nbbs[0];
1519 free (nbbs);
1521 /* Other than reductions, the only gimple reg that should be copied
1522 out of the loop is the control variable. */
1523 exit = single_dom_exit (loop);
1524 control_name = NULL_TREE;
1525 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1527 phi = gsi_stmt (gsi);
1528 res = PHI_RESULT (phi);
1529 if (!is_gimple_reg (res))
1531 gsi_next (&gsi);
1532 continue;
1535 /* Check if it is a part of reduction. If it is,
1536 keep the phi at the reduction's keep_res field. The
1537 PHI_RESULT of this phi is the resulting value of the reduction
1538 variable when exiting the loop. */
1540 if (htab_elements (reduction_list) > 0)
1542 struct reduction_info *red;
1544 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1545 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1546 if (red)
1548 red->keep_res = phi;
1549 gsi_next (&gsi);
1550 continue;
/* Anything else copied out must be the control variable, exactly once.  */
1553 gcc_assert (control_name == NULL_TREE
1554 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1555 control_name = res;
1556 remove_phi_node (&gsi, false);
1558 gcc_assert (control_name != NULL_TREE);
1560 /* Initialize the control variable to number of iterations
1561 according to the rhs of the exit condition. */
1562 gsi = gsi_after_labels (ex_bb);
1563 cond_nit = last_stmt (exit->src);
1564 nit_1 = gimple_cond_rhs (cond_nit);
1565 nit_1 = force_gimple_operand_gsi (&gsi,
1566 fold_convert (TREE_TYPE (control_name), nit_1),
1567 false, NULL_TREE, false, GSI_SAME_STMT);
1568 stmt = gimple_build_assign (control_name, nit_1);
1569 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1570 SSA_NAME_DEF_STMT (control_name) = stmt;
1573 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1574 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1575 NEW_DATA is the variable that should be initialized from the argument
1576 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1577 basic block containing GIMPLE_OMP_PARALLEL tree. */
1579 static basic_block
1580 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1581 tree new_data, unsigned n_threads, location_t loc)
1583 gimple_stmt_iterator gsi;
1584 basic_block bb, paral_bb, for_bb, ex_bb;
1585 tree t, param;
1586 gimple stmt, for_stmt, phi, cond_stmt;
1587 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1588 edge exit, nexit, guard, end, e;
1590 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1591 bb = loop_preheader_edge (loop)->src;
1592 paral_bb = single_pred (bb);
1593 gsi = gsi_last_bb (paral_bb);
1595 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1596 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1597 = build_int_cst (integer_type_node, n_threads);
1598 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1599 gimple_set_location (stmt, loc);
1601 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1603 /* Initialize NEW_DATA. */
1604 if (data)
1606 gsi = gsi_after_labels (bb);
/* NEW_DATA = (type of NEW_DATA) .paral_data_param -- the spawned
   function receives the shared struct's address as its argument.  */
1608 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1609 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1610 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1611 SSA_NAME_DEF_STMT (param) = stmt;
1613 stmt = gimple_build_assign (new_data,
1614 fold_convert (TREE_TYPE (new_data), param));
1615 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1616 SSA_NAME_DEF_STMT (new_data) = stmt;
1619 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1620 bb = split_loop_exit_edge (single_dom_exit (loop));
1621 gsi = gsi_last_bb (bb);
1622 stmt = gimple_build_omp_return (false);
1623 gimple_set_location (stmt, loc);
1624 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1626 /* Extract data for GIMPLE_OMP_FOR. */
1627 gcc_assert (loop->header == single_dom_exit (loop)->src);
1628 cond_stmt = last_stmt (loop->header);
1630 cvar = gimple_cond_lhs (cond_stmt);
1631 cvar_base = SSA_NAME_VAR (cvar);
1632 phi = SSA_NAME_DEF_STMT (cvar);
1633 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
/* INITVAR replaces the preheader value of the control variable; the
   GIMPLE_OMP_FOR below becomes its defining statement.  */
1634 initvar = make_ssa_name (cvar_base, NULL);
1635 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1636 initvar);
1637 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
/* The increment of the control variable moves into GIMPLE_OMP_CONTINUE,
   so remove its old definition from the latch.  */
1639 gsi = gsi_last_nondebug_bb (loop->latch);
1640 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1641 gsi_remove (&gsi, true);
1643 /* Prepare cfg. */
1644 for_bb = split_edge (loop_preheader_edge (loop));
1645 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1646 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1647 gcc_assert (exit == single_dom_exit (loop));
1649 guard = make_edge (for_bb, ex_bb, 0);
1650 single_succ_edge (loop->latch)->flags = 0;
1651 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1652 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1654 source_location locus;
1655 tree def;
1656 phi = gsi_stmt (gsi);
1657 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1659 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1660 locus = gimple_phi_arg_location_from_edge (stmt,
1661 loop_preheader_edge (loop));
1662 add_phi_arg (phi, def, guard, locus);
1664 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1665 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1666 add_phi_arg (phi, def, end, locus);
1668 e = redirect_edge_and_branch (exit, nexit->dest);
1669 PENDING_STMT (e) = NULL;
1671 /* Emit GIMPLE_OMP_FOR. */
1672 gimple_cond_set_lhs (cond_stmt, cvar_base);
1673 type = TREE_TYPE (cvar);
1674 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1675 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1677 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1678 gimple_set_location (for_stmt, loc);
1679 gimple_omp_for_set_index (for_stmt, 0, initvar);
1680 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1681 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1682 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1683 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1684 cvar_base,
1685 build_int_cst (type, 1)));
1687 gsi = gsi_last_bb (for_bb);
1688 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1689 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1691 /* Emit GIMPLE_OMP_CONTINUE. */
1692 gsi = gsi_last_bb (loop->latch);
1693 stmt = gimple_build_omp_continue (cvar_next, cvar);
1694 gimple_set_location (stmt, loc);
1695 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1696 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1698 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1699 gsi = gsi_last_bb (ex_bb);
1700 stmt = gimple_build_omp_return (true);
1701 gimple_set_location (stmt, loc);
1702 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1704 /* After the above dom info is hosed. Re-compute it. */
1705 free_dominance_info (CDI_DOMINATORS);
1706 calculate_dominance_info (CDI_DOMINATORS);
1708 return paral_bb;
1711 /* Generates code to execute the iterations of LOOP in N_THREADS
1712 threads in parallel.
1714 NITER describes number of iterations of LOOP.
1715 REDUCTION_LIST describes the reductions existent in the LOOP. */
1717 static void
1718 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1719 unsigned n_threads, struct tree_niter_desc *niter)
1721 loop_iterator li;
1722 tree many_iterations_cond, type, nit;
1723 tree arg_struct, new_arg_struct;
1724 gimple_seq stmts;
1725 basic_block parallel_head;
1726 edge entry, exit;
1727 struct clsn_data clsn_data;
1728 unsigned prob;
1729 location_t loc;
1730 gimple cond_stmt;
1731 unsigned int m_p_thread=2;
1733 /* From
1735 ---------------------------------------------------------------------
1736 loop
1738 IV = phi (INIT, IV + STEP)
1739 BODY1;
1740 if (COND)
1741 break;
1742 BODY2;
1744 ---------------------------------------------------------------------
1746 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1747 we generate the following code:
1749 ---------------------------------------------------------------------
1751 if (MAY_BE_ZERO
1752 || NITER < MIN_PER_THREAD * N_THREADS)
1753 goto original;
1755 BODY1;
1756 store all local loop-invariant variables used in body of the loop to DATA.
1757 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1758 load the variables from DATA.
1759 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1760 BODY2;
1761 BODY1;
1762 GIMPLE_OMP_CONTINUE;
1763 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1764 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1765 goto end;
1767 original:
1768 loop
1770 IV = phi (INIT, IV + STEP)
1771 BODY1;
1772 if (COND)
1773 break;
1774 BODY2;
1777 end:
1781 /* Create two versions of the loop -- in the old one, we know that the
1782 number of iterations is large enough, and we will transform it into the
1783 loop that will be split to loop_fn, the new one will be used for the
1784 remaining iterations. */
1786 /* We should compute a better number-of-iterations value for outer loops.
1787 That is, if we have
1789 for (i = 0; i < n; ++i)
1790 for (j = 0; j < m; ++j)
1793 we should compute nit = n * m, not nit = n.
1794 Also may_be_zero handling would need to be adjusted. */
1796 type = TREE_TYPE (niter->niter);
1797 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1798 NULL_TREE);
1799 if (stmts)
1800 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
/* For loops with an inner loop the plain NIT undercounts (see note
   above), so use a lower per-thread threshold there.  */
1802 if (loop->inner)
1803 m_p_thread=2;
1804 else
1805 m_p_thread=MIN_PER_THREAD;
1807 many_iterations_cond =
1808 fold_build2 (GE_EXPR, boolean_type_node,
1809 nit, build_int_cst (type, m_p_thread * n_threads));
1811 many_iterations_cond
1812 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1813 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1814 many_iterations_cond);
1815 many_iterations_cond
1816 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1817 if (stmts)
1818 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1819 if (!is_gimple_condexpr (many_iterations_cond))
1821 many_iterations_cond
1822 = force_gimple_operand (many_iterations_cond, &stmts,
1823 true, NULL_TREE);
1824 if (stmts)
1825 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1828 initialize_original_copy_tables ();
1830 /* We assume that the loop usually iterates a lot. */
1831 prob = 4 * REG_BR_PROB_BASE / 5;
1832 loop_version (loop, many_iterations_cond, NULL,
1833 prob, prob, REG_BR_PROB_BASE - prob, true);
1834 update_ssa (TODO_update_ssa);
1835 free_original_copy_tables ();
1837 /* Base all the induction variables in LOOP on a single control one. */
1838 canonicalize_loop_ivs (loop, &nit, true);
1840 /* Ensure that the exit condition is the first statement in the loop. */
1841 transform_to_exit_first_loop (loop, reduction_list, nit);
1843 /* Generate initializations for reductions. */
1844 if (htab_elements (reduction_list) > 0)
1845 htab_traverse (reduction_list, initialize_reductions, loop);
1847 /* Eliminate the references to local variables from the loop. */
1848 gcc_assert (single_exit (loop));
1849 entry = loop_preheader_edge (loop);
1850 exit = single_dom_exit (loop);
1852 eliminate_local_variables (entry, exit);
1853 /* In the old loop, move all variables non-local to the loop to a structure
1854 and back, and create separate decls for the variables used in loop. */
1855 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1856 &new_arg_struct, &clsn_data);
1858 /* Create the parallel constructs. */
1859 loc = UNKNOWN_LOCATION;
1860 cond_stmt = last_stmt (loop->header);
1861 if (cond_stmt)
1862 loc = gimple_location (cond_stmt);
1863 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1864 new_arg_struct, n_threads, loc);
1865 if (htab_elements (reduction_list) > 0)
1866 create_call_for_reduction (loop, reduction_list, &clsn_data);
1868 scev_reset ();
1870 /* Cancel the loop (it is simpler to do it here rather than to teach the
1871 expander to do it). */
1872 cancel_loop_tree (loop);
1874 /* Free loop bound estimations that could contain references to
1875 removed statements. */
1876 FOR_EACH_LOOP (li, loop, 0)
1877 free_numbers_of_iterations_estimates_loop (loop);
1879 /* Expand the parallel constructs. We do it directly here instead of running
1880 a separate expand_omp pass, since it is more efficient, and less likely to
1881 cause troubles with further analyses not being able to deal with the
1882 OMP trees. */
1884 omp_expand_local (parallel_head);
1887 /* Returns true when LOOP contains vector phi nodes. */
1889 static bool
1890 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1892 unsigned i;
1893 basic_block *bbs = get_loop_body_in_dom_order (loop);
1894 gimple_stmt_iterator gsi;
1895 bool res = true;
1897 for (i = 0; i < loop->num_nodes; i++)
1898 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1899 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1900 goto end;
1902 res = false;
1903 end:
1904 free (bbs);
1905 return res;
1908 /* Create a reduction_info struct, initialize it with REDUC_STMT
1909 and PHI, insert it to the REDUCTION_LIST. */
1911 static void
1912 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1914 PTR *slot;
1915 struct reduction_info *new_reduction;
1917 gcc_assert (reduc_stmt);
1919 if (dump_file && (dump_flags & TDF_DETAILS))
1921 fprintf (dump_file,
1922 "Detected reduction. reduction stmt is: \n");
1923 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1924 fprintf (dump_file, "\n");
1927 new_reduction = XCNEW (struct reduction_info);
1929 new_reduction->reduc_stmt = reduc_stmt;
1930 new_reduction->reduc_phi = phi;
1931 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1932 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1933 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1934 *slot = new_reduction;
1937 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1939 static int
1940 set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1942 struct reduction_info *const red = (struct reduction_info *) *slot;
1943 gimple_set_uid (red->reduc_phi, red->reduc_version);
1944 return 1;
1947 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1949 static void
1950 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1952 gimple_stmt_iterator gsi;
1953 loop_vec_info simple_loop_info;
/* Borrow the vectorizer's loop analysis to recognize reductions;
   silence its dumping while we do.  */
1955 vect_dump = NULL;
1956 simple_loop_info = vect_analyze_loop_form (loop);
1958 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1960 gimple phi = gsi_stmt (gsi);
1961 affine_iv iv;
1962 tree res = PHI_RESULT (phi);
1963 bool double_reduc;
1965 if (!is_gimple_reg (res))
1966 continue;
/* A simple induction variable is not a reduction; only non-IV header
   phis of a vectorizer-recognized loop form are candidates.  */
1968 if (!simple_iv (loop, loop, res, &iv, true)
1969 && simple_loop_info)
1971 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1972 phi, true,
1973 &double_reduc);
1974 if (reduc_stmt && !double_reduc)
1975 build_new_reduction (reduction_list, reduc_stmt, phi);
1978 destroy_loop_vec_info (simple_loop_info, true);
1980 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1981 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1982 only now. */
1983 htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
1986 /* Try to initialize NITER for code generation part. */
1988 static bool
1989 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1991 edge exit = single_dom_exit (loop);
1993 gcc_assert (exit);
1995 /* We need to know # of iterations, and there should be no uses of values
1996 defined inside loop outside of it, unless the values are invariants of
1997 the loop. */
1998 if (!number_of_iterations_exit (loop, exit, niter, false))
2000 if (dump_file && (dump_flags & TDF_DETAILS))
2001 fprintf (dump_file, " FAILED: number of iterations not known\n");
2002 return false;
2005 return true;
2008 /* Try to initialize REDUCTION_LIST for code generation part.
2009 REDUCTION_LIST describes the reductions. */
2011 static bool
2012 try_create_reduction_list (loop_p loop, htab_t reduction_list)
2014 edge exit = single_dom_exit (loop);
2015 gimple_stmt_iterator gsi;
2017 gcc_assert (exit);
2019 gather_scalar_reductions (loop, reduction_list);
/* Every value live across the exit must belong to a detected
   reduction; otherwise parallelization is unsafe.  */
2022 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2024 gimple phi = gsi_stmt (gsi);
2025 struct reduction_info *red;
2026 imm_use_iterator imm_iter;
2027 use_operand_p use_p;
2028 gimple reduc_phi;
2029 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2031 if (is_gimple_reg (val))
2033 if (dump_file && (dump_flags & TDF_DETAILS))
2035 fprintf (dump_file, "phi is ");
2036 print_gimple_stmt (dump_file, phi, 0, 0);
2037 fprintf (dump_file, "arg of phi to exit: value ");
2038 print_generic_expr (dump_file, val, 0);
2039 fprintf (dump_file, " used outside loop\n");
2040 fprintf (dump_file,
2041 " checking if it a part of reduction pattern: \n");
2043 if (htab_elements (reduction_list) == 0)
2045 if (dump_file && (dump_flags & TDF_DETAILS))
2046 fprintf (dump_file,
2047 " FAILED: it is not a part of reduction.\n");
2048 return false;
/* Find the in-loop (non-debug) use of VAL: that is the reduction phi.  */
2050 reduc_phi = NULL;
2051 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2053 if (!gimple_debug_bind_p (USE_STMT (use_p))
2054 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2056 reduc_phi = USE_STMT (use_p);
2057 break;
2060 red = reduction_phi (reduction_list, reduc_phi);
2061 if (red == NULL)
2063 if (dump_file && (dump_flags & TDF_DETAILS))
2064 fprintf (dump_file,
2065 " FAILED: it is not a part of reduction.\n");
2066 return false;
2068 if (dump_file && (dump_flags & TDF_DETAILS))
2070 fprintf (dump_file, "reduction phi is ");
2071 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2072 fprintf (dump_file, "reduction stmt is ");
2073 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2078 /* The iterations of the loop may communicate only through bivs whose
2079 iteration space can be distributed efficiently. */
2080 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2082 gimple phi = gsi_stmt (gsi);
2083 tree def = PHI_RESULT (phi);
2084 affine_iv iv;
/* A header phi that is neither a simple IV nor a known reduction
   means a loop-carried scalar dependency -- give up.  */
2086 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
2088 struct reduction_info *red;
2090 red = reduction_phi (reduction_list, phi);
2091 if (red == NULL)
2093 if (dump_file && (dump_flags & TDF_DETAILS))
2094 fprintf (dump_file,
2095 " FAILED: scalar dependency between iterations\n");
2096 return false;
2102 return true;
2105 /* Detect parallel loops and generate parallel code using libgomp
2106 primitives. Returns true if some loop was parallelized, false
2107 otherwise. */
2109 bool
2110 parallelize_loops (void)
2112 unsigned n_threads = flag_tree_parallelize_loops;
2113 bool changed = false;
2114 struct loop *loop;
2115 struct tree_niter_desc niter_desc;
2116 loop_iterator li;
2117 htab_t reduction_list;
2118 struct obstack parloop_obstack;
2119 HOST_WIDE_INT estimated;
2120 LOC loop_loc;
2122 /* Do not parallelize loops in the functions created by parallelization. */
2123 if (parallelized_function_p (cfun->decl))
2124 return false;
2125 if (cfun->has_nonlocal_label)
2126 return false;
2128 gcc_obstack_init (&parloop_obstack);
2129 reduction_list = htab_create (10, reduction_info_hash,
2130 reduction_info_eq, free);
2131 init_stmt_vec_info_vec ();
2133 FOR_EACH_LOOP (li, loop, 0)
2135 htab_empty (reduction_list);
2136 if (dump_file && (dump_flags & TDF_DETAILS))
2138 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2139 if (loop->inner)
2140 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2141 else
2142 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2145 /* If we use autopar in graphite pass, we use its marked dependency
2146 checking results. */
2147 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2149 if (dump_file && (dump_flags & TDF_DETAILS))
2150 fprintf (dump_file, "loop is not parallel according to graphite\n");
2151 continue;
2154 if (!single_dom_exit (loop))
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2158 fprintf (dump_file, "loop is !single_dom_exit\n");
2160 continue;
2163 if (/* And of course, the loop must be parallelizable. */
2164 !can_duplicate_loop_p (loop)
2165 || loop_has_blocks_with_irreducible_flag (loop)
2166 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2167 /* FIXME: the check for vector phi nodes could be removed. */
2168 || loop_has_vector_phi_nodes (loop))
2169 continue;
2171 estimated = estimated_stmt_executions_int (loop);
2172 if (estimated == -1)
2173 estimated = max_stmt_executions_int (loop);
2174 /* FIXME: Bypass this check as graphite doesn't update the
2175 count and frequency correctly now. */
2176 if (!flag_loop_parallelize_all
2177 && ((estimated != -1
2178 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2179 /* Do not bother with loops in cold areas. */
2180 || optimize_loop_nest_for_size_p (loop)))
2181 continue;
2183 if (!try_get_loop_niter (loop, &niter_desc))
2184 continue;
2186 if (!try_create_reduction_list (loop, reduction_list))
2187 continue;
2189 if (!flag_loop_parallelize_all
2190 && !loop_parallel_p (loop, &parloop_obstack))
2191 continue;
2193 changed = true;
2194 if (dump_file && (dump_flags & TDF_DETAILS))
2196 if (loop->inner)
2197 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2198 else
2199 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2200 loop_loc = find_loop_location (loop);
2201 if (loop_loc != UNKNOWN_LOC)
2202 fprintf (dump_file, "\nloop at %s:%d: ",
2203 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2205 gen_parallel_loop (loop, reduction_list,
2206 n_threads, &niter_desc);
2207 #ifdef ENABLE_CHECKING
2208 verify_flow_info ();
2209 verify_loop_structure ();
2210 verify_loop_closed_ssa (true);
2211 #endif
2214 free_stmt_vec_info_vec ();
2215 htab_delete (reduction_list);
2216 obstack_free (&parloop_obstack, NULL);
2218 /* Parallelization will cause new function calls to be inserted through
2219 which local variables will escape. Reset the points-to solution
2220 for ESCAPED. */
2221 if (changed)
2222 pt_solution_reset (&cfun->gimple_df->escaped);
2224 return changed;
2227 #include "gt-tree-parloops.h"