Support slim switch for cfg graph dump
[official-gcc.git] / gcc / tree-parloops.c
blob1729f230115479b56e158fdcb1156936290e1bc2
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree-flow.h"
26 #include "cfgloop.h"
27 #include "tree-data-ref.h"
28 #include "tree-scalar-evolution.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-pass.h"
31 #include "langhooks.h"
32 #include "tree-vectorizer.h"
34 /* This pass tries to distribute iterations of loops into several threads.
35 The implementation is straightforward -- for each loop we test whether its
36 iterations are independent, and if it is the case (and some additional
37 conditions regarding profitability and correctness are satisfied), we
38 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
39 machinery do its job.
41 The most of the complexity is in bringing the code into shape expected
42 by the omp expanders:
43 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
44 variable and that the exit test is at the start of the loop body
45 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
46 variables by accesses through pointers, and breaking up ssa chains
47 by storing the values incoming to the parallelized loop to a structure
48 passed to the new function as an argument (something similar is done
49 in omp gimplification, unfortunately only a small part of the code
50 can be shared).
52 TODO:
53 -- if there are several parallelizable loops in a function, it may be
54 possible to generate the threads just once (using synchronization to
55 ensure that cross-loop dependences are obeyed).
56 -- handling of common reduction patterns for outer loops.
58 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
60 Reduction handling:
61 currently we use vect_force_simple_reduction() to detect reduction patterns.
62 The code transformation will be introduced by an example.
65 parloop
67 int sum=1;
69 for (i = 0; i < N; i++)
71 x[i] = i + 3;
72 sum+=x[i];
76 gimple-like code:
77 header_bb:
79 # sum_29 = PHI <sum_11(5), 1(3)>
80 # i_28 = PHI <i_12(5), 0(3)>
81 D.1795_8 = i_28 + 3;
82 x[i_28] = D.1795_8;
83 sum_11 = D.1795_8 + sum_29;
84 i_12 = i_28 + 1;
85 if (N_6(D) > i_12)
86 goto header_bb;
89 exit_bb:
91 # sum_21 = PHI <sum_11(4)>
92 printf (&"%d"[0], sum_21);
95 after reduction transformation (only relevant parts):
97 parloop
100 ....
103 # Storing the initial value given by the user. #
105 .paral_data_store.32.sum.27 = 1;
107 #pragma omp parallel num_threads(4)
109 #pragma omp for schedule(static)
111 # The neutral element corresponding to the particular
112 reduction's operation, e.g. 0 for PLUS_EXPR,
113 1 for MULT_EXPR, etc. replaces the user's initial value. #
115 # sum.27_29 = PHI <sum.27_11, 0>
117 sum.27_11 = D.1827_8 + sum.27_29;
119 GIMPLE_OMP_CONTINUE
121 # Adding this reduction phi is done at create_phi_for_local_result() #
122 # sum.27_56 = PHI <sum.27_11, 0>
123 GIMPLE_OMP_RETURN
125 # Creating the atomic operation is done at
126 create_call_for_reduction_1() #
128 #pragma omp atomic_load
129 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
130 D.1840_60 = sum.27_56 + D.1839_59;
131 #pragma omp atomic_store (D.1840_60);
133 GIMPLE_OMP_RETURN
135 # collecting the result after the join of the threads is done at
136 create_loads_for_reductions().
137 The value computed by the threads is loaded from the
138 shared struct. #
141 .paral_data_load.33_52 = &.paral_data_store.32;
142 sum_37 = .paral_data_load.33_52->sum.27;
143 sum_43 = D.1795_41 + sum_37;
145 exit bb:
146 # sum_21 = PHI <sum_43, sum_26>
147 printf (&"%d"[0], sum_21);
155 /* Minimal number of iterations of a loop that should be executed in each
156 thread. */
157 #define MIN_PER_THREAD 100
159 /* Element of the hashtable, representing a
160 reduction in the current loop. */
161 struct reduction_info
163 gimple reduc_stmt; /* reduction statement. */
164 gimple reduc_phi; /* The phi node defining the reduction. */
165 enum tree_code reduction_code;/* code for the reduction operation. */
166 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
167 result. */
168 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
169 of the reduction variable when existing the loop. */
170 tree initial_value; /* The initial value of the reduction var before entering the loop. */
171 tree field; /* the name of the field in the parloop data structure intended for reduction. */
172 tree init; /* reduction initialization value. */
173 gimple new_phi; /* (helper field) Newly created phi node whose result
174 will be passed to the atomic operation. Represents
175 the local result each thread computed for the reduction
176 operation. */
179 /* Equality and hash functions for hashtab code. */
181 static int
182 reduction_info_eq (const void *aa, const void *bb)
184 const struct reduction_info *a = (const struct reduction_info *) aa;
185 const struct reduction_info *b = (const struct reduction_info *) bb;
187 return (a->reduc_phi == b->reduc_phi);
190 static hashval_t
191 reduction_info_hash (const void *aa)
193 const struct reduction_info *a = (const struct reduction_info *) aa;
195 return a->reduc_version;
198 static struct reduction_info *
199 reduction_phi (htab_t reduction_list, gimple phi)
201 struct reduction_info tmpred, *red;
203 if (htab_elements (reduction_list) == 0 || phi == NULL)
204 return NULL;
206 tmpred.reduc_phi = phi;
207 tmpred.reduc_version = gimple_uid (phi);
208 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210 return red;
213 /* Element of hashtable of names to copy. */
215 struct name_to_copy_elt
217 unsigned version; /* The version of the name to copy. */
218 tree new_name; /* The new name used in the copy. */
219 tree field; /* The field of the structure used to pass the
220 value. */
223 /* Equality and hash functions for hashtab code. */
225 static int
226 name_to_copy_elt_eq (const void *aa, const void *bb)
228 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
229 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231 return a->version == b->version;
234 static hashval_t
235 name_to_copy_elt_hash (const void *aa)
237 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239 return (hashval_t) a->version;
242 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
243 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
244 represents the denominator for every element in the matrix. */
245 typedef struct lambda_trans_matrix_s
247 lambda_matrix matrix;
248 int rowsize;
249 int colsize;
250 int denominator;
251 } *lambda_trans_matrix;
252 #define LTM_MATRIX(T) ((T)->matrix)
253 #define LTM_ROWSIZE(T) ((T)->rowsize)
254 #define LTM_COLSIZE(T) ((T)->colsize)
255 #define LTM_DENOMINATOR(T) ((T)->denominator)
257 /* Allocate a new transformation matrix. */
259 static lambda_trans_matrix
260 lambda_trans_matrix_new (int colsize, int rowsize,
261 struct obstack * lambda_obstack)
263 lambda_trans_matrix ret;
265 ret = (lambda_trans_matrix)
266 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
267 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
268 LTM_ROWSIZE (ret) = rowsize;
269 LTM_COLSIZE (ret) = colsize;
270 LTM_DENOMINATOR (ret) = 1;
271 return ret;
274 /* Multiply a vector VEC by a matrix MAT.
275 MAT is an M*N matrix, and VEC is a vector with length N. The result
276 is stored in DEST which must be a vector of length M. */
278 static void
279 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
280 lambda_vector vec, lambda_vector dest)
282 int i, j;
284 lambda_vector_clear (dest, m);
285 for (i = 0; i < m; i++)
286 for (j = 0; j < n; j++)
287 dest[i] += matrix[i][j] * vec[j];
290 /* Return true if TRANS is a legal transformation matrix that respects
291 the dependence vectors in DISTS and DIRS. The conservative answer
292 is false.
294 "Wolfe proves that a unimodular transformation represented by the
295 matrix T is legal when applied to a loop nest with a set of
296 lexicographically non-negative distance vectors RDG if and only if
297 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
298 i.e.: if and only if it transforms the lexicographically positive
299 distance vectors to lexicographically positive vectors. Note that
300 a unimodular matrix must transform the zero vector (and only it) to
301 the zero vector." S.Muchnick. */
303 static bool
304 lambda_transform_legal_p (lambda_trans_matrix trans,
305 int nb_loops,
306 vec<ddr_p> dependence_relations)
308 unsigned int i, j;
309 lambda_vector distres;
310 struct data_dependence_relation *ddr;
312 gcc_assert (LTM_COLSIZE (trans) == nb_loops
313 && LTM_ROWSIZE (trans) == nb_loops);
315 /* When there are no dependences, the transformation is correct. */
316 if (dependence_relations.length () == 0)
317 return true;
319 ddr = dependence_relations[0];
320 if (ddr == NULL)
321 return true;
323 /* When there is an unknown relation in the dependence_relations, we
324 know that it is no worth looking at this loop nest: give up. */
325 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
326 return false;
328 distres = lambda_vector_new (nb_loops);
330 /* For each distance vector in the dependence graph. */
331 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
333 /* Don't care about relations for which we know that there is no
334 dependence, nor about read-read (aka. output-dependences):
335 these data accesses can happen in any order. */
336 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
337 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
338 continue;
340 /* Conservatively answer: "this transformation is not valid". */
341 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
342 return false;
344 /* If the dependence could not be captured by a distance vector,
345 conservatively answer that the transform is not valid. */
346 if (DDR_NUM_DIST_VECTS (ddr) == 0)
347 return false;
349 /* Compute trans.dist_vect */
350 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
353 DDR_DIST_VECT (ddr, j), distres);
355 if (!lambda_vector_lexico_pos (distres, nb_loops))
356 return false;
359 return true;
362 /* Data dependency analysis. Returns true if the iterations of LOOP
363 are independent on each other (that is, if we can execute them
364 in parallel). */
366 static bool
367 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
369 vec<loop_p> loop_nest;
370 vec<ddr_p> dependence_relations;
371 vec<data_reference_p> datarefs;
372 lambda_trans_matrix trans;
373 bool ret = false;
375 if (dump_file && (dump_flags & TDF_DETAILS))
377 fprintf (dump_file, "Considering loop %d\n", loop->num);
378 if (!loop->inner)
379 fprintf (dump_file, "loop is innermost\n");
380 else
381 fprintf (dump_file, "loop NOT innermost\n");
384 /* Check for problems with dependences. If the loop can be reversed,
385 the iterations are independent. */
386 datarefs.create (10);
387 dependence_relations.create (10 * 10);
388 loop_nest.create (3);
389 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
390 &dependence_relations))
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
394 ret = false;
395 goto end;
397 if (dump_file && (dump_flags & TDF_DETAILS))
398 dump_data_dependence_relations (dump_file, dependence_relations);
400 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
401 LTM_MATRIX (trans)[0][0] = -1;
403 if (lambda_transform_legal_p (trans, 1, dependence_relations))
405 ret = true;
406 if (dump_file && (dump_flags & TDF_DETAILS))
407 fprintf (dump_file, " SUCCESS: may be parallelized\n");
409 else if (dump_file && (dump_flags & TDF_DETAILS))
410 fprintf (dump_file,
411 " FAILED: data dependencies exist across iterations\n");
413 end:
414 loop_nest.release ();
415 free_dependence_relations (dependence_relations);
416 free_data_refs (datarefs);
418 return ret;
421 /* Return true when LOOP contains basic blocks marked with the
422 BB_IRREDUCIBLE_LOOP flag. */
424 static inline bool
425 loop_has_blocks_with_irreducible_flag (struct loop *loop)
427 unsigned i;
428 basic_block *bbs = get_loop_body_in_dom_order (loop);
429 bool res = true;
431 for (i = 0; i < loop->num_nodes; i++)
432 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
433 goto end;
435 res = false;
436 end:
437 free (bbs);
438 return res;
441 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
442 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
443 to their addresses that can be reused. The address of OBJ is known to
444 be invariant in the whole function. Other needed statements are placed
445 right before GSI. */
447 static tree
448 take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
449 gimple_stmt_iterator *gsi)
451 int uid;
452 void **dslot;
453 struct int_tree_map ielt, *nielt;
454 tree *var_p, name, addr;
455 gimple stmt;
456 gimple_seq stmts;
458 /* Since the address of OBJ is invariant, the trees may be shared.
459 Avoid rewriting unrelated parts of the code. */
460 obj = unshare_expr (obj);
461 for (var_p = &obj;
462 handled_component_p (*var_p);
463 var_p = &TREE_OPERAND (*var_p, 0))
464 continue;
466 /* Canonicalize the access to base on a MEM_REF. */
467 if (DECL_P (*var_p))
468 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470 /* Assign a canonical SSA name to the address of the base decl used
471 in the address and share it for all accesses and addresses based
472 on it. */
473 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
474 ielt.uid = uid;
475 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
476 if (!*dslot)
478 if (gsi == NULL)
479 return NULL;
480 addr = TREE_OPERAND (*var_p, 0);
481 name = make_temp_ssa_name (TREE_TYPE (addr), NULL,
482 get_name (TREE_OPERAND
483 (TREE_OPERAND (*var_p, 0), 0)));
484 stmt = gimple_build_assign (name, addr);
485 gsi_insert_on_edge_immediate (entry, stmt);
487 nielt = XNEW (struct int_tree_map);
488 nielt->uid = uid;
489 nielt->to = name;
490 *dslot = nielt;
492 else
493 name = ((struct int_tree_map *) *dslot)->to;
495 /* Express the address in terms of the canonical SSA name. */
496 TREE_OPERAND (*var_p, 0) = name;
497 if (gsi == NULL)
498 return build_fold_addr_expr_with_type (obj, type);
500 name = force_gimple_operand (build_addr (obj, current_function_decl),
501 &stmts, true, NULL_TREE);
502 if (!gimple_seq_empty_p (stmts))
503 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
505 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
507 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
508 NULL_TREE);
509 if (!gimple_seq_empty_p (stmts))
510 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
513 return name;
516 /* Callback for htab_traverse. Create the initialization statement
517 for reduction described in SLOT, and place it at the preheader of
518 the loop described in DATA. */
520 static int
521 initialize_reductions (void **slot, void *data)
523 tree init, c;
524 tree bvar, type, arg;
525 edge e;
527 struct reduction_info *const reduc = (struct reduction_info *) *slot;
528 struct loop *loop = (struct loop *) data;
530 /* Create initialization in preheader:
531 reduction_variable = initialization value of reduction. */
533 /* In the phi node at the header, replace the argument coming
534 from the preheader with the reduction initialization value. */
536 /* Create a new variable to initialize the reduction. */
537 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
538 bvar = create_tmp_var (type, "reduction");
540 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
541 OMP_CLAUSE_REDUCTION);
542 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
543 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
545 init = omp_reduction_init (c, TREE_TYPE (bvar));
546 reduc->init = init;
548 /* Replace the argument representing the initialization value
549 with the initialization value for the reduction (neutral
550 element for the particular operation, e.g. 0 for PLUS_EXPR,
551 1 for MULT_EXPR, etc).
552 Keep the old value in a new variable "reduction_initial",
553 that will be taken in consideration after the parallel
554 computing is done. */
556 e = loop_preheader_edge (loop);
557 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
558 /* Create new variable to hold the initial value. */
560 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
561 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
562 reduc->initial_value = arg;
563 return 1;
566 struct elv_data
568 struct walk_stmt_info info;
569 edge entry;
570 htab_t decl_address;
571 gimple_stmt_iterator *gsi;
572 bool changed;
573 bool reset;
576 /* Eliminates references to local variables in *TP out of the single
577 entry single exit region starting at DTA->ENTRY.
578 DECL_ADDRESS contains addresses of the references that had their
579 address taken already. If the expression is changed, CHANGED is
580 set to true. Callback for walk_tree. */
582 static tree
583 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
585 struct elv_data *const dta = (struct elv_data *) data;
586 tree t = *tp, var, addr, addr_type, type, obj;
588 if (DECL_P (t))
590 *walk_subtrees = 0;
592 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
593 return NULL_TREE;
595 type = TREE_TYPE (t);
596 addr_type = build_pointer_type (type);
597 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
598 dta->gsi);
599 if (dta->gsi == NULL && addr == NULL_TREE)
601 dta->reset = true;
602 return NULL_TREE;
605 *tp = build_simple_mem_ref (addr);
607 dta->changed = true;
608 return NULL_TREE;
611 if (TREE_CODE (t) == ADDR_EXPR)
613 /* ADDR_EXPR may appear in two contexts:
614 -- as a gimple operand, when the address taken is a function invariant
615 -- as gimple rhs, when the resulting address in not a function
616 invariant
617 We do not need to do anything special in the latter case (the base of
618 the memory reference whose address is taken may be replaced in the
619 DECL_P case). The former case is more complicated, as we need to
620 ensure that the new address is still a gimple operand. Thus, it
621 is not sufficient to replace just the base of the memory reference --
622 we need to move the whole computation of the address out of the
623 loop. */
624 if (!is_gimple_val (t))
625 return NULL_TREE;
627 *walk_subtrees = 0;
628 obj = TREE_OPERAND (t, 0);
629 var = get_base_address (obj);
630 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
631 return NULL_TREE;
633 addr_type = TREE_TYPE (t);
634 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
635 dta->gsi);
636 if (dta->gsi == NULL && addr == NULL_TREE)
638 dta->reset = true;
639 return NULL_TREE;
641 *tp = addr;
643 dta->changed = true;
644 return NULL_TREE;
647 if (!EXPR_P (t))
648 *walk_subtrees = 0;
650 return NULL_TREE;
653 /* Moves the references to local variables in STMT at *GSI out of the single
654 entry single exit region starting at ENTRY. DECL_ADDRESS contains
655 addresses of the references that had their address taken
656 already. */
658 static void
659 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
660 htab_t decl_address)
662 struct elv_data dta;
663 gimple stmt = gsi_stmt (*gsi);
665 memset (&dta.info, '\0', sizeof (dta.info));
666 dta.entry = entry;
667 dta.decl_address = decl_address;
668 dta.changed = false;
669 dta.reset = false;
671 if (gimple_debug_bind_p (stmt))
673 dta.gsi = NULL;
674 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
675 eliminate_local_variables_1, &dta.info, NULL);
676 if (dta.reset)
678 gimple_debug_bind_reset_value (stmt);
679 dta.changed = true;
682 else
684 dta.gsi = gsi;
685 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
688 if (dta.changed)
689 update_stmt (stmt);
692 /* Eliminates the references to local variables from the single entry
693 single exit region between the ENTRY and EXIT edges.
695 This includes:
696 1) Taking address of a local variable -- these are moved out of the
697 region (and temporary variable is created to hold the address if
698 necessary).
700 2) Dereferencing a local variable -- these are replaced with indirect
701 references. */
703 static void
704 eliminate_local_variables (edge entry, edge exit)
706 basic_block bb;
707 vec<basic_block> body;
708 body.create (3);
709 unsigned i;
710 gimple_stmt_iterator gsi;
711 bool has_debug_stmt = false;
712 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
713 free);
714 basic_block entry_bb = entry->src;
715 basic_block exit_bb = exit->dest;
717 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
719 FOR_EACH_VEC_ELT (body, i, bb)
720 if (bb != entry_bb && bb != exit_bb)
721 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
722 if (is_gimple_debug (gsi_stmt (gsi)))
724 if (gimple_debug_bind_p (gsi_stmt (gsi)))
725 has_debug_stmt = true;
727 else
728 eliminate_local_variables_stmt (entry, &gsi, decl_address);
730 if (has_debug_stmt)
731 FOR_EACH_VEC_ELT (body, i, bb)
732 if (bb != entry_bb && bb != exit_bb)
733 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
734 if (gimple_debug_bind_p (gsi_stmt (gsi)))
735 eliminate_local_variables_stmt (entry, &gsi, decl_address);
737 htab_delete (decl_address);
738 body.release ();
741 /* Returns true if expression EXPR is not defined between ENTRY and
742 EXIT, i.e. if all its operands are defined outside of the region. */
744 static bool
745 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
747 basic_block entry_bb = entry->src;
748 basic_block exit_bb = exit->dest;
749 basic_block def_bb;
751 if (is_gimple_min_invariant (expr))
752 return true;
754 if (TREE_CODE (expr) == SSA_NAME)
756 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
757 if (def_bb
758 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
759 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
760 return false;
762 return true;
765 return false;
768 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
769 The copies are stored to NAME_COPIES, if NAME was already duplicated,
770 its duplicate stored in NAME_COPIES is returned.
772 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
773 duplicated, storing the copies in DECL_COPIES. */
775 static tree
776 separate_decls_in_region_name (tree name,
777 htab_t name_copies, htab_t decl_copies,
778 bool copy_name_p)
780 tree copy, var, var_copy;
781 unsigned idx, uid, nuid;
782 struct int_tree_map ielt, *nielt;
783 struct name_to_copy_elt elt, *nelt;
784 void **slot, **dslot;
786 if (TREE_CODE (name) != SSA_NAME)
787 return name;
789 idx = SSA_NAME_VERSION (name);
790 elt.version = idx;
791 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
792 copy_name_p ? INSERT : NO_INSERT);
793 if (slot && *slot)
794 return ((struct name_to_copy_elt *) *slot)->new_name;
796 if (copy_name_p)
798 copy = duplicate_ssa_name (name, NULL);
799 nelt = XNEW (struct name_to_copy_elt);
800 nelt->version = idx;
801 nelt->new_name = copy;
802 nelt->field = NULL_TREE;
803 *slot = nelt;
805 else
807 gcc_assert (!slot);
808 copy = name;
811 var = SSA_NAME_VAR (name);
812 if (!var)
813 return copy;
815 uid = DECL_UID (var);
816 ielt.uid = uid;
817 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
818 if (!*dslot)
820 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
821 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
822 nielt = XNEW (struct int_tree_map);
823 nielt->uid = uid;
824 nielt->to = var_copy;
825 *dslot = nielt;
827 /* Ensure that when we meet this decl next time, we won't duplicate
828 it again. */
829 nuid = DECL_UID (var_copy);
830 ielt.uid = nuid;
831 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
832 gcc_assert (!*dslot);
833 nielt = XNEW (struct int_tree_map);
834 nielt->uid = nuid;
835 nielt->to = var_copy;
836 *dslot = nielt;
838 else
839 var_copy = ((struct int_tree_map *) *dslot)->to;
841 replace_ssa_name_symbol (copy, var_copy);
842 return copy;
845 /* Finds the ssa names used in STMT that are defined outside the
846 region between ENTRY and EXIT and replaces such ssa names with
847 their duplicates. The duplicates are stored to NAME_COPIES. Base
848 decls of all ssa names used in STMT (including those defined in
849 LOOP) are replaced with the new temporary variables; the
850 replacement decls are stored in DECL_COPIES. */
852 static void
853 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
854 htab_t name_copies, htab_t decl_copies)
856 use_operand_p use;
857 def_operand_p def;
858 ssa_op_iter oi;
859 tree name, copy;
860 bool copy_name_p;
862 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
864 name = DEF_FROM_PTR (def);
865 gcc_assert (TREE_CODE (name) == SSA_NAME);
866 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
867 false);
868 gcc_assert (copy == name);
871 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
873 name = USE_FROM_PTR (use);
874 if (TREE_CODE (name) != SSA_NAME)
875 continue;
877 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
878 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
879 copy_name_p);
880 SET_USE (use, copy);
884 /* Finds the ssa names used in STMT that are defined outside the
885 region between ENTRY and EXIT and replaces such ssa names with
886 their duplicates. The duplicates are stored to NAME_COPIES. Base
887 decls of all ssa names used in STMT (including those defined in
888 LOOP) are replaced with the new temporary variables; the
889 replacement decls are stored in DECL_COPIES. */
891 static bool
892 separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
893 htab_t decl_copies)
895 use_operand_p use;
896 ssa_op_iter oi;
897 tree var, name;
898 struct int_tree_map ielt;
899 struct name_to_copy_elt elt;
900 void **slot, **dslot;
902 if (gimple_debug_bind_p (stmt))
903 var = gimple_debug_bind_get_var (stmt);
904 else if (gimple_debug_source_bind_p (stmt))
905 var = gimple_debug_source_bind_get_var (stmt);
906 else
907 return true;
908 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
909 return true;
910 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
911 ielt.uid = DECL_UID (var);
912 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
913 if (!dslot)
914 return true;
915 if (gimple_debug_bind_p (stmt))
916 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
917 else if (gimple_debug_source_bind_p (stmt))
918 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
920 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
922 name = USE_FROM_PTR (use);
923 if (TREE_CODE (name) != SSA_NAME)
924 continue;
926 elt.version = SSA_NAME_VERSION (name);
927 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
928 if (!slot)
930 gimple_debug_bind_reset_value (stmt);
931 update_stmt (stmt);
932 break;
935 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
938 return false;
941 /* Callback for htab_traverse. Adds a field corresponding to the reduction
942 specified in SLOT. The type is passed in DATA. */
944 static int
945 add_field_for_reduction (void **slot, void *data)
948 struct reduction_info *const red = (struct reduction_info *) *slot;
949 tree const type = (tree) data;
950 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
951 tree field = build_decl (gimple_location (red->reduc_stmt),
952 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
954 insert_field_into_struct (type, field);
956 red->field = field;
958 return 1;
961 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
962 described in SLOT. The type is passed in DATA. */
964 static int
965 add_field_for_name (void **slot, void *data)
967 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
968 tree type = (tree) data;
969 tree name = ssa_name (elt->version);
970 tree field = build_decl (UNKNOWN_LOCATION,
971 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
972 TREE_TYPE (name));
974 insert_field_into_struct (type, field);
975 elt->field = field;
977 return 1;
980 /* Callback for htab_traverse. A local result is the intermediate result
981 computed by a single
982 thread, or the initial value in case no iteration was executed.
983 This function creates a phi node reflecting these values.
984 The phi's result will be stored in NEW_PHI field of the
985 reduction's data structure. */
987 static int
988 create_phi_for_local_result (void **slot, void *data)
990 struct reduction_info *const reduc = (struct reduction_info *) *slot;
991 const struct loop *const loop = (const struct loop *) data;
992 edge e;
993 gimple new_phi;
994 basic_block store_bb;
995 tree local_res;
996 source_location locus;
998 /* STORE_BB is the block where the phi
999 should be stored. It is the destination of the loop exit.
1000 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1001 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1003 /* STORE_BB has two predecessors. One coming from the loop
1004 (the reduction's result is computed at the loop),
1005 and another coming from a block preceding the loop,
1006 when no iterations
1007 are executed (the initial value should be taken). */
1008 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1009 e = EDGE_PRED (store_bb, 1);
1010 else
1011 e = EDGE_PRED (store_bb, 0);
1012 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
1013 locus = gimple_location (reduc->reduc_stmt);
1014 new_phi = create_phi_node (local_res, store_bb);
1015 add_phi_arg (new_phi, reduc->init, e, locus);
1016 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1017 FALLTHRU_EDGE (loop->latch), locus);
1018 reduc->new_phi = new_phi;
1020 return 1;
1023 struct clsn_data
1025 tree store;
1026 tree load;
1028 basic_block store_bb;
1029 basic_block load_bb;
1032 /* Callback for htab_traverse. Create an atomic instruction for the
1033 reduction described in SLOT.
1034 DATA annotates the place in memory the atomic operation relates to,
1035 and the basic block it needs to be generated in. */
1037 static int
1038 create_call_for_reduction_1 (void **slot, void *data)
1040 struct reduction_info *const reduc = (struct reduction_info *) *slot;
1041 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1042 gimple_stmt_iterator gsi;
1043 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1044 tree load_struct;
1045 basic_block bb;
1046 basic_block new_bb;
1047 edge e;
1048 tree t, addr, ref, x;
1049 tree tmp_load, name;
1050 gimple load;
1052 load_struct = build_simple_mem_ref (clsn_data->load);
1053 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1055 addr = build_addr (t, current_function_decl);
1057 /* Create phi node. */
1058 bb = clsn_data->load_bb;
1060 e = split_block (bb, t);
1061 new_bb = e->dest;
1063 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1064 tmp_load = make_ssa_name (tmp_load, NULL);
1065 load = gimple_build_omp_atomic_load (tmp_load, addr);
1066 SSA_NAME_DEF_STMT (tmp_load) = load;
1067 gsi = gsi_start_bb (new_bb);
1068 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1070 e = split_block (new_bb, load);
1071 new_bb = e->dest;
1072 gsi = gsi_start_bb (new_bb);
1073 ref = tmp_load;
1074 x = fold_build2 (reduc->reduction_code,
1075 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1076 PHI_RESULT (reduc->new_phi));
1078 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1079 GSI_CONTINUE_LINKING);
1081 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1082 return 1;
1085 /* Create the atomic operation at the join point of the threads.
1086 REDUCTION_LIST describes the reductions in the LOOP.
1087 LD_ST_DATA describes the shared data structure where
1088 shared data is stored in and loaded from. */
1089 static void
1090 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1091 struct clsn_data *ld_st_data)
1093 htab_traverse (reduction_list, create_phi_for_local_result, loop);
1094 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1095 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1096 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1099 /* Callback for htab_traverse. Loads the final reduction value at the
1100 join point of all threads, and inserts it in the right place. */
1102 static int
1103 create_loads_for_reductions (void **slot, void *data)
1105 struct reduction_info *const red = (struct reduction_info *) *slot;
1106 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1107 gimple stmt;
1108 gimple_stmt_iterator gsi;
1109 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1110 tree load_struct;
1111 tree name;
1112 tree x;
1114 gsi = gsi_after_labels (clsn_data->load_bb);
1115 load_struct = build_simple_mem_ref (clsn_data->load);
1116 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1117 NULL_TREE);
1119 x = load_struct;
1120 name = PHI_RESULT (red->keep_res);
1121 stmt = gimple_build_assign (name, x);
1122 SSA_NAME_DEF_STMT (name) = stmt;
1124 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1126 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1127 !gsi_end_p (gsi); gsi_next (&gsi))
1128 if (gsi_stmt (gsi) == red->keep_res)
1130 remove_phi_node (&gsi, false);
1131 return 1;
1133 gcc_unreachable ();
1136 /* Load the reduction result that was stored in LD_ST_DATA.
1137 REDUCTION_LIST describes the list of reductions that the
1138 loads should be generated for. */
1139 static void
1140 create_final_loads_for_reduction (htab_t reduction_list,
1141 struct clsn_data *ld_st_data)
1143 gimple_stmt_iterator gsi;
1144 tree t;
1145 gimple stmt;
1147 gsi = gsi_after_labels (ld_st_data->load_bb);
1148 t = build_fold_addr_expr (ld_st_data->store);
1149 stmt = gimple_build_assign (ld_st_data->load, t);
1151 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1152 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1154 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1158 /* Callback for htab_traverse. Store the neutral value for the
1159 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1160 1 for MULT_EXPR, etc. into the reduction field.
1161 The reduction is specified in SLOT. The store information is
1162 passed in DATA. */
1164 static int
1165 create_stores_for_reduction (void **slot, void *data)
1167 struct reduction_info *const red = (struct reduction_info *) *slot;
1168 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1169 tree t;
1170 gimple stmt;
1171 gimple_stmt_iterator gsi;
1172 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1174 gsi = gsi_last_bb (clsn_data->store_bb);
1175 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1176 stmt = gimple_build_assign (t, red->initial_value);
1177 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1179 return 1;
1182 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1183 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1184 specified in SLOT. */
1186 static int
1187 create_loads_and_stores_for_name (void **slot, void *data)
1189 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1190 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1191 tree t;
1192 gimple stmt;
1193 gimple_stmt_iterator gsi;
1194 tree type = TREE_TYPE (elt->new_name);
1195 tree load_struct;
1197 gsi = gsi_last_bb (clsn_data->store_bb);
1198 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1199 stmt = gimple_build_assign (t, ssa_name (elt->version));
1200 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1202 gsi = gsi_last_bb (clsn_data->load_bb);
1203 load_struct = build_simple_mem_ref (clsn_data->load);
1204 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1205 stmt = gimple_build_assign (elt->new_name, t);
1206 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1207 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1209 return 1;
1212 /* Moves all the variables used in LOOP and defined outside of it (including
1213 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1214 name) to a structure created for this purpose. The code
1216 while (1)
1218 use (a);
1219 use (b);
1222 is transformed this way:
1224 bb0:
1225 old.a = a;
1226 old.b = b;
1228 bb1:
1229 a' = new->a;
1230 b' = new->b;
1231 while (1)
1233 use (a');
1234 use (b');
1237 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1238 pointer `new' is intentionally not initialized (the loop will be split to a
1239 separate function later, and `new' will be initialized from its arguments).
1240 LD_ST_DATA holds information about the shared data structure used to pass
1241 information among the threads. It is initialized here, and
1242 gen_parallel_loop will pass it to create_call_for_reduction that
1243 needs this information. REDUCTION_LIST describes the reductions
1244 in LOOP. */
1246 static void
1247 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1248 tree *arg_struct, tree *new_arg_struct,
1249 struct clsn_data *ld_st_data)
1252 basic_block bb1 = split_edge (entry);
1253 basic_block bb0 = single_pred (bb1);
1254 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1255 name_to_copy_elt_eq, free);
1256 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1257 free);
1258 unsigned i;
1259 tree type, type_name, nvar;
1260 gimple_stmt_iterator gsi;
1261 struct clsn_data clsn_data;
1262 vec<basic_block> body;
1263 body.create (3);
1264 basic_block bb;
1265 basic_block entry_bb = bb1;
1266 basic_block exit_bb = exit->dest;
1267 bool has_debug_stmt = false;
1269 entry = single_succ_edge (entry_bb);
1270 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1272 FOR_EACH_VEC_ELT (body, i, bb)
1274 if (bb != entry_bb && bb != exit_bb)
1276 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1277 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1278 name_copies, decl_copies);
1280 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1282 gimple stmt = gsi_stmt (gsi);
1284 if (is_gimple_debug (stmt))
1285 has_debug_stmt = true;
1286 else
1287 separate_decls_in_region_stmt (entry, exit, stmt,
1288 name_copies, decl_copies);
1293 /* Now process debug bind stmts. We must not create decls while
1294 processing debug stmts, so we defer their processing so as to
1295 make sure we will have debug info for as many variables as
1296 possible (all of those that were dealt with in the loop above),
1297 and discard those for which we know there's nothing we can
1298 do. */
1299 if (has_debug_stmt)
1300 FOR_EACH_VEC_ELT (body, i, bb)
1301 if (bb != entry_bb && bb != exit_bb)
1303 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1305 gimple stmt = gsi_stmt (gsi);
1307 if (is_gimple_debug (stmt))
1309 if (separate_decls_in_region_debug (stmt, name_copies,
1310 decl_copies))
1312 gsi_remove (&gsi, true);
1313 continue;
1317 gsi_next (&gsi);
1321 body.release ();
1323 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1325 /* It may happen that there is nothing to copy (if there are only
1326 loop carried and external variables in the loop). */
1327 *arg_struct = NULL;
1328 *new_arg_struct = NULL;
1330 else
1332 /* Create the type for the structure to store the ssa names to. */
1333 type = lang_hooks.types.make_type (RECORD_TYPE);
1334 type_name = build_decl (UNKNOWN_LOCATION,
1335 TYPE_DECL, create_tmp_var_name (".paral_data"),
1336 type);
1337 TYPE_NAME (type) = type_name;
1339 htab_traverse (name_copies, add_field_for_name, type);
1340 if (reduction_list && htab_elements (reduction_list) > 0)
1342 /* Create the fields for reductions. */
1343 htab_traverse (reduction_list, add_field_for_reduction,
1344 type);
1346 layout_type (type);
1348 /* Create the loads and stores. */
1349 *arg_struct = create_tmp_var (type, ".paral_data_store");
1350 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1351 *new_arg_struct = make_ssa_name (nvar, NULL);
1353 ld_st_data->store = *arg_struct;
1354 ld_st_data->load = *new_arg_struct;
1355 ld_st_data->store_bb = bb0;
1356 ld_st_data->load_bb = bb1;
1358 htab_traverse (name_copies, create_loads_and_stores_for_name,
1359 ld_st_data);
1361 /* Load the calculation from memory (after the join of the threads). */
1363 if (reduction_list && htab_elements (reduction_list) > 0)
1365 htab_traverse (reduction_list, create_stores_for_reduction,
1366 ld_st_data);
1367 clsn_data.load = make_ssa_name (nvar, NULL);
1368 clsn_data.load_bb = exit->dest;
1369 clsn_data.store = ld_st_data->store;
1370 create_final_loads_for_reduction (reduction_list, &clsn_data);
1374 htab_delete (decl_copies);
1375 htab_delete (name_copies);
1378 /* Bitmap containing uids of functions created by parallelization. We cannot
1379 allocate it from the default obstack, as it must live across compilation
1380 of several functions; we make it gc allocated instead. */
1382 static GTY(()) bitmap parallelized_functions;
1384 /* Returns true if FN was created by create_loop_fn. */
1386 bool
1387 parallelized_function_p (tree fn)
1389 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1390 return false;
1392 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1395 /* Creates and returns an empty function that will receive the body of
1396 a parallelized loop. */
1398 static tree
1399 create_loop_fn (location_t loc)
1401 char buf[100];
1402 char *tname;
1403 tree decl, type, name, t;
1404 struct function *act_cfun = cfun;
1405 static unsigned loopfn_num;
1407 loc = LOCATION_LOCUS (loc);
1408 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1409 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1410 clean_symbol_name (tname);
1411 name = get_identifier (tname);
1412 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1414 decl = build_decl (loc, FUNCTION_DECL, name, type);
1415 if (!parallelized_functions)
1416 parallelized_functions = BITMAP_GGC_ALLOC ();
1417 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1419 TREE_STATIC (decl) = 1;
1420 TREE_USED (decl) = 1;
1421 DECL_ARTIFICIAL (decl) = 1;
1422 DECL_IGNORED_P (decl) = 0;
1423 TREE_PUBLIC (decl) = 0;
1424 DECL_UNINLINABLE (decl) = 1;
1425 DECL_EXTERNAL (decl) = 0;
1426 DECL_CONTEXT (decl) = NULL_TREE;
1427 DECL_INITIAL (decl) = make_node (BLOCK);
1429 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1430 DECL_ARTIFICIAL (t) = 1;
1431 DECL_IGNORED_P (t) = 1;
1432 DECL_RESULT (decl) = t;
1434 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1435 ptr_type_node);
1436 DECL_ARTIFICIAL (t) = 1;
1437 DECL_ARG_TYPE (t) = ptr_type_node;
1438 DECL_CONTEXT (t) = decl;
1439 TREE_USED (t) = 1;
1440 DECL_ARGUMENTS (decl) = t;
1442 allocate_struct_function (decl, false);
1444 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1445 it. */
1446 set_cfun (act_cfun);
1448 return decl;
1451 /* Moves the exit condition of LOOP to the beginning of its header, and
1452 duplicates the part of the last iteration that gets disabled to the
1453 exit of the loop. NIT is the number of iterations of the loop
1454 (used to initialize the variables in the duplicated part).
1456 TODO: the common case is that latch of the loop is empty and immediately
1457 follows the loop exit. In this case, it would be better not to copy the
1458 body of the loop, but only move the entry of the loop directly before the
1459 exit check and increase the number of iterations of the loop by one.
1460 This may need some additional preconditioning in case NIT = ~0.
1461 REDUCTION_LIST describes the reductions in LOOP. */
1463 static void
1464 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1466 basic_block *bbs, *nbbs, ex_bb, orig_header;
1467 unsigned n;
1468 bool ok;
1469 edge exit = single_dom_exit (loop), hpred;
1470 tree control, control_name, res, t;
1471 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1472 gimple_stmt_iterator gsi;
1473 tree nit_1;
1475 split_block_after_labels (loop->header);
1476 orig_header = single_succ (loop->header);
1477 hpred = single_succ_edge (loop->header);
1479 cond_stmt = last_stmt (exit->src);
1480 control = gimple_cond_lhs (cond_stmt);
1481 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1483 /* Make sure that we have phi nodes on exit for all loop header phis
1484 (create_parallel_loop requires that). */
1485 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1487 phi = gsi_stmt (gsi);
1488 res = PHI_RESULT (phi);
1489 t = copy_ssa_name (res, phi);
1490 SET_PHI_RESULT (phi, t);
1491 nphi = create_phi_node (res, orig_header);
1492 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1494 if (res == control)
1496 gimple_cond_set_lhs (cond_stmt, t);
1497 update_stmt (cond_stmt);
1498 control = t;
1502 bbs = get_loop_body_in_dom_order (loop);
1504 for (n = 0; bbs[n] != exit->src; n++)
1505 continue;
1506 nbbs = XNEWVEC (basic_block, n);
1507 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1508 bbs + 1, n, nbbs);
1509 gcc_assert (ok);
1510 free (bbs);
1511 ex_bb = nbbs[0];
1512 free (nbbs);
1514 /* Other than reductions, the only gimple reg that should be copied
1515 out of the loop is the control variable. */
1516 exit = single_dom_exit (loop);
1517 control_name = NULL_TREE;
1518 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1520 phi = gsi_stmt (gsi);
1521 res = PHI_RESULT (phi);
1522 if (virtual_operand_p (res))
1524 gsi_next (&gsi);
1525 continue;
1528 /* Check if it is a part of reduction. If it is,
1529 keep the phi at the reduction's keep_res field. The
1530 PHI_RESULT of this phi is the resulting value of the reduction
1531 variable when exiting the loop. */
1533 if (htab_elements (reduction_list) > 0)
1535 struct reduction_info *red;
1537 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1538 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1539 if (red)
1541 red->keep_res = phi;
1542 gsi_next (&gsi);
1543 continue;
1546 gcc_assert (control_name == NULL_TREE
1547 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1548 control_name = res;
1549 remove_phi_node (&gsi, false);
1551 gcc_assert (control_name != NULL_TREE);
1553 /* Initialize the control variable to number of iterations
1554 according to the rhs of the exit condition. */
1555 gsi = gsi_after_labels (ex_bb);
1556 cond_nit = last_stmt (exit->src);
1557 nit_1 = gimple_cond_rhs (cond_nit);
1558 nit_1 = force_gimple_operand_gsi (&gsi,
1559 fold_convert (TREE_TYPE (control_name), nit_1),
1560 false, NULL_TREE, false, GSI_SAME_STMT);
1561 stmt = gimple_build_assign (control_name, nit_1);
1562 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1563 SSA_NAME_DEF_STMT (control_name) = stmt;
1566 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1567 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1568 NEW_DATA is the variable that should be initialized from the argument
1569 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1570 basic block containing GIMPLE_OMP_PARALLEL tree. */
1572 static basic_block
1573 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1574 tree new_data, unsigned n_threads, location_t loc)
1576 gimple_stmt_iterator gsi;
1577 basic_block bb, paral_bb, for_bb, ex_bb;
1578 tree t, param;
1579 gimple stmt, for_stmt, phi, cond_stmt;
1580 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1581 edge exit, nexit, guard, end, e;
1583 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1584 bb = loop_preheader_edge (loop)->src;
1585 paral_bb = single_pred (bb);
1586 gsi = gsi_last_bb (paral_bb);
1588 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1589 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1590 = build_int_cst (integer_type_node, n_threads);
1591 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1592 gimple_set_location (stmt, loc);
1594 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1596 /* Initialize NEW_DATA. */
1597 if (data)
1599 gsi = gsi_after_labels (bb);
1601 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1602 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1603 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1604 SSA_NAME_DEF_STMT (param) = stmt;
1606 stmt = gimple_build_assign (new_data,
1607 fold_convert (TREE_TYPE (new_data), param));
1608 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1609 SSA_NAME_DEF_STMT (new_data) = stmt;
1612 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1613 bb = split_loop_exit_edge (single_dom_exit (loop));
1614 gsi = gsi_last_bb (bb);
1615 stmt = gimple_build_omp_return (false);
1616 gimple_set_location (stmt, loc);
1617 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1619 /* Extract data for GIMPLE_OMP_FOR. */
1620 gcc_assert (loop->header == single_dom_exit (loop)->src);
1621 cond_stmt = last_stmt (loop->header);
1623 cvar = gimple_cond_lhs (cond_stmt);
1624 cvar_base = SSA_NAME_VAR (cvar);
1625 phi = SSA_NAME_DEF_STMT (cvar);
1626 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1627 initvar = copy_ssa_name (cvar, NULL);
1628 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1629 initvar);
1630 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1632 gsi = gsi_last_nondebug_bb (loop->latch);
1633 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1634 gsi_remove (&gsi, true);
1636 /* Prepare cfg. */
1637 for_bb = split_edge (loop_preheader_edge (loop));
1638 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1639 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1640 gcc_assert (exit == single_dom_exit (loop));
1642 guard = make_edge (for_bb, ex_bb, 0);
1643 single_succ_edge (loop->latch)->flags = 0;
1644 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1645 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1647 source_location locus;
1648 tree def;
1649 phi = gsi_stmt (gsi);
1650 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1652 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1653 locus = gimple_phi_arg_location_from_edge (stmt,
1654 loop_preheader_edge (loop));
1655 add_phi_arg (phi, def, guard, locus);
1657 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1658 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1659 add_phi_arg (phi, def, end, locus);
1661 e = redirect_edge_and_branch (exit, nexit->dest);
1662 PENDING_STMT (e) = NULL;
1664 /* Emit GIMPLE_OMP_FOR. */
1665 gimple_cond_set_lhs (cond_stmt, cvar_base);
1666 type = TREE_TYPE (cvar);
1667 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1668 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1670 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1671 gimple_set_location (for_stmt, loc);
1672 gimple_omp_for_set_index (for_stmt, 0, initvar);
1673 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1674 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1675 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1676 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1677 cvar_base,
1678 build_int_cst (type, 1)));
1680 gsi = gsi_last_bb (for_bb);
1681 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1682 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1684 /* Emit GIMPLE_OMP_CONTINUE. */
1685 gsi = gsi_last_bb (loop->latch);
1686 stmt = gimple_build_omp_continue (cvar_next, cvar);
1687 gimple_set_location (stmt, loc);
1688 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1689 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1691 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1692 gsi = gsi_last_bb (ex_bb);
1693 stmt = gimple_build_omp_return (true);
1694 gimple_set_location (stmt, loc);
1695 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1697 /* After the above dom info is hosed. Re-compute it. */
1698 free_dominance_info (CDI_DOMINATORS);
1699 calculate_dominance_info (CDI_DOMINATORS);
1701 return paral_bb;
1704 /* Generates code to execute the iterations of LOOP in N_THREADS
1705 threads in parallel.
1707 NITER describes number of iterations of LOOP.
1708 REDUCTION_LIST describes the reductions existent in the LOOP. */
1710 static void
1711 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1712 unsigned n_threads, struct tree_niter_desc *niter)
1714 loop_iterator li;
1715 tree many_iterations_cond, type, nit;
1716 tree arg_struct, new_arg_struct;
1717 gimple_seq stmts;
1718 basic_block parallel_head;
1719 edge entry, exit;
1720 struct clsn_data clsn_data;
1721 unsigned prob;
1722 location_t loc;
1723 gimple cond_stmt;
1724 unsigned int m_p_thread=2;
1726 /* From
1728 ---------------------------------------------------------------------
1729 loop
1731 IV = phi (INIT, IV + STEP)
1732 BODY1;
1733 if (COND)
1734 break;
1735 BODY2;
1737 ---------------------------------------------------------------------
1739 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1740 we generate the following code:
1742 ---------------------------------------------------------------------
1744 if (MAY_BE_ZERO
1745 || NITER < MIN_PER_THREAD * N_THREADS)
1746 goto original;
1748 BODY1;
1749 store all local loop-invariant variables used in body of the loop to DATA.
1750 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1751 load the variables from DATA.
1752 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1753 BODY2;
1754 BODY1;
1755 GIMPLE_OMP_CONTINUE;
1756 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1757 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1758 goto end;
1760 original:
1761 loop
1763 IV = phi (INIT, IV + STEP)
1764 BODY1;
1765 if (COND)
1766 break;
1767 BODY2;
1770 end:
1774 /* Create two versions of the loop -- in the old one, we know that the
1775 number of iterations is large enough, and we will transform it into the
1776 loop that will be split to loop_fn, the new one will be used for the
1777 remaining iterations. */
1779 /* We should compute a better number-of-iterations value for outer loops.
1780 That is, if we have
1782 for (i = 0; i < n; ++i)
1783 for (j = 0; j < m; ++j)
1786 we should compute nit = n * m, not nit = n.
1787 Also may_be_zero handling would need to be adjusted. */
1789 type = TREE_TYPE (niter->niter);
1790 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1791 NULL_TREE);
1792 if (stmts)
1793 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1795 if (loop->inner)
1796 m_p_thread=2;
1797 else
1798 m_p_thread=MIN_PER_THREAD;
1800 many_iterations_cond =
1801 fold_build2 (GE_EXPR, boolean_type_node,
1802 nit, build_int_cst (type, m_p_thread * n_threads));
1804 many_iterations_cond
1805 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1806 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1807 many_iterations_cond);
1808 many_iterations_cond
1809 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1810 if (stmts)
1811 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1812 if (!is_gimple_condexpr (many_iterations_cond))
1814 many_iterations_cond
1815 = force_gimple_operand (many_iterations_cond, &stmts,
1816 true, NULL_TREE);
1817 if (stmts)
1818 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1821 initialize_original_copy_tables ();
1823 /* We assume that the loop usually iterates a lot. */
1824 prob = 4 * REG_BR_PROB_BASE / 5;
1825 loop_version (loop, many_iterations_cond, NULL,
1826 prob, prob, REG_BR_PROB_BASE - prob, true);
1827 update_ssa (TODO_update_ssa);
1828 free_original_copy_tables ();
1830 /* Base all the induction variables in LOOP on a single control one. */
1831 canonicalize_loop_ivs (loop, &nit, true);
1833 /* Ensure that the exit condition is the first statement in the loop. */
1834 transform_to_exit_first_loop (loop, reduction_list, nit);
1836 /* Generate initializations for reductions. */
1837 if (htab_elements (reduction_list) > 0)
1838 htab_traverse (reduction_list, initialize_reductions, loop);
1840 /* Eliminate the references to local variables from the loop. */
1841 gcc_assert (single_exit (loop));
1842 entry = loop_preheader_edge (loop);
1843 exit = single_dom_exit (loop);
1845 eliminate_local_variables (entry, exit);
1846 /* In the old loop, move all variables non-local to the loop to a structure
1847 and back, and create separate decls for the variables used in loop. */
1848 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1849 &new_arg_struct, &clsn_data);
1851 /* Create the parallel constructs. */
1852 loc = UNKNOWN_LOCATION;
1853 cond_stmt = last_stmt (loop->header);
1854 if (cond_stmt)
1855 loc = gimple_location (cond_stmt);
1856 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1857 new_arg_struct, n_threads, loc);
1858 if (htab_elements (reduction_list) > 0)
1859 create_call_for_reduction (loop, reduction_list, &clsn_data);
1861 scev_reset ();
1863 /* Cancel the loop (it is simpler to do it here rather than to teach the
1864 expander to do it). */
1865 cancel_loop_tree (loop);
1867 /* Free loop bound estimations that could contain references to
1868 removed statements. */
1869 FOR_EACH_LOOP (li, loop, 0)
1870 free_numbers_of_iterations_estimates_loop (loop);
1872 /* Expand the parallel constructs. We do it directly here instead of running
1873 a separate expand_omp pass, since it is more efficient, and less likely to
1874 cause troubles with further analyses not being able to deal with the
1875 OMP trees. */
1877 omp_expand_local (parallel_head);
1880 /* Returns true when LOOP contains vector phi nodes. */
1882 static bool
1883 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1885 unsigned i;
1886 basic_block *bbs = get_loop_body_in_dom_order (loop);
1887 gimple_stmt_iterator gsi;
1888 bool res = true;
1890 for (i = 0; i < loop->num_nodes; i++)
1891 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1892 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1893 goto end;
1895 res = false;
1896 end:
1897 free (bbs);
1898 return res;
1901 /* Create a reduction_info struct, initialize it with REDUC_STMT
1902 and PHI, insert it to the REDUCTION_LIST. */
1904 static void
1905 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1907 PTR *slot;
1908 struct reduction_info *new_reduction;
1910 gcc_assert (reduc_stmt);
1912 if (dump_file && (dump_flags & TDF_DETAILS))
1914 fprintf (dump_file,
1915 "Detected reduction. reduction stmt is: \n");
1916 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1917 fprintf (dump_file, "\n");
1920 new_reduction = XCNEW (struct reduction_info);
1922 new_reduction->reduc_stmt = reduc_stmt;
1923 new_reduction->reduc_phi = phi;
1924 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1925 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1926 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1927 *slot = new_reduction;
1930 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1932 static int
1933 set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1935 struct reduction_info *const red = (struct reduction_info *) *slot;
1936 gimple_set_uid (red->reduc_phi, red->reduc_version);
1937 return 1;
1940 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1942 static void
1943 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1945 gimple_stmt_iterator gsi;
1946 loop_vec_info simple_loop_info;
1948 simple_loop_info = vect_analyze_loop_form (loop);
1950 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1952 gimple phi = gsi_stmt (gsi);
1953 affine_iv iv;
1954 tree res = PHI_RESULT (phi);
1955 bool double_reduc;
1957 if (virtual_operand_p (res))
1958 continue;
1960 if (!simple_iv (loop, loop, res, &iv, true)
1961 && simple_loop_info)
1963 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1964 phi, true,
1965 &double_reduc);
1966 if (reduc_stmt && !double_reduc)
1967 build_new_reduction (reduction_list, reduc_stmt, phi);
1970 destroy_loop_vec_info (simple_loop_info, true);
1972 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1973 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1974 only now. */
1975 htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
1978 /* Try to initialize NITER for code generation part. */
1980 static bool
1981 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1983 edge exit = single_dom_exit (loop);
1985 gcc_assert (exit);
1987 /* We need to know # of iterations, and there should be no uses of values
1988 defined inside loop outside of it, unless the values are invariants of
1989 the loop. */
1990 if (!number_of_iterations_exit (loop, exit, niter, false))
1992 if (dump_file && (dump_flags & TDF_DETAILS))
1993 fprintf (dump_file, " FAILED: number of iterations not known\n");
1994 return false;
1997 return true;
2000 /* Try to initialize REDUCTION_LIST for code generation part.
2001 REDUCTION_LIST describes the reductions. */
2003 static bool
2004 try_create_reduction_list (loop_p loop, htab_t reduction_list)
2006 edge exit = single_dom_exit (loop);
2007 gimple_stmt_iterator gsi;
2009 gcc_assert (exit);
2011 gather_scalar_reductions (loop, reduction_list);
2014 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2016 gimple phi = gsi_stmt (gsi);
2017 struct reduction_info *red;
2018 imm_use_iterator imm_iter;
2019 use_operand_p use_p;
2020 gimple reduc_phi;
2021 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2023 if (!virtual_operand_p (val))
2025 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file, "phi is ");
2028 print_gimple_stmt (dump_file, phi, 0, 0);
2029 fprintf (dump_file, "arg of phi to exit: value ");
2030 print_generic_expr (dump_file, val, 0);
2031 fprintf (dump_file, " used outside loop\n");
2032 fprintf (dump_file,
2033 " checking if it a part of reduction pattern: \n");
2035 if (htab_elements (reduction_list) == 0)
2037 if (dump_file && (dump_flags & TDF_DETAILS))
2038 fprintf (dump_file,
2039 " FAILED: it is not a part of reduction.\n");
2040 return false;
2042 reduc_phi = NULL;
2043 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2045 if (!gimple_debug_bind_p (USE_STMT (use_p))
2046 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2048 reduc_phi = USE_STMT (use_p);
2049 break;
2052 red = reduction_phi (reduction_list, reduc_phi);
2053 if (red == NULL)
2055 if (dump_file && (dump_flags & TDF_DETAILS))
2056 fprintf (dump_file,
2057 " FAILED: it is not a part of reduction.\n");
2058 return false;
2060 if (dump_file && (dump_flags & TDF_DETAILS))
2062 fprintf (dump_file, "reduction phi is ");
2063 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2064 fprintf (dump_file, "reduction stmt is ");
2065 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2070 /* The iterations of the loop may communicate only through bivs whose
2071 iteration space can be distributed efficiently. */
2072 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2074 gimple phi = gsi_stmt (gsi);
2075 tree def = PHI_RESULT (phi);
2076 affine_iv iv;
2078 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2080 struct reduction_info *red;
2082 red = reduction_phi (reduction_list, phi);
2083 if (red == NULL)
2085 if (dump_file && (dump_flags & TDF_DETAILS))
2086 fprintf (dump_file,
2087 " FAILED: scalar dependency between iterations\n");
2088 return false;
2094 return true;
2097 /* Detect parallel loops and generate parallel code using libgomp
2098 primitives. Returns true if some loop was parallelized, false
2099 otherwise. */
2101 bool
2102 parallelize_loops (void)
2104 unsigned n_threads = flag_tree_parallelize_loops;
2105 bool changed = false;
2106 struct loop *loop;
2107 struct tree_niter_desc niter_desc;
2108 loop_iterator li;
2109 htab_t reduction_list;
2110 struct obstack parloop_obstack;
2111 HOST_WIDE_INT estimated;
2112 LOC loop_loc;
2114 /* Do not parallelize loops in the functions created by parallelization. */
2115 if (parallelized_function_p (cfun->decl))
2116 return false;
2117 if (cfun->has_nonlocal_label)
2118 return false;
2120 gcc_obstack_init (&parloop_obstack);
2121 reduction_list = htab_create (10, reduction_info_hash,
2122 reduction_info_eq, free);
2123 init_stmt_vec_info_vec ();
2125 FOR_EACH_LOOP (li, loop, 0)
2127 htab_empty (reduction_list);
2128 if (dump_file && (dump_flags & TDF_DETAILS))
2130 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2131 if (loop->inner)
2132 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2133 else
2134 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2137 /* If we use autopar in graphite pass, we use its marked dependency
2138 checking results. */
2139 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 fprintf (dump_file, "loop is not parallel according to graphite\n");
2143 continue;
2146 if (!single_dom_exit (loop))
2149 if (dump_file && (dump_flags & TDF_DETAILS))
2150 fprintf (dump_file, "loop is !single_dom_exit\n");
2152 continue;
2155 if (/* And of course, the loop must be parallelizable. */
2156 !can_duplicate_loop_p (loop)
2157 || loop_has_blocks_with_irreducible_flag (loop)
2158 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2159 /* FIXME: the check for vector phi nodes could be removed. */
2160 || loop_has_vector_phi_nodes (loop))
2161 continue;
2163 estimated = estimated_stmt_executions_int (loop);
2164 if (estimated == -1)
2165 estimated = max_stmt_executions_int (loop);
2166 /* FIXME: Bypass this check as graphite doesn't update the
2167 count and frequency correctly now. */
2168 if (!flag_loop_parallelize_all
2169 && ((estimated != -1
2170 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2171 /* Do not bother with loops in cold areas. */
2172 || optimize_loop_nest_for_size_p (loop)))
2173 continue;
2175 if (!try_get_loop_niter (loop, &niter_desc))
2176 continue;
2178 if (!try_create_reduction_list (loop, reduction_list))
2179 continue;
2181 if (!flag_loop_parallelize_all
2182 && !loop_parallel_p (loop, &parloop_obstack))
2183 continue;
2185 changed = true;
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2188 if (loop->inner)
2189 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2190 else
2191 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2192 loop_loc = find_loop_location (loop);
2193 if (loop_loc != UNKNOWN_LOC)
2194 fprintf (dump_file, "\nloop at %s:%d: ",
2195 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2197 gen_parallel_loop (loop, reduction_list,
2198 n_threads, &niter_desc);
2199 #ifdef ENABLE_CHECKING
2200 verify_flow_info ();
2201 verify_loop_structure ();
2202 verify_loop_closed_ssa (true);
2203 #endif
2206 free_stmt_vec_info_vec ();
2207 htab_delete (reduction_list);
2208 obstack_free (&parloop_obstack, NULL);
2210 /* Parallelization will cause new function calls to be inserted through
2211 which local variables will escape. Reset the points-to solution
2212 for ESCAPED. */
2213 if (changed)
2214 pt_solution_reset (&cfun->gimple_df->escaped);
2216 return changed;
2219 #include "gt-tree-parloops.h"