/* Loop autoparallelization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alias.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "hard-reg-set.h"
#include "ssa.h"
#include "options.h"
#include "fold-const.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"
#include "cgraph.h"
#include "tree-ssa.h"
/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if it is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.

parloop
{
  int sum = 1;

  for (i = 0; i < N; i++)
    {
      x[i] = i + 3;
      sum += x[i];
    }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;

exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);

after reduction transformation (only relevant parts):

parloop
{
  ....

  # Storing the initial value given by the user.  #
  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
    reduction's operation, e.g. 0 for PLUS_EXPR,
    1 for MULT_EXPR, etc. replaces the user's initial value.  #
  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
    create_call_for_reduction_1() #
  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
    create_loads_for_reductions().
    The value computed by the threads is loaded from the
    shared struct.  #

  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

exit_bb:

  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);
}
*/
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;  /* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting
				   value of the reduction variable when exiting
				   the loop.  */
  tree initial_value;		/* The initial value of the reduction var before
				   entering the loop.  */
  tree field;			/* the name of the field in the parloop data
				   structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose
				   result will be passed to the atomic
				   operation.  Represents the local result each
				   thread computed for the reduction
				   operation.  */
};
/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;
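
/* Returns the reduction_info for the reduction phi PHI from REDUCTION_LIST,
   or NULL if PHI is not a known reduction phi.  The lookup hashes on
   gimple_uid (PHI), which the pass keeps equal to the reduc_version of the
   entry (see e.g. transform_to_exit_first_loop_alt, which restores the uid
   after replacing a reduction phi).  */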
static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}
/* Multiply a vector VEC by a matrix MATRIX.
   MATRIX is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
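
/* A small worked example: with M = N = 2,
   MATRIX = { { 1, 0 }, { -1, 1 } } and VEC = { 2, 3 }, DEST becomes
   { 1*2 + 0*3, (-1)*2 + 1*3 } = { 2, 1 }.  */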
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */
static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read dependences (a.k.a. input
	 dependences): these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;
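
  /* The 1x1 matrix { { -1 } } represents reversing the single loop.  If
     reversal respects all dependences, every loop-carried distance must be
     zero, so the iterations can be executed in any order.  */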
  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */
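
/* Note: the hash-table traverse callbacks in this file return 1 so that
   the traversal continues over the remaining entries.  */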
static int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
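
/* Data for eliminate_local_variables_1.  The embedded walk_stmt_info must be
   the first member: the walk_tree callback is handed a pointer to INFO and
   casts it back to the containing elv_data.  */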
struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and a temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
/* Like separate_decls_in_region_stmt, but for a debug stmt STMT: rewrites
   the variable the stmt binds and the ssa names it uses according to
   DECL_COPIES and NAME_COPIES.  If some used ssa name has no copy, the
   bound value is reset instead.  Returns true if STMT should be removed,
   i.e. when the variable it binds was not duplicated or is one we do not
   handle.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

static int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

static int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
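
/* Describes the storage used to communicate values between the threads:
   STORE is the structure holding the values, LOAD the pointer through which
   it is read, and STORE_BB/LOAD_BB the blocks in which the stores and loads
   are inserted.  */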
struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
/* Callback for htab_traverse.  Store the initial value given by the user
   for the reduction into the corresponding field of the shared data
   structure (the neutral element, e.g. 0 for PLUS_EXPR or 1 for MULT_EXPR,
   was already substituted into the loop by initialize_reductions).
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

static int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

static int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes) to a structure created for this
   purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */
static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
/* Replace uses of NAME by VAL in block BB.  */

static void
replace_uses_in_bb_by (tree name, tree val, basic_block bb)
{
  gimple use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      if (gimple_bb (use_stmt) != bb)
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
     that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>

   In unified diff format:

     <bb preheader>:
     ...
   - goto <bb header>
   + goto <bb newheader>

     <bb header>:
   - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   - sum_a = PHI <sum_init (preheader), sum_b (latch)>
   + ivtmp_a = PHI <ivtmp_c (latch)>
   + sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
   - if (ivtmp_a < n)
   -   goto <bb latch>;
   + goto <bb latch>;
   +
   + <bb newheader>:
   + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   + sum_c = PHI <sum_init (preheader), sum_b (latch)>
   + if (ivtmp_c < n + 1)
   +   goto <bb header>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
   - goto <bb header>
   + goto <bb newheader>

   + <bb newexit>:
   + sum_y = PHI <sum_c (newheader)>

     <bb exit>:
   - sum_z = PHI <sum_b (cond[1]), ...>
   + sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled
   more or less as reductions.

   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal form after the transformation, which is required
     by create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Create a new empty exit block, in between the new loop header and the
     old exit block.  The function separate_decls_in_region needs this block
     to insert code that is active on loop exit, but not any other path.  */
  basic_block new_exit_block = split_edge (exit);

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);

      /* Now that we have a new exit block, duplicate the phi of the old exit
	 block in the new exit block to preserve loop-closed ssa.  */
      edge succ_new_exit_block = single_succ_edge (new_exit_block);
      edge pred_new_exit_block = single_pred_edge (new_exit_block);
      tree res_y = copy_ssa_name (res_z, phi);
      gphi *nphi = create_phi_node (res_y, new_exit_block);
      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);

      if (virtual_operand_p (res_z))
	continue;

      gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = nphi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of
   the loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true
   if the transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the
     latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;
  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* TODO: Figure out whether we can trigger this, whether it's worth
	     handling optimally, and whether we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for
     an iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;
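
  /* E.g. if NIT can reach the maximum value of its type, NIT + 1 wraps
     around to 0 and cannot serve as the new loop bound.  */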
  gimple def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
1853 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1854 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1855 LOOP. */
1857 static void
1858 transform_to_exit_first_loop (struct loop *loop,
1859 reduction_info_table_type *reduction_list,
1860 tree nit)
1862 basic_block *bbs, *nbbs, ex_bb, orig_header;
1863 unsigned n;
1864 bool ok;
1865 edge exit = single_dom_exit (loop), hpred;
1866 tree control, control_name, res, t;
1867 gphi *phi, *nphi;
1868 gassign *stmt;
1869 gcond *cond_stmt, *cond_nit;
1870 tree nit_1;
1872 split_block_after_labels (loop->header);
1873 orig_header = single_succ (loop->header);
1874 hpred = single_succ_edge (loop->header);
1876 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1877 control = gimple_cond_lhs (cond_stmt);
1878 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1880 /* Make sure that we have phi nodes on exit for all loop header phis
1881 (create_parallel_loop requires that). */
1882 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1883 !gsi_end_p (gsi);
1884 gsi_next (&gsi))
1886 phi = gsi.phi ();
1887 res = PHI_RESULT (phi);
1888 t = copy_ssa_name (res, phi);
1889 SET_PHI_RESULT (phi, t);
1890 nphi = create_phi_node (res, orig_header);
1891 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1893 if (res == control)
1895 gimple_cond_set_lhs (cond_stmt, t);
1896 update_stmt (cond_stmt);
1897 control = t;
1901 bbs = get_loop_body_in_dom_order (loop);
1903 for (n = 0; bbs[n] != exit->src; n++)
1904 continue;
1905 nbbs = XNEWVEC (basic_block, n);
1906 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1907 bbs + 1, n, nbbs);
1908 gcc_assert (ok);
1909 free (bbs);
1910 ex_bb = nbbs[0];
1911 free (nbbs);
1913 /* Other than reductions, the only gimple reg that should be copied
1914 out of the loop is the control variable. */
1915 exit = single_dom_exit (loop);
1916 control_name = NULL_TREE;
1917 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1918 !gsi_end_p (gsi); )
1920 phi = gsi.phi ();
1921 res = PHI_RESULT (phi);
1922 if (virtual_operand_p (res))
1924 gsi_next (&gsi);
1925 continue;
1928 /* Check whether it is part of a reduction. If so, record
1929 the phi in the reduction's keep_res field. The
1930 PHI_RESULT of this phi is the resulting value of the reduction
1931 variable when exiting the loop. */
1933 if (reduction_list->elements () > 0)
1935 struct reduction_info *red;
1937 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1938 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1939 if (red)
1941 red->keep_res = phi;
1942 gsi_next (&gsi);
1943 continue;
1946 gcc_assert (control_name == NULL_TREE
1947 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1948 control_name = res;
1949 remove_phi_node (&gsi, false);
1951 gcc_assert (control_name != NULL_TREE);
1953 /* Initialize the control variable to the number of iterations
1954 according to the rhs of the exit condition. */
1955 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1956 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1957 nit_1 = gimple_cond_rhs (cond_nit);
1958 nit_1 = force_gimple_operand_gsi (&gsi,
1959 fold_convert (TREE_TYPE (control_name), nit_1),
1960 false, NULL_TREE, false, GSI_SAME_STMT);
1961 stmt = gimple_build_assign (control_name, nit_1);
1962 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1965 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1966 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1967 NEW_DATA is the variable that should be initialized from the argument
1968 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1969 basic block containing the GIMPLE_OMP_PARALLEL statement. */
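/* Informally, the constructs emitted below correspond to the source-level
   nest (an analogy, not the exact GIMPLE that is built):

     #pragma omp parallel num_threads (N_THREADS)
       #pragma omp for schedule (static)
         for (i = init; i cond bound; i += 1)
           BODY;  */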
1971 static basic_block
1972 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1973 tree new_data, unsigned n_threads, location_t loc)
1975 gimple_stmt_iterator gsi;
1976 basic_block bb, paral_bb, for_bb, ex_bb;
1977 tree t, param;
1978 gomp_parallel *omp_par_stmt;
1979 gimple omp_return_stmt1, omp_return_stmt2;
1980 gimple phi;
1981 gcond *cond_stmt;
1982 gomp_for *for_stmt;
1983 gomp_continue *omp_cont_stmt;
1984 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1985 edge exit, nexit, guard, end, e;
1987 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1988 bb = loop_preheader_edge (loop)->src;
1989 paral_bb = single_pred (bb);
1990 gsi = gsi_last_bb (paral_bb);
1992 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1993 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1994 = build_int_cst (integer_type_node, n_threads);
1995 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1996 gimple_set_location (omp_par_stmt, loc);
1998 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2000 /* Initialize NEW_DATA. */
2001 if (data)
2003 gassign *assign_stmt;
2005 gsi = gsi_after_labels (bb);
2007 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2008 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2009 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2011 assign_stmt = gimple_build_assign (new_data,
2012 fold_convert (TREE_TYPE (new_data), param));
2013 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2016 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2017 bb = split_loop_exit_edge (single_dom_exit (loop));
2018 gsi = gsi_last_bb (bb);
2019 omp_return_stmt1 = gimple_build_omp_return (false);
2020 gimple_set_location (omp_return_stmt1, loc);
2021 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2023 /* Extract data for GIMPLE_OMP_FOR. */
2024 gcc_assert (loop->header == single_dom_exit (loop)->src);
2025 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2027 cvar = gimple_cond_lhs (cond_stmt);
2028 cvar_base = SSA_NAME_VAR (cvar);
2029 phi = SSA_NAME_DEF_STMT (cvar);
2030 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2031 initvar = copy_ssa_name (cvar);
2032 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2033 initvar);
2034 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2036 gsi = gsi_last_nondebug_bb (loop->latch);
2037 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2038 gsi_remove (&gsi, true);
2040 /* Prepare cfg. */
2041 for_bb = split_edge (loop_preheader_edge (loop));
2042 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2043 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2044 gcc_assert (exit == single_dom_exit (loop));
2046 guard = make_edge (for_bb, ex_bb, 0);
2047 single_succ_edge (loop->latch)->flags = 0;
2048 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
2049 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2050 !gsi_end_p (gpi); gsi_next (&gpi))
2052 source_location locus;
2053 gphi *phi = gpi.phi ();
2054 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2055 gimple def_stmt = SSA_NAME_DEF_STMT (def);
2057 /* If the exit phi is not connected to a header phi in the same loop, this
2058 value is not modified in the loop, and we're done with this phi. */
2059 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2060 && gimple_bb (def_stmt) == loop->header))
2061 continue;
2063 gphi *stmt = as_a <gphi *> (def_stmt);
2064 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2065 locus = gimple_phi_arg_location_from_edge (stmt,
2066 loop_preheader_edge (loop));
2067 add_phi_arg (phi, def, guard, locus);
2069 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2070 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2071 add_phi_arg (phi, def, end, locus);
2073 e = redirect_edge_and_branch (exit, nexit->dest);
2074 PENDING_STMT (e) = NULL;
2076 /* Emit GIMPLE_OMP_FOR. */
2077 gimple_cond_set_lhs (cond_stmt, cvar_base);
2078 type = TREE_TYPE (cvar);
2079 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2080 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2082 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2083 gimple_set_location (for_stmt, loc);
2084 gimple_omp_for_set_index (for_stmt, 0, initvar);
2085 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2086 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2087 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2088 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2089 cvar_base,
2090 build_int_cst (type, 1)));
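/* By this point canonicalize_loop_ivs has rebased the loop on a single iv
   with step 1, so the increment built above is always +1; informally the
   construct behaves like
     for (initvar = cvar_init; initvar cond bound; initvar += 1).  */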
2092 gsi = gsi_last_bb (for_bb);
2093 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2094 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2096 /* Emit GIMPLE_OMP_CONTINUE. */
2097 gsi = gsi_last_bb (loop->latch);
2098 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2099 gimple_set_location (omp_cont_stmt, loc);
2100 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2101 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2103 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2104 gsi = gsi_last_bb (ex_bb);
2105 omp_return_stmt2 = gimple_build_omp_return (true);
2106 gimple_set_location (omp_return_stmt2, loc);
2107 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2109 /* After the above, the dominance info is hosed. Recompute it. */
2110 free_dominance_info (CDI_DOMINATORS);
2111 calculate_dominance_info (CDI_DOMINATORS);
2113 return paral_bb;
2116 /* Generates code to execute the iterations of LOOP in N_THREADS
2117 threads in parallel.
2119 NITER describes the number of iterations of LOOP.
2120 REDUCTION_LIST describes the reductions in LOOP. */
2122 static void
2123 gen_parallel_loop (struct loop *loop,
2124 reduction_info_table_type *reduction_list,
2125 unsigned n_threads, struct tree_niter_desc *niter)
2127 tree many_iterations_cond, type, nit;
2128 tree arg_struct, new_arg_struct;
2129 gimple_seq stmts;
2130 edge entry, exit;
2131 struct clsn_data clsn_data;
2132 unsigned prob;
2133 location_t loc;
2134 gimple cond_stmt;
2135 unsigned int m_p_thread = 2;
2137 /* From
2139 ---------------------------------------------------------------------
2140 loop
2142 IV = phi (INIT, IV + STEP)
2143 BODY1;
2144 if (COND)
2145 break;
2146 BODY2;
2148 ---------------------------------------------------------------------
2150 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2151 we generate the following code:
2153 ---------------------------------------------------------------------
2155 if (MAY_BE_ZERO
2156 || NITER < MIN_PER_THREAD * N_THREADS)
2157 goto original;
2159 BODY1;
2160 store all local loop-invariant variables used in the body of the loop into DATA.
2161 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2162 load the variables from DATA.
2163 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2164 BODY2;
2165 BODY1;
2166 GIMPLE_OMP_CONTINUE;
2167 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2168 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2169 goto end;
2171 original:
2172 loop
2174 IV = phi (INIT, IV + STEP)
2175 BODY1;
2176 if (COND)
2177 break;
2178 BODY2;
2181 end:
2185 /* Create two versions of the loop -- in the old one, we know that the
2186 number of iterations is large enough, and we will transform it into the
2187 loop that will be split off into loop_fn; the new one will be used for the
2188 remaining iterations. */
2190 /* We should compute a better number-of-iterations value for outer loops.
2191 That is, if we have
2193 for (i = 0; i < n; ++i)
2194 for (j = 0; j < m; ++j)
2197 we should compute nit = n * m, not nit = n.
2198 Also may_be_zero handling would need to be adjusted. */
2200 type = TREE_TYPE (niter->niter);
2201 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2202 NULL_TREE);
2203 if (stmts)
2204 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2206 if (loop->inner)
2207 m_p_thread = 2;
2208 else
2209 m_p_thread = MIN_PER_THREAD;
2211 many_iterations_cond =
2212 fold_build2 (GE_EXPR, boolean_type_node,
2213 nit, build_int_cst (type, m_p_thread * n_threads));
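/* E.g. with n_threads == 4 and m_p_thread == 100 (illustrative numbers
   only), this requires nit >= 400 before the parallel version is used.  */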
2215 many_iterations_cond
2216 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2217 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2218 many_iterations_cond);
2219 many_iterations_cond
2220 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2221 if (stmts)
2222 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2223 if (!is_gimple_condexpr (many_iterations_cond))
2225 many_iterations_cond
2226 = force_gimple_operand (many_iterations_cond, &stmts,
2227 true, NULL_TREE);
2228 if (stmts)
2229 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2232 initialize_original_copy_tables ();
2234 /* We assume that the loop usually iterates a lot. */
2235 prob = 4 * REG_BR_PROB_BASE / 5;
2236 loop_version (loop, many_iterations_cond, NULL,
2237 prob, prob, REG_BR_PROB_BASE - prob, true);
2238 update_ssa (TODO_update_ssa);
2239 free_original_copy_tables ();
2241 /* Base all the induction variables in LOOP on a single control one. */
2242 canonicalize_loop_ivs (loop, &nit, true);
2244 /* Ensure that the exit condition is the first statement in the loop.
2245 The common case is that the latch of the loop is empty (apart from the
2246 increment) and immediately follows the loop exit test. Attempt to move the
2247 entry of the loop directly before the exit check and increase the number of
2248 iterations of the loop by one. */
2249 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2251 if (dump_file
2252 && (dump_flags & TDF_DETAILS))
2253 fprintf (dump_file,
2254 "alternative exit-first loop transform succeeded"
2255 " for loop %d\n", loop->num);
2257 else
2259 /* Fall back on the method that handles more cases, but duplicates the
2260 loop body: move the exit condition of LOOP to the beginning of its
2261 header, and duplicate the part of the last iteration that gets disabled
2262 to the exit of the loop. */
2263 transform_to_exit_first_loop (loop, reduction_list, nit);
2266 /* Generate initializations for reductions. */
2267 if (reduction_list->elements () > 0)
2268 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2270 /* Eliminate the references to local variables from the loop. */
2271 gcc_assert (single_exit (loop));
2272 entry = loop_preheader_edge (loop);
2273 exit = single_dom_exit (loop);
2275 eliminate_local_variables (entry, exit);
2276 /* In the old loop, move all variables non-local to the loop into a
2277 structure and back, and create separate decls for the variables used in the loop. */
2278 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2279 &new_arg_struct, &clsn_data);
2281 /* Create the parallel constructs. */
2282 loc = UNKNOWN_LOCATION;
2283 cond_stmt = last_stmt (loop->header);
2284 if (cond_stmt)
2285 loc = gimple_location (cond_stmt);
2286 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2287 new_arg_struct, n_threads, loc);
2288 if (reduction_list->elements () > 0)
2289 create_call_for_reduction (loop, reduction_list, &clsn_data);
2291 scev_reset ();
2293 /* Cancel the loop (it is simpler to do it here rather than to teach the
2294 expander to do it). */
2295 cancel_loop_tree (loop);
2297 /* Free loop bound estimations that could contain references to
2298 removed statements. */
2299 FOR_EACH_LOOP (loop, 0)
2300 free_numbers_of_iterations_estimates_loop (loop);
2303 /* Returns true when LOOP contains vector phi nodes. */
2305 static bool
2306 loop_has_vector_phi_nodes (struct loop *loop)
2308 unsigned i;
2309 basic_block *bbs = get_loop_body_in_dom_order (loop);
2310 gphi_iterator gsi;
2311 bool res = true;
2313 for (i = 0; i < loop->num_nodes; i++)
2314 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2315 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2316 goto end;
2318 res = false;
2319 end:
2320 free (bbs);
2321 return res;
2324 /* Create a reduction_info struct, initialize it with REDUC_STMT
2325 and PHI, and insert it into REDUCTION_LIST. */
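/* For instance (invented SSA names), for a sum reduction

     sum_1 = PHI <0 (preheader), sum_2 (latch)>
     sum_2 = sum_1 + a_3;

   PHI is the header phi of sum and REDUC_STMT is the addition.  */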
2327 static void
2328 build_new_reduction (reduction_info_table_type *reduction_list,
2329 gimple reduc_stmt, gphi *phi)
2331 reduction_info **slot;
2332 struct reduction_info *new_reduction;
2334 gcc_assert (reduc_stmt);
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2338 fprintf (dump_file,
2339 "Detected reduction. reduction stmt is: \n");
2340 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2341 fprintf (dump_file, "\n");
2344 new_reduction = XCNEW (struct reduction_info);
2346 new_reduction->reduc_stmt = reduc_stmt;
2347 new_reduction->reduc_phi = phi;
2348 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2349 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
2350 slot = reduction_list->find_slot (new_reduction, INSERT);
2351 *slot = new_reduction;
2354 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2356 int
2357 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2359 struct reduction_info *const red = *slot;
2360 gimple_set_uid (red->reduc_phi, red->reduc_version);
2361 return 1;
2364 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
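/* Reductions that span two loops of a nest (double reductions), e.g.
   (sketch)  s += a[i][j]  accumulated across both loops, are detected by
   vect_force_simple_reduction but deliberately skipped below.  */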
2366 static void
2367 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2369 gphi_iterator gsi;
2370 loop_vec_info simple_loop_info;
2372 simple_loop_info = vect_analyze_loop_form (loop);
2373 if (simple_loop_info == NULL)
2374 return;
2376 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2378 gphi *phi = gsi.phi ();
2379 affine_iv iv;
2380 tree res = PHI_RESULT (phi);
2381 bool double_reduc;
2383 if (virtual_operand_p (res))
2384 continue;
2386 if (simple_iv (loop, loop, res, &iv, true))
2387 continue;
2389 gimple reduc_stmt
2390 = vect_force_simple_reduction (simple_loop_info, phi, true,
2391 &double_reduc, true);
2392 if (!reduc_stmt || double_reduc)
2393 continue;
2395 build_new_reduction (reduction_list, reduc_stmt, phi);
2397 destroy_loop_vec_info (simple_loop_info, true);
2399 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2400 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2401 only now. */
2402 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2405 /* Try to initialize NITER for code generation part. */
2407 static bool
2408 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2410 edge exit = single_dom_exit (loop);
2412 gcc_assert (exit);
2414 /* We need to know # of iterations, and there should be no uses of values
2415 defined inside the loop outside of it, unless the values are invariants of
2416 the loop. */
2417 if (!number_of_iterations_exit (loop, exit, niter, false))
2419 if (dump_file && (dump_flags & TDF_DETAILS))
2420 fprintf (dump_file, " FAILED: number of iterations not known\n");
2421 return false;
2424 return true;
2427 /* Try to initialize REDUCTION_LIST for code generation part.
2428 REDUCTION_LIST describes the reductions. */
2430 static bool
2431 try_create_reduction_list (loop_p loop,
2432 reduction_info_table_type *reduction_list)
2434 edge exit = single_dom_exit (loop);
2435 gphi_iterator gsi;
2437 gcc_assert (exit);
2439 gather_scalar_reductions (loop, reduction_list);
2442 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2444 gphi *phi = gsi.phi ();
2445 struct reduction_info *red;
2446 imm_use_iterator imm_iter;
2447 use_operand_p use_p;
2448 gimple reduc_phi;
2449 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2451 if (!virtual_operand_p (val))
2453 if (dump_file && (dump_flags & TDF_DETAILS))
2455 fprintf (dump_file, "phi is ");
2456 print_gimple_stmt (dump_file, phi, 0, 0);
2457 fprintf (dump_file, "arg of phi to exit: value ");
2458 print_generic_expr (dump_file, val, 0);
2459 fprintf (dump_file, " used outside loop\n");
2460 fprintf (dump_file,
2461 " checking if it a part of reduction pattern: \n");
2463 if (reduction_list->elements () == 0)
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file,
2467 " FAILED: it is not a part of reduction.\n");
2468 return false;
2470 reduc_phi = NULL;
2471 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2473 if (!gimple_debug_bind_p (USE_STMT (use_p))
2474 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2476 reduc_phi = USE_STMT (use_p);
2477 break;
2480 red = reduction_phi (reduction_list, reduc_phi);
2481 if (red == NULL)
2483 if (dump_file && (dump_flags & TDF_DETAILS))
2484 fprintf (dump_file,
2485 " FAILED: it is not a part of reduction.\n");
2486 return false;
2488 if (dump_file && (dump_flags & TDF_DETAILS))
2490 fprintf (dump_file, "reduction phi is ");
2491 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2492 fprintf (dump_file, "reduction stmt is ");
2493 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2498 /* The iterations of the loop may communicate only through bivs whose
2499 iteration space can be distributed efficiently. */
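/* E.g. (an invented case) a header phi  prev_1 = PHI <init_4, cur_5>,
   where cur_5 is loaded from memory in the body, is neither a simple iv
   nor a recognized reduction, so the loop is rejected below.  */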
2500 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2502 gphi *phi = gsi.phi ();
2503 tree def = PHI_RESULT (phi);
2504 affine_iv iv;
2506 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2508 struct reduction_info *red;
2510 red = reduction_phi (reduction_list, phi);
2511 if (red == NULL)
2513 if (dump_file && (dump_flags & TDF_DETAILS))
2514 fprintf (dump_file,
2515 " FAILED: scalar dependency between iterations\n");
2516 return false;
2522 return true;
2525 /* Detect parallel loops and generate parallel code using libgomp
2526 primitives. Returns true if some loop was parallelized, false
2527 otherwise. */
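/* For example (illustrative only), with -ftree-parallelize-loops=4 a loop
   such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   may be distributed across four threads, provided the checks below
   (independence, profitability, known iteration count) all succeed.  */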
2529 static bool
2530 parallelize_loops (void)
2532 unsigned n_threads = flag_tree_parallelize_loops;
2533 bool changed = false;
2534 struct loop *loop;
2535 struct tree_niter_desc niter_desc;
2536 struct obstack parloop_obstack;
2537 HOST_WIDE_INT estimated;
2538 source_location loop_loc;
2540 /* Do not parallelize loops in the functions created by parallelization. */
2541 if (parallelized_function_p (cfun->decl))
2542 return false;
2543 if (cfun->has_nonlocal_label)
2544 return false;
2546 gcc_obstack_init (&parloop_obstack);
2547 reduction_info_table_type reduction_list (10);
2548 init_stmt_vec_info_vec ();
2550 FOR_EACH_LOOP (loop, 0)
2552 reduction_list.empty ();
2553 if (dump_file && (dump_flags & TDF_DETAILS))
2555 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2556 if (loop->inner)
2557 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2558 else
2559 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2562 /* If autopar is used within the graphite pass, we reuse the
2563 dependency-checking results graphite marked on the loop. */
2564 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2566 if (dump_file && (dump_flags & TDF_DETAILS))
2567 fprintf (dump_file, "loop is not parallel according to graphite\n");
2568 continue;
2571 if (!single_dom_exit (loop))
2574 if (dump_file && (dump_flags & TDF_DETAILS))
2575 fprintf (dump_file, "loop is !single_dom_exit\n");
2577 continue;
2580 if (/* And of course, the loop must be parallelizable. */
2581 !can_duplicate_loop_p (loop)
2582 || loop_has_blocks_with_irreducible_flag (loop)
2583 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2584 /* FIXME: the check for vector phi nodes could be removed. */
2585 || loop_has_vector_phi_nodes (loop))
2586 continue;
2588 estimated = estimated_stmt_executions_int (loop);
2589 if (estimated == -1)
2590 estimated = max_stmt_executions_int (loop);
2591 /* FIXME: Bypass this check as graphite doesn't update the
2592 count and frequency correctly now. */
2593 if (!flag_loop_parallelize_all
2594 && ((estimated != -1
2595 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2596 /* Do not bother with loops in cold areas. */
2597 || optimize_loop_nest_for_size_p (loop)))
2598 continue;
2600 if (!try_get_loop_niter (loop, &niter_desc))
2601 continue;
2603 if (!try_create_reduction_list (loop, &reduction_list))
2604 continue;
2606 if (!flag_loop_parallelize_all
2607 && !loop_parallel_p (loop, &parloop_obstack))
2608 continue;
2610 changed = true;
2611 if (dump_file && (dump_flags & TDF_DETAILS))
2613 if (loop->inner)
2614 fprintf (dump_file, "parallelizing outer loop %d\n", loop->header->index);
2615 else
2616 fprintf (dump_file, "parallelizing inner loop %d\n", loop->header->index);
2617 loop_loc = find_loop_location (loop);
2618 if (loop_loc != UNKNOWN_LOCATION)
2619 fprintf (dump_file, "\nloop at %s:%d: ",
2620 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2622 gen_parallel_loop (loop, &reduction_list,
2623 n_threads, &niter_desc);
2626 free_stmt_vec_info_vec ();
2627 obstack_free (&parloop_obstack, NULL);
2629 /* Parallelization will cause new function calls to be inserted through
2630 which local variables will escape. Reset the points-to solution
2631 for ESCAPED. */
2632 if (changed)
2633 pt_solution_reset (&cfun->gimple_df->escaped);
2635 return changed;
2638 /* Parallelization. */
2640 namespace {
2642 const pass_data pass_data_parallelize_loops =
2644 GIMPLE_PASS, /* type */
2645 "parloops", /* name */
2646 OPTGROUP_LOOP, /* optinfo_flags */
2647 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2648 ( PROP_cfg | PROP_ssa ), /* properties_required */
2649 0, /* properties_provided */
2650 0, /* properties_destroyed */
2651 0, /* todo_flags_start */
2652 0, /* todo_flags_finish */
2655 class pass_parallelize_loops : public gimple_opt_pass
2657 public:
2658 pass_parallelize_loops (gcc::context *ctxt)
2659 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2662 /* opt_pass methods: */
2663 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2664 virtual unsigned int execute (function *);
2666 }; // class pass_parallelize_loops
2668 unsigned
2669 pass_parallelize_loops::execute (function *fun)
2671 if (number_of_loops (fun) <= 1)
2672 return 0;
2674 if (parallelize_loops ())
2676 fun->curr_properties &= ~(PROP_gimple_eomp);
2677 return TODO_update_ssa;
2680 return 0;
2683 } // anon namespace
2685 gimple_opt_pass *
2686 make_pass_parallelize_loops (gcc::context *ctxt)
2688 return new pass_parallelize_loops (ctxt);