This patch syncs zlib.m4 with binutils-gdb and uses AM_ZLIB from zlib.m4
[official-gcc.git] / gcc / tree-parloops.c
blob036677bb123f59a578df92d4d394c378ca95d213
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "alias.h"
26 #include "backend.h"
27 #include "cfghooks.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "hard-reg-set.h"
31 #include "ssa.h"
32 #include "options.h"
33 #include "fold-const.h"
34 #include "internal-fn.h"
35 #include "gimplify.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimple-walk.h"
39 #include "stor-layout.h"
40 #include "tree-nested.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-ivopts.h"
43 #include "tree-ssa-loop-manip.h"
44 #include "tree-ssa-loop-niter.h"
45 #include "tree-ssa-loop.h"
46 #include "tree-into-ssa.h"
47 #include "cfgloop.h"
48 #include "tree-data-ref.h"
49 #include "tree-scalar-evolution.h"
50 #include "gimple-pretty-print.h"
51 #include "tree-pass.h"
52 #include "langhooks.h"
53 #include "tree-vectorizer.h"
54 #include "tree-hasher.h"
55 #include "tree-parloops.h"
56 #include "omp-low.h"
57 #include "tree-nested.h"
58 #include "cgraph.h"
59 #include "tree-ssa.h"
61 /* This pass tries to distribute iterations of loops into several threads.
62 The implementation is straightforward -- for each loop we test whether its
63 iterations are independent, and if it is the case (and some additional
64 conditions regarding profitability and correctness are satisfied), we
65 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
66 machinery do its job.
68 The most of the complexity is in bringing the code into shape expected
69 by the omp expanders:
70 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
71 variable and that the exit test is at the start of the loop body
72 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
73 variables by accesses through pointers, and breaking up ssa chains
74 by storing the values incoming to the parallelized loop to a structure
75 passed to the new function as an argument (something similar is done
76 in omp gimplification, unfortunately only a small part of the code
77 can be shared).
79 TODO:
80 -- if there are several parallelizable loops in a function, it may be
81 possible to generate the threads just once (using synchronization to
82 ensure that cross-loop dependences are obeyed).
83 -- handling of common reduction patterns for outer loops.
85 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
87 Reduction handling:
88 currently we use vect_force_simple_reduction() to detect reduction patterns.
89 The code transformation will be introduced by an example.
92 parloop
94 int sum=1;
96 for (i = 0; i < N; i++)
98 x[i] = i + 3;
99 sum+=x[i];
103 gimple-like code:
104 header_bb:
106 # sum_29 = PHI <sum_11(5), 1(3)>
107 # i_28 = PHI <i_12(5), 0(3)>
108 D.1795_8 = i_28 + 3;
109 x[i_28] = D.1795_8;
110 sum_11 = D.1795_8 + sum_29;
111 i_12 = i_28 + 1;
112 if (N_6(D) > i_12)
113 goto header_bb;
116 exit_bb:
118 # sum_21 = PHI <sum_11(4)>
119 printf (&"%d"[0], sum_21);
122 after reduction transformation (only relevant parts):
124 parloop
127 ....
130 # Storing the initial value given by the user. #
132 .paral_data_store.32.sum.27 = 1;
134 #pragma omp parallel num_threads(4)
136 #pragma omp for schedule(static)
138 # The neutral element corresponding to the particular
139 reduction's operation, e.g. 0 for PLUS_EXPR,
140 1 for MULT_EXPR, etc. replaces the user's initial value. #
142 # sum.27_29 = PHI <sum.27_11, 0>
144 sum.27_11 = D.1827_8 + sum.27_29;
146 GIMPLE_OMP_CONTINUE
148 # Adding this reduction phi is done at create_phi_for_local_result() #
149 # sum.27_56 = PHI <sum.27_11, 0>
150 GIMPLE_OMP_RETURN
152 # Creating the atomic operation is done at
153 create_call_for_reduction_1() #
155 #pragma omp atomic_load
156 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
157 D.1840_60 = sum.27_56 + D.1839_59;
158 #pragma omp atomic_store (D.1840_60);
160 GIMPLE_OMP_RETURN
162 # collecting the result after the join of the threads is done at
163 create_loads_for_reductions().
164 The value computed by the threads is loaded from the
165 shared struct. #
168 .paral_data_load.33_52 = &.paral_data_store.32;
169 sum_37 = .paral_data_load.33_52->sum.27;
170 sum_43 = D.1795_41 + sum_37;
172 exit bb:
173 # sum_21 = PHI <sum_43, sum_26>
174 printf (&"%d"[0], sum_21);
182 /* Minimal number of iterations of a loop that should be executed in each
183 thread. */
184 #define MIN_PER_THREAD 100
186 /* Element of the hashtable, representing a
187 reduction in the current loop. */
188 struct reduction_info
190 gimple reduc_stmt; /* reduction statement. */
191 gimple reduc_phi; /* The phi node defining the reduction. */
192 enum tree_code reduction_code;/* code for the reduction operation. */
193 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
194 result. */
195 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
196 of the reduction variable when existing the loop. */
197 tree initial_value; /* The initial value of the reduction var before entering the loop. */
198 tree field; /* the name of the field in the parloop data structure intended for reduction. */
199 tree init; /* reduction initialization value. */
200 gphi *new_phi; /* (helper field) Newly created phi node whose result
201 will be passed to the atomic operation. Represents
202 the local result each thread computed for the reduction
203 operation. */
206 /* Reduction info hashtable helpers. */
208 struct reduction_hasher : free_ptr_hash <reduction_info>
210 static inline hashval_t hash (const reduction_info *);
211 static inline bool equal (const reduction_info *, const reduction_info *);
214 /* Equality and hash functions for hashtab code. */
216 inline bool
217 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
219 return (a->reduc_phi == b->reduc_phi);
222 inline hashval_t
223 reduction_hasher::hash (const reduction_info *a)
225 return a->reduc_version;
228 typedef hash_table<reduction_hasher> reduction_info_table_type;
231 static struct reduction_info *
232 reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
234 struct reduction_info tmpred, *red;
236 if (reduction_list->elements () == 0 || phi == NULL)
237 return NULL;
239 tmpred.reduc_phi = phi;
240 tmpred.reduc_version = gimple_uid (phi);
241 red = reduction_list->find (&tmpred);
243 return red;
246 /* Element of hashtable of names to copy. */
248 struct name_to_copy_elt
250 unsigned version; /* The version of the name to copy. */
251 tree new_name; /* The new name used in the copy. */
252 tree field; /* The field of the structure used to pass the
253 value. */
256 /* Name copies hashtable helpers. */
258 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
260 static inline hashval_t hash (const name_to_copy_elt *);
261 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
264 /* Equality and hash functions for hashtab code. */
266 inline bool
267 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
269 return a->version == b->version;
272 inline hashval_t
273 name_to_copy_hasher::hash (const name_to_copy_elt *a)
275 return (hashval_t) a->version;
278 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
280 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
281 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
282 represents the denominator for every element in the matrix. */
283 typedef struct lambda_trans_matrix_s
285 lambda_matrix matrix;
286 int rowsize;
287 int colsize;
288 int denominator;
289 } *lambda_trans_matrix;
290 #define LTM_MATRIX(T) ((T)->matrix)
291 #define LTM_ROWSIZE(T) ((T)->rowsize)
292 #define LTM_COLSIZE(T) ((T)->colsize)
293 #define LTM_DENOMINATOR(T) ((T)->denominator)
295 /* Allocate a new transformation matrix. */
297 static lambda_trans_matrix
298 lambda_trans_matrix_new (int colsize, int rowsize,
299 struct obstack * lambda_obstack)
301 lambda_trans_matrix ret;
303 ret = (lambda_trans_matrix)
304 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
305 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
306 LTM_ROWSIZE (ret) = rowsize;
307 LTM_COLSIZE (ret) = colsize;
308 LTM_DENOMINATOR (ret) = 1;
309 return ret;
312 /* Multiply a vector VEC by a matrix MAT.
313 MAT is an M*N matrix, and VEC is a vector with length N. The result
314 is stored in DEST which must be a vector of length M. */
316 static void
317 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
318 lambda_vector vec, lambda_vector dest)
320 int i, j;
322 lambda_vector_clear (dest, m);
323 for (i = 0; i < m; i++)
324 for (j = 0; j < n; j++)
325 dest[i] += matrix[i][j] * vec[j];
328 /* Return true if TRANS is a legal transformation matrix that respects
329 the dependence vectors in DISTS and DIRS. The conservative answer
330 is false.
332 "Wolfe proves that a unimodular transformation represented by the
333 matrix T is legal when applied to a loop nest with a set of
334 lexicographically non-negative distance vectors RDG if and only if
335 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
336 i.e.: if and only if it transforms the lexicographically positive
337 distance vectors to lexicographically positive vectors. Note that
338 a unimodular matrix must transform the zero vector (and only it) to
339 the zero vector." S.Muchnick. */
341 static bool
342 lambda_transform_legal_p (lambda_trans_matrix trans,
343 int nb_loops,
344 vec<ddr_p> dependence_relations)
346 unsigned int i, j;
347 lambda_vector distres;
348 struct data_dependence_relation *ddr;
350 gcc_assert (LTM_COLSIZE (trans) == nb_loops
351 && LTM_ROWSIZE (trans) == nb_loops);
353 /* When there are no dependences, the transformation is correct. */
354 if (dependence_relations.length () == 0)
355 return true;
357 ddr = dependence_relations[0];
358 if (ddr == NULL)
359 return true;
361 /* When there is an unknown relation in the dependence_relations, we
362 know that it is no worth looking at this loop nest: give up. */
363 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
364 return false;
366 distres = lambda_vector_new (nb_loops);
368 /* For each distance vector in the dependence graph. */
369 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
371 /* Don't care about relations for which we know that there is no
372 dependence, nor about read-read (aka. output-dependences):
373 these data accesses can happen in any order. */
374 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
375 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
376 continue;
378 /* Conservatively answer: "this transformation is not valid". */
379 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
380 return false;
382 /* If the dependence could not be captured by a distance vector,
383 conservatively answer that the transform is not valid. */
384 if (DDR_NUM_DIST_VECTS (ddr) == 0)
385 return false;
387 /* Compute trans.dist_vect */
388 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
390 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
391 DDR_DIST_VECT (ddr, j), distres);
393 if (!lambda_vector_lexico_pos (distres, nb_loops))
394 return false;
397 return true;
400 /* Data dependency analysis. Returns true if the iterations of LOOP
401 are independent on each other (that is, if we can execute them
402 in parallel). */
404 static bool
405 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
407 vec<ddr_p> dependence_relations;
408 vec<data_reference_p> datarefs;
409 lambda_trans_matrix trans;
410 bool ret = false;
412 if (dump_file && (dump_flags & TDF_DETAILS))
414 fprintf (dump_file, "Considering loop %d\n", loop->num);
415 if (!loop->inner)
416 fprintf (dump_file, "loop is innermost\n");
417 else
418 fprintf (dump_file, "loop NOT innermost\n");
421 /* Check for problems with dependences. If the loop can be reversed,
422 the iterations are independent. */
423 auto_vec<loop_p, 3> loop_nest;
424 datarefs.create (10);
425 dependence_relations.create (100);
426 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
427 &dependence_relations))
429 if (dump_file && (dump_flags & TDF_DETAILS))
430 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
431 ret = false;
432 goto end;
434 if (dump_file && (dump_flags & TDF_DETAILS))
435 dump_data_dependence_relations (dump_file, dependence_relations);
437 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
438 LTM_MATRIX (trans)[0][0] = -1;
440 if (lambda_transform_legal_p (trans, 1, dependence_relations))
442 ret = true;
443 if (dump_file && (dump_flags & TDF_DETAILS))
444 fprintf (dump_file, " SUCCESS: may be parallelized\n");
446 else if (dump_file && (dump_flags & TDF_DETAILS))
447 fprintf (dump_file,
448 " FAILED: data dependencies exist across iterations\n");
450 end:
451 free_dependence_relations (dependence_relations);
452 free_data_refs (datarefs);
454 return ret;
457 /* Return true when LOOP contains basic blocks marked with the
458 BB_IRREDUCIBLE_LOOP flag. */
460 static inline bool
461 loop_has_blocks_with_irreducible_flag (struct loop *loop)
463 unsigned i;
464 basic_block *bbs = get_loop_body_in_dom_order (loop);
465 bool res = true;
467 for (i = 0; i < loop->num_nodes; i++)
468 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
469 goto end;
471 res = false;
472 end:
473 free (bbs);
474 return res;
477 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
478 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
479 to their addresses that can be reused. The address of OBJ is known to
480 be invariant in the whole function. Other needed statements are placed
481 right before GSI. */
483 static tree
484 take_address_of (tree obj, tree type, edge entry,
485 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
487 int uid;
488 tree *var_p, name, addr;
489 gassign *stmt;
490 gimple_seq stmts;
492 /* Since the address of OBJ is invariant, the trees may be shared.
493 Avoid rewriting unrelated parts of the code. */
494 obj = unshare_expr (obj);
495 for (var_p = &obj;
496 handled_component_p (*var_p);
497 var_p = &TREE_OPERAND (*var_p, 0))
498 continue;
500 /* Canonicalize the access to base on a MEM_REF. */
501 if (DECL_P (*var_p))
502 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
504 /* Assign a canonical SSA name to the address of the base decl used
505 in the address and share it for all accesses and addresses based
506 on it. */
507 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
508 int_tree_map elt;
509 elt.uid = uid;
510 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
511 if (!slot->to)
513 if (gsi == NULL)
514 return NULL;
515 addr = TREE_OPERAND (*var_p, 0);
516 const char *obj_name
517 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
518 if (obj_name)
519 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
520 else
521 name = make_ssa_name (TREE_TYPE (addr));
522 stmt = gimple_build_assign (name, addr);
523 gsi_insert_on_edge_immediate (entry, stmt);
525 slot->uid = uid;
526 slot->to = name;
528 else
529 name = slot->to;
531 /* Express the address in terms of the canonical SSA name. */
532 TREE_OPERAND (*var_p, 0) = name;
533 if (gsi == NULL)
534 return build_fold_addr_expr_with_type (obj, type);
536 name = force_gimple_operand (build_addr (obj, current_function_decl),
537 &stmts, true, NULL_TREE);
538 if (!gimple_seq_empty_p (stmts))
539 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
541 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
543 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
544 NULL_TREE);
545 if (!gimple_seq_empty_p (stmts))
546 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
549 return name;
552 /* Callback for htab_traverse. Create the initialization statement
553 for reduction described in SLOT, and place it at the preheader of
554 the loop described in DATA. */
557 initialize_reductions (reduction_info **slot, struct loop *loop)
559 tree init, c;
560 tree bvar, type, arg;
561 edge e;
563 struct reduction_info *const reduc = *slot;
565 /* Create initialization in preheader:
566 reduction_variable = initialization value of reduction. */
568 /* In the phi node at the header, replace the argument coming
569 from the preheader with the reduction initialization value. */
571 /* Create a new variable to initialize the reduction. */
572 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
573 bvar = create_tmp_var (type, "reduction");
575 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
576 OMP_CLAUSE_REDUCTION);
577 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
578 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
580 init = omp_reduction_init (c, TREE_TYPE (bvar));
581 reduc->init = init;
583 /* Replace the argument representing the initialization value
584 with the initialization value for the reduction (neutral
585 element for the particular operation, e.g. 0 for PLUS_EXPR,
586 1 for MULT_EXPR, etc).
587 Keep the old value in a new variable "reduction_initial",
588 that will be taken in consideration after the parallel
589 computing is done. */
591 e = loop_preheader_edge (loop);
592 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
593 /* Create new variable to hold the initial value. */
595 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
596 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
597 reduc->initial_value = arg;
598 return 1;
601 struct elv_data
603 struct walk_stmt_info info;
604 edge entry;
605 int_tree_htab_type *decl_address;
606 gimple_stmt_iterator *gsi;
607 bool changed;
608 bool reset;
611 /* Eliminates references to local variables in *TP out of the single
612 entry single exit region starting at DTA->ENTRY.
613 DECL_ADDRESS contains addresses of the references that had their
614 address taken already. If the expression is changed, CHANGED is
615 set to true. Callback for walk_tree. */
617 static tree
618 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
620 struct elv_data *const dta = (struct elv_data *) data;
621 tree t = *tp, var, addr, addr_type, type, obj;
623 if (DECL_P (t))
625 *walk_subtrees = 0;
627 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
628 return NULL_TREE;
630 type = TREE_TYPE (t);
631 addr_type = build_pointer_type (type);
632 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
633 dta->gsi);
634 if (dta->gsi == NULL && addr == NULL_TREE)
636 dta->reset = true;
637 return NULL_TREE;
640 *tp = build_simple_mem_ref (addr);
642 dta->changed = true;
643 return NULL_TREE;
646 if (TREE_CODE (t) == ADDR_EXPR)
648 /* ADDR_EXPR may appear in two contexts:
649 -- as a gimple operand, when the address taken is a function invariant
650 -- as gimple rhs, when the resulting address in not a function
651 invariant
652 We do not need to do anything special in the latter case (the base of
653 the memory reference whose address is taken may be replaced in the
654 DECL_P case). The former case is more complicated, as we need to
655 ensure that the new address is still a gimple operand. Thus, it
656 is not sufficient to replace just the base of the memory reference --
657 we need to move the whole computation of the address out of the
658 loop. */
659 if (!is_gimple_val (t))
660 return NULL_TREE;
662 *walk_subtrees = 0;
663 obj = TREE_OPERAND (t, 0);
664 var = get_base_address (obj);
665 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
666 return NULL_TREE;
668 addr_type = TREE_TYPE (t);
669 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
670 dta->gsi);
671 if (dta->gsi == NULL && addr == NULL_TREE)
673 dta->reset = true;
674 return NULL_TREE;
676 *tp = addr;
678 dta->changed = true;
679 return NULL_TREE;
682 if (!EXPR_P (t))
683 *walk_subtrees = 0;
685 return NULL_TREE;
688 /* Moves the references to local variables in STMT at *GSI out of the single
689 entry single exit region starting at ENTRY. DECL_ADDRESS contains
690 addresses of the references that had their address taken
691 already. */
693 static void
694 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
695 int_tree_htab_type *decl_address)
697 struct elv_data dta;
698 gimple stmt = gsi_stmt (*gsi);
700 memset (&dta.info, '\0', sizeof (dta.info));
701 dta.entry = entry;
702 dta.decl_address = decl_address;
703 dta.changed = false;
704 dta.reset = false;
706 if (gimple_debug_bind_p (stmt))
708 dta.gsi = NULL;
709 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
710 eliminate_local_variables_1, &dta.info, NULL);
711 if (dta.reset)
713 gimple_debug_bind_reset_value (stmt);
714 dta.changed = true;
717 else if (gimple_clobber_p (stmt))
719 stmt = gimple_build_nop ();
720 gsi_replace (gsi, stmt, false);
721 dta.changed = true;
723 else
725 dta.gsi = gsi;
726 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
729 if (dta.changed)
730 update_stmt (stmt);
733 /* Eliminates the references to local variables from the single entry
734 single exit region between the ENTRY and EXIT edges.
736 This includes:
737 1) Taking address of a local variable -- these are moved out of the
738 region (and temporary variable is created to hold the address if
739 necessary).
741 2) Dereferencing a local variable -- these are replaced with indirect
742 references. */
744 static void
745 eliminate_local_variables (edge entry, edge exit)
747 basic_block bb;
748 auto_vec<basic_block, 3> body;
749 unsigned i;
750 gimple_stmt_iterator gsi;
751 bool has_debug_stmt = false;
752 int_tree_htab_type decl_address (10);
753 basic_block entry_bb = entry->src;
754 basic_block exit_bb = exit->dest;
756 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
758 FOR_EACH_VEC_ELT (body, i, bb)
759 if (bb != entry_bb && bb != exit_bb)
760 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
761 if (is_gimple_debug (gsi_stmt (gsi)))
763 if (gimple_debug_bind_p (gsi_stmt (gsi)))
764 has_debug_stmt = true;
766 else
767 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
769 if (has_debug_stmt)
770 FOR_EACH_VEC_ELT (body, i, bb)
771 if (bb != entry_bb && bb != exit_bb)
772 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
773 if (gimple_debug_bind_p (gsi_stmt (gsi)))
774 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
777 /* Returns true if expression EXPR is not defined between ENTRY and
778 EXIT, i.e. if all its operands are defined outside of the region. */
780 static bool
781 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
783 basic_block entry_bb = entry->src;
784 basic_block exit_bb = exit->dest;
785 basic_block def_bb;
787 if (is_gimple_min_invariant (expr))
788 return true;
790 if (TREE_CODE (expr) == SSA_NAME)
792 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
793 if (def_bb
794 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
795 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
796 return false;
798 return true;
801 return false;
804 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
805 The copies are stored to NAME_COPIES, if NAME was already duplicated,
806 its duplicate stored in NAME_COPIES is returned.
808 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
809 duplicated, storing the copies in DECL_COPIES. */
811 static tree
812 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
813 int_tree_htab_type *decl_copies,
814 bool copy_name_p)
816 tree copy, var, var_copy;
817 unsigned idx, uid, nuid;
818 struct int_tree_map ielt;
819 struct name_to_copy_elt elt, *nelt;
820 name_to_copy_elt **slot;
821 int_tree_map *dslot;
823 if (TREE_CODE (name) != SSA_NAME)
824 return name;
826 idx = SSA_NAME_VERSION (name);
827 elt.version = idx;
828 slot = name_copies->find_slot_with_hash (&elt, idx,
829 copy_name_p ? INSERT : NO_INSERT);
830 if (slot && *slot)
831 return (*slot)->new_name;
833 if (copy_name_p)
835 copy = duplicate_ssa_name (name, NULL);
836 nelt = XNEW (struct name_to_copy_elt);
837 nelt->version = idx;
838 nelt->new_name = copy;
839 nelt->field = NULL_TREE;
840 *slot = nelt;
842 else
844 gcc_assert (!slot);
845 copy = name;
848 var = SSA_NAME_VAR (name);
849 if (!var)
850 return copy;
852 uid = DECL_UID (var);
853 ielt.uid = uid;
854 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
855 if (!dslot->to)
857 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
858 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
859 dslot->uid = uid;
860 dslot->to = var_copy;
862 /* Ensure that when we meet this decl next time, we won't duplicate
863 it again. */
864 nuid = DECL_UID (var_copy);
865 ielt.uid = nuid;
866 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
867 gcc_assert (!dslot->to);
868 dslot->uid = nuid;
869 dslot->to = var_copy;
871 else
872 var_copy = dslot->to;
874 replace_ssa_name_symbol (copy, var_copy);
875 return copy;
878 /* Finds the ssa names used in STMT that are defined outside the
879 region between ENTRY and EXIT and replaces such ssa names with
880 their duplicates. The duplicates are stored to NAME_COPIES. Base
881 decls of all ssa names used in STMT (including those defined in
882 LOOP) are replaced with the new temporary variables; the
883 replacement decls are stored in DECL_COPIES. */
885 static void
886 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
887 name_to_copy_table_type *name_copies,
888 int_tree_htab_type *decl_copies)
890 use_operand_p use;
891 def_operand_p def;
892 ssa_op_iter oi;
893 tree name, copy;
894 bool copy_name_p;
896 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
898 name = DEF_FROM_PTR (def);
899 gcc_assert (TREE_CODE (name) == SSA_NAME);
900 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
901 false);
902 gcc_assert (copy == name);
905 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
907 name = USE_FROM_PTR (use);
908 if (TREE_CODE (name) != SSA_NAME)
909 continue;
911 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
912 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
913 copy_name_p);
914 SET_USE (use, copy);
918 /* Finds the ssa names used in STMT that are defined outside the
919 region between ENTRY and EXIT and replaces such ssa names with
920 their duplicates. The duplicates are stored to NAME_COPIES. Base
921 decls of all ssa names used in STMT (including those defined in
922 LOOP) are replaced with the new temporary variables; the
923 replacement decls are stored in DECL_COPIES. */
925 static bool
926 separate_decls_in_region_debug (gimple stmt,
927 name_to_copy_table_type *name_copies,
928 int_tree_htab_type *decl_copies)
930 use_operand_p use;
931 ssa_op_iter oi;
932 tree var, name;
933 struct int_tree_map ielt;
934 struct name_to_copy_elt elt;
935 name_to_copy_elt **slot;
936 int_tree_map *dslot;
938 if (gimple_debug_bind_p (stmt))
939 var = gimple_debug_bind_get_var (stmt);
940 else if (gimple_debug_source_bind_p (stmt))
941 var = gimple_debug_source_bind_get_var (stmt);
942 else
943 return true;
944 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
945 return true;
946 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
947 ielt.uid = DECL_UID (var);
948 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
949 if (!dslot)
950 return true;
951 if (gimple_debug_bind_p (stmt))
952 gimple_debug_bind_set_var (stmt, dslot->to);
953 else if (gimple_debug_source_bind_p (stmt))
954 gimple_debug_source_bind_set_var (stmt, dslot->to);
956 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
958 name = USE_FROM_PTR (use);
959 if (TREE_CODE (name) != SSA_NAME)
960 continue;
962 elt.version = SSA_NAME_VERSION (name);
963 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
964 if (!slot)
966 gimple_debug_bind_reset_value (stmt);
967 update_stmt (stmt);
968 break;
971 SET_USE (use, (*slot)->new_name);
974 return false;
977 /* Callback for htab_traverse. Adds a field corresponding to the reduction
978 specified in SLOT. The type is passed in DATA. */
981 add_field_for_reduction (reduction_info **slot, tree type)
984 struct reduction_info *const red = *slot;
985 tree var = gimple_assign_lhs (red->reduc_stmt);
986 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
987 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
989 insert_field_into_struct (type, field);
991 red->field = field;
993 return 1;
996 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
997 described in SLOT. The type is passed in DATA. */
1000 add_field_for_name (name_to_copy_elt **slot, tree type)
1002 struct name_to_copy_elt *const elt = *slot;
1003 tree name = ssa_name (elt->version);
1004 tree field = build_decl (UNKNOWN_LOCATION,
1005 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1006 TREE_TYPE (name));
1008 insert_field_into_struct (type, field);
1009 elt->field = field;
1011 return 1;
1014 /* Callback for htab_traverse. A local result is the intermediate result
1015 computed by a single
1016 thread, or the initial value in case no iteration was executed.
1017 This function creates a phi node reflecting these values.
1018 The phi's result will be stored in NEW_PHI field of the
1019 reduction's data structure. */
1022 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1024 struct reduction_info *const reduc = *slot;
1025 edge e;
1026 gphi *new_phi;
1027 basic_block store_bb;
1028 tree local_res;
1029 source_location locus;
1031 /* STORE_BB is the block where the phi
1032 should be stored. It is the destination of the loop exit.
1033 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1034 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1036 /* STORE_BB has two predecessors. One coming from the loop
1037 (the reduction's result is computed at the loop),
1038 and another coming from a block preceding the loop,
1039 when no iterations
1040 are executed (the initial value should be taken). */
1041 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1042 e = EDGE_PRED (store_bb, 1);
1043 else
1044 e = EDGE_PRED (store_bb, 0);
1045 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
1046 locus = gimple_location (reduc->reduc_stmt);
1047 new_phi = create_phi_node (local_res, store_bb);
1048 add_phi_arg (new_phi, reduc->init, e, locus);
1049 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1050 FALLTHRU_EDGE (loop->latch), locus);
1051 reduc->new_phi = new_phi;
1053 return 1;
1056 struct clsn_data
1058 tree store;
1059 tree load;
1061 basic_block store_bb;
1062 basic_block load_bb;
1065 /* Callback for htab_traverse. Create an atomic instruction for the
1066 reduction described in SLOT.
1067 DATA annotates the place in memory the atomic operation relates to,
1068 and the basic block it needs to be generated in. */
1071 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1073 struct reduction_info *const reduc = *slot;
1074 gimple_stmt_iterator gsi;
1075 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1076 tree load_struct;
1077 basic_block bb;
1078 basic_block new_bb;
1079 edge e;
1080 tree t, addr, ref, x;
1081 tree tmp_load, name;
1082 gimple load;
1084 load_struct = build_simple_mem_ref (clsn_data->load);
1085 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1087 addr = build_addr (t, current_function_decl);
1089 /* Create phi node. */
1090 bb = clsn_data->load_bb;
1092 gsi = gsi_last_bb (bb);
1093 e = split_block (bb, gsi_stmt (gsi));
1094 new_bb = e->dest;
1096 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1097 tmp_load = make_ssa_name (tmp_load);
1098 load = gimple_build_omp_atomic_load (tmp_load, addr);
1099 SSA_NAME_DEF_STMT (tmp_load) = load;
1100 gsi = gsi_start_bb (new_bb);
1101 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1103 e = split_block (new_bb, load);
1104 new_bb = e->dest;
1105 gsi = gsi_start_bb (new_bb);
1106 ref = tmp_load;
1107 x = fold_build2 (reduc->reduction_code,
1108 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1109 PHI_RESULT (reduc->new_phi));
1111 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1112 GSI_CONTINUE_LINKING);
1114 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1115 return 1;
1118 /* Create the atomic operation at the join point of the threads.
1119 REDUCTION_LIST describes the reductions in the LOOP.
1120 LD_ST_DATA describes the shared data structure where
1121 shared data is stored in and loaded from. */
1122 static void
1123 create_call_for_reduction (struct loop *loop,
1124 reduction_info_table_type *reduction_list,
1125 struct clsn_data *ld_st_data)
1127 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1128 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1129 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1130 reduction_list
1131 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1134 /* Callback for htab_traverse. Loads the final reduction value at the
1135 join point of all threads, and inserts it in the right place. */
1138 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1140 struct reduction_info *const red = *slot;
1141 gimple stmt;
1142 gimple_stmt_iterator gsi;
1143 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1144 tree load_struct;
1145 tree name;
1146 tree x;
1148 gsi = gsi_after_labels (clsn_data->load_bb);
1149 load_struct = build_simple_mem_ref (clsn_data->load);
1150 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1151 NULL_TREE);
1153 x = load_struct;
1154 name = PHI_RESULT (red->keep_res);
1155 stmt = gimple_build_assign (name, x);
1157 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1159 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1160 !gsi_end_p (gsi); gsi_next (&gsi))
1161 if (gsi_stmt (gsi) == red->keep_res)
1163 remove_phi_node (&gsi, false);
1164 return 1;
1166 gcc_unreachable ();
1169 /* Load the reduction result that was stored in LD_ST_DATA.
1170 REDUCTION_LIST describes the list of reductions that the
1171 loads should be generated for. */
1172 static void
1173 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1174 struct clsn_data *ld_st_data)
1176 gimple_stmt_iterator gsi;
1177 tree t;
1178 gimple stmt;
1180 gsi = gsi_after_labels (ld_st_data->load_bb);
1181 t = build_fold_addr_expr (ld_st_data->store);
1182 stmt = gimple_build_assign (ld_st_data->load, t);
1184 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1186 reduction_list
1187 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1191 /* Callback for htab_traverse. Store the neutral value for the
1192 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1193 1 for MULT_EXPR, etc. into the reduction field.
1194 The reduction is specified in SLOT. The store information is
1195 passed in DATA. */
1198 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1200 struct reduction_info *const red = *slot;
1201 tree t;
1202 gimple stmt;
1203 gimple_stmt_iterator gsi;
1204 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1206 gsi = gsi_last_bb (clsn_data->store_bb);
1207 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1208 stmt = gimple_build_assign (t, red->initial_value);
1209 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1211 return 1;
1214 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1215 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1216 specified in SLOT. */
1219 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1220 struct clsn_data *clsn_data)
1222 struct name_to_copy_elt *const elt = *slot;
1223 tree t;
1224 gimple stmt;
1225 gimple_stmt_iterator gsi;
1226 tree type = TREE_TYPE (elt->new_name);
1227 tree load_struct;
1229 gsi = gsi_last_bb (clsn_data->store_bb);
1230 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1231 stmt = gimple_build_assign (t, ssa_name (elt->version));
1232 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1234 gsi = gsi_last_bb (clsn_data->load_bb);
1235 load_struct = build_simple_mem_ref (clsn_data->load);
1236 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1237 stmt = gimple_build_assign (elt->new_name, t);
1238 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1240 return 1;
1243 /* Moves all the variables used in LOOP and defined outside of it (including
1244 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1245 name) to a structure created for this purpose. The code
1247 while (1)
1249 use (a);
1250 use (b);
1253 is transformed this way:
1255 bb0:
1256 old.a = a;
1257 old.b = b;
1259 bb1:
1260 a' = new->a;
1261 b' = new->b;
1262 while (1)
1264 use (a');
1265 use (b');
1268 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1269 pointer `new' is intentionally not initialized (the loop will be split to a
1270 separate function later, and `new' will be initialized from its arguments).
1271 LD_ST_DATA holds information about the shared data structure used to pass
1272 information among the threads. It is initialized here, and
1273 gen_parallel_loop will pass it to create_call_for_reduction that
1274 needs this information. REDUCTION_LIST describes the reductions
1275 in LOOP. */
1277 static void
1278 separate_decls_in_region (edge entry, edge exit,
1279 reduction_info_table_type *reduction_list,
1280 tree *arg_struct, tree *new_arg_struct,
1281 struct clsn_data *ld_st_data)
1284 basic_block bb1 = split_edge (entry);
1285 basic_block bb0 = single_pred (bb1);
1286 name_to_copy_table_type name_copies (10);
1287 int_tree_htab_type decl_copies (10);
1288 unsigned i;
1289 tree type, type_name, nvar;
1290 gimple_stmt_iterator gsi;
1291 struct clsn_data clsn_data;
1292 auto_vec<basic_block, 3> body;
1293 basic_block bb;
1294 basic_block entry_bb = bb1;
1295 basic_block exit_bb = exit->dest;
1296 bool has_debug_stmt = false;
1298 entry = single_succ_edge (entry_bb);
1299 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1301 FOR_EACH_VEC_ELT (body, i, bb)
1303 if (bb != entry_bb && bb != exit_bb)
1305 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1306 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1307 &name_copies, &decl_copies);
1309 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1311 gimple stmt = gsi_stmt (gsi);
1313 if (is_gimple_debug (stmt))
1314 has_debug_stmt = true;
1315 else
1316 separate_decls_in_region_stmt (entry, exit, stmt,
1317 &name_copies, &decl_copies);
1322 /* Now process debug bind stmts. We must not create decls while
1323 processing debug stmts, so we defer their processing so as to
1324 make sure we will have debug info for as many variables as
1325 possible (all of those that were dealt with in the loop above),
1326 and discard those for which we know there's nothing we can
1327 do. */
1328 if (has_debug_stmt)
1329 FOR_EACH_VEC_ELT (body, i, bb)
1330 if (bb != entry_bb && bb != exit_bb)
1332 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1334 gimple stmt = gsi_stmt (gsi);
1336 if (is_gimple_debug (stmt))
1338 if (separate_decls_in_region_debug (stmt, &name_copies,
1339 &decl_copies))
1341 gsi_remove (&gsi, true);
1342 continue;
1346 gsi_next (&gsi);
1350 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
1352 /* It may happen that there is nothing to copy (if there are only
1353 loop carried and external variables in the loop). */
1354 *arg_struct = NULL;
1355 *new_arg_struct = NULL;
1357 else
1359 /* Create the type for the structure to store the ssa names to. */
1360 type = lang_hooks.types.make_type (RECORD_TYPE);
1361 type_name = build_decl (UNKNOWN_LOCATION,
1362 TYPE_DECL, create_tmp_var_name (".paral_data"),
1363 type);
1364 TYPE_NAME (type) = type_name;
1366 name_copies.traverse <tree, add_field_for_name> (type);
1367 if (reduction_list && reduction_list->elements () > 0)
1369 /* Create the fields for reductions. */
1370 reduction_list->traverse <tree, add_field_for_reduction> (type);
1372 layout_type (type);
1374 /* Create the loads and stores. */
1375 *arg_struct = create_tmp_var (type, ".paral_data_store");
1376 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1377 *new_arg_struct = make_ssa_name (nvar);
1379 ld_st_data->store = *arg_struct;
1380 ld_st_data->load = *new_arg_struct;
1381 ld_st_data->store_bb = bb0;
1382 ld_st_data->load_bb = bb1;
1384 name_copies
1385 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1386 (ld_st_data);
1388 /* Load the calculation from memory (after the join of the threads). */
1390 if (reduction_list && reduction_list->elements () > 0)
1392 reduction_list
1393 ->traverse <struct clsn_data *, create_stores_for_reduction>
1394 (ld_st_data);
1395 clsn_data.load = make_ssa_name (nvar);
1396 clsn_data.load_bb = exit->dest;
1397 clsn_data.store = ld_st_data->store;
1398 create_final_loads_for_reduction (reduction_list, &clsn_data);
1403 /* Returns true if FN was created to run in parallel. */
1405 bool
1406 parallelized_function_p (tree fndecl)
1408 cgraph_node *node = cgraph_node::get (fndecl);
1409 gcc_assert (node != NULL);
1410 return node->parallelized_function;
1413 /* Creates and returns an empty function that will receive the body of
1414 a parallelized loop. */
1416 static tree
1417 create_loop_fn (location_t loc)
1419 char buf[100];
1420 char *tname;
1421 tree decl, type, name, t;
1422 struct function *act_cfun = cfun;
1423 static unsigned loopfn_num;
1425 loc = LOCATION_LOCUS (loc);
1426 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1427 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1428 clean_symbol_name (tname);
1429 name = get_identifier (tname);
1430 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1432 decl = build_decl (loc, FUNCTION_DECL, name, type);
1433 TREE_STATIC (decl) = 1;
1434 TREE_USED (decl) = 1;
1435 DECL_ARTIFICIAL (decl) = 1;
1436 DECL_IGNORED_P (decl) = 0;
1437 TREE_PUBLIC (decl) = 0;
1438 DECL_UNINLINABLE (decl) = 1;
1439 DECL_EXTERNAL (decl) = 0;
1440 DECL_CONTEXT (decl) = NULL_TREE;
1441 DECL_INITIAL (decl) = make_node (BLOCK);
1443 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1444 DECL_ARTIFICIAL (t) = 1;
1445 DECL_IGNORED_P (t) = 1;
1446 DECL_RESULT (decl) = t;
1448 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1449 ptr_type_node);
1450 DECL_ARTIFICIAL (t) = 1;
1451 DECL_ARG_TYPE (t) = ptr_type_node;
1452 DECL_CONTEXT (t) = decl;
1453 TREE_USED (t) = 1;
1454 DECL_ARGUMENTS (decl) = t;
1456 allocate_struct_function (decl, false);
1458 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1459 it. */
1460 set_cfun (act_cfun);
1462 return decl;
1465 /* Replace uses of NAME by VAL in block BB. */
1467 static void
1468 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1470 gimple use_stmt;
1471 imm_use_iterator imm_iter;
1473 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1475 if (gimple_bb (use_stmt) != bb)
1476 continue;
1478 use_operand_p use_p;
1479 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1480 SET_USE (use_p, val);
1484 /* Do transformation from:
1486 <bb preheader>:
1488 goto <bb header>
1490 <bb header>:
1491 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1492 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1494 use (ivtmp_a)
1496 sum_b = sum_a + sum_update
1498 if (ivtmp_a < n)
1499 goto <bb latch>;
1500 else
1501 goto <bb exit>;
1503 <bb latch>:
1504 ivtmp_b = ivtmp_a + 1;
1505 goto <bb header>
1507 <bb exit>:
1508 sum_z = PHI <sum_b (cond[1]), ...>
1510 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1511 that's <bb header>.
1515 <bb preheader>:
1517 goto <bb newheader>
1519 <bb header>:
1520 ivtmp_a = PHI <ivtmp_c (latch)>
1521 sum_a = PHI <sum_c (latch)>
1523 use (ivtmp_a)
1525 sum_b = sum_a + sum_update
1527 goto <bb latch>;
1529 <bb newheader>:
1530 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1531 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1532 if (ivtmp_c < n + 1)
1533 goto <bb header>;
1534 else
1535 goto <bb newexit>;
1537 <bb latch>:
1538 ivtmp_b = ivtmp_a + 1;
1539 goto <bb newheader>
1541 <bb newexit>:
1542 sum_y = PHI <sum_c (newheader)>
1544 <bb exit>:
1545 sum_z = PHI <sum_y (newexit), ...>
1548 In unified diff format:
1550 <bb preheader>:
1552 - goto <bb header>
1553 + goto <bb newheader>
1555 <bb header>:
1556 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1557 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1558 + ivtmp_a = PHI <ivtmp_c (latch)>
1559 + sum_a = PHI <sum_c (latch)>
1561 use (ivtmp_a)
1563 sum_b = sum_a + sum_update
1565 - if (ivtmp_a < n)
1566 - goto <bb latch>;
1567 + goto <bb latch>;
1569 + <bb newheader>:
1570 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1571 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1572 + if (ivtmp_c < n + 1)
1573 + goto <bb header>;
1574 else
1575 goto <bb exit>;
1577 <bb latch>:
1578 ivtmp_b = ivtmp_a + 1;
1579 - goto <bb header>
1580 + goto <bb newheader>
1582 + <bb newexit>:
1583 + sum_y = PHI <sum_c (newheader)>
1585 <bb exit>:
1586 - sum_z = PHI <sum_b (cond[1]), ...>
1587 + sum_z = PHI <sum_y (newexit), ...>
1589 Note: the example does not show any virtual phis, but these are handled more
1590 or less as reductions.
1593 Moves the exit condition of LOOP to the beginning of its header.
1594 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1595 bound. */
1597 static void
1598 transform_to_exit_first_loop_alt (struct loop *loop,
1599 reduction_info_table_type *reduction_list,
1600 tree bound)
1602 basic_block header = loop->header;
1603 basic_block latch = loop->latch;
1604 edge exit = single_dom_exit (loop);
1605 basic_block exit_block = exit->dest;
1606 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1607 tree control = gimple_cond_lhs (cond_stmt);
1608 edge e;
1610 /* Rewriting virtuals into loop-closed ssa normal form makes this
1611 transformation simpler. It also ensures that the virtuals are in
1612 loop-closed ssa normal from after the transformation, which is required by
1613 create_parallel_loop. */
1614 rewrite_virtuals_into_loop_closed_ssa (loop);
1616 /* Create the new_header block. */
1617 basic_block new_header = split_block_before_cond_jump (exit->src);
1618 edge edge_at_split = single_pred_edge (new_header);
1620 /* Redirect entry edge to new_header. */
1621 edge entry = loop_preheader_edge (loop);
1622 e = redirect_edge_and_branch (entry, new_header);
1623 gcc_assert (e == entry);
1625 /* Redirect post_inc_edge to new_header. */
1626 edge post_inc_edge = single_succ_edge (latch);
1627 e = redirect_edge_and_branch (post_inc_edge, new_header);
1628 gcc_assert (e == post_inc_edge);
1630 /* Redirect post_cond_edge to header. */
1631 edge post_cond_edge = single_pred_edge (latch);
1632 e = redirect_edge_and_branch (post_cond_edge, header);
1633 gcc_assert (e == post_cond_edge);
1635 /* Redirect edge_at_split to latch. */
1636 e = redirect_edge_and_branch (edge_at_split, latch);
1637 gcc_assert (e == edge_at_split);
1639 /* Set the new loop bound. */
1640 gimple_cond_set_rhs (cond_stmt, bound);
1641 update_stmt (cond_stmt);
1643 /* Repair the ssa. */
1644 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1645 edge_var_map *vm;
1646 gphi_iterator gsi;
1647 int i;
1648 for (gsi = gsi_start_phis (header), i = 0;
1649 !gsi_end_p (gsi) && v->iterate (i, &vm);
1650 gsi_next (&gsi), i++)
1652 gphi *phi = gsi.phi ();
1653 tree res_a = PHI_RESULT (phi);
1655 /* Create new phi. */
1656 tree res_c = copy_ssa_name (res_a, phi);
1657 gphi *nphi = create_phi_node (res_c, new_header);
1659 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1660 replace_uses_in_bb_by (res_a, res_c, new_header);
1662 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1663 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1665 /* Replace sum_b with sum_c in exit phi. */
1666 tree res_b = redirect_edge_var_map_def (vm);
1667 replace_uses_in_bb_by (res_b, res_c, exit_block);
1669 struct reduction_info *red = reduction_phi (reduction_list, phi);
1670 gcc_assert (virtual_operand_p (res_a)
1671 || res_a == control
1672 || red != NULL);
1674 if (red)
1676 /* Register the new reduction phi. */
1677 red->reduc_phi = nphi;
1678 gimple_set_uid (red->reduc_phi, red->reduc_version);
1681 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1683 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1684 flush_pending_stmts (entry);
1686 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1687 flush_pending_stmts (post_inc_edge);
1689 /* Create a new empty exit block, inbetween the new loop header and the old
1690 exit block. The function separate_decls_in_region needs this block to
1691 insert code that is active on loop exit, but not any other path. */
1692 basic_block new_exit_block = split_edge (exit);
1694 /* Insert and register the reduction exit phis. */
1695 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1696 !gsi_end_p (gsi);
1697 gsi_next (&gsi))
1699 gphi *phi = gsi.phi ();
1700 tree res_z = PHI_RESULT (phi);
1702 /* Now that we have a new exit block, duplicate the phi of the old exit
1703 block in the new exit block to preserve loop-closed ssa. */
1704 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1705 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1706 tree res_y = copy_ssa_name (res_z, phi);
1707 gphi *nphi = create_phi_node (res_y, new_exit_block);
1708 tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1709 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1710 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1712 if (virtual_operand_p (res_z))
1713 continue;
1715 gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
1716 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1717 if (red != NULL)
1718 red->keep_res = nphi;
1721 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1722 then we're still using some fields, so only bother about fields that are
1723 still used: header and latch.
1724 The loop has a new header bb, so we update it. The latch bb stays the
1725 same. */
1726 loop->header = new_header;
1728 /* Recalculate dominance info. */
1729 free_dominance_info (CDI_DOMINATORS);
1730 calculate_dominance_info (CDI_DOMINATORS);
1733 /* Tries to moves the exit condition of LOOP to the beginning of its header
1734 without duplication of the loop body. NIT is the number of iterations of the
1735 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1736 transformation is successful. */
1738 static bool
1739 try_transform_to_exit_first_loop_alt (struct loop *loop,
1740 reduction_info_table_type *reduction_list,
1741 tree nit)
1743 /* Check whether the latch contains a single statement. */
1744 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1745 return false;
1747 /* Check whether the latch contains the loop iv increment. */
1748 edge back = single_succ_edge (loop->latch);
1749 edge exit = single_dom_exit (loop);
1750 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1751 tree control = gimple_cond_lhs (cond_stmt);
1752 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1753 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1754 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1755 return false;
1757 /* Check whether there's no code between the loop condition and the latch. */
1758 if (!single_pred_p (loop->latch)
1759 || single_pred (loop->latch) != exit->src)
1760 return false;
1762 tree alt_bound = NULL_TREE;
1763 tree nit_type = TREE_TYPE (nit);
1765 /* Figure out whether nit + 1 overflows. */
1766 if (TREE_CODE (nit) == INTEGER_CST)
1768 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1770 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1771 nit, build_one_cst (nit_type));
1773 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1774 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1775 return true;
1777 else
1779 /* Todo: Figure out if we can trigger this, if it's worth to handle
1780 optimally, and if we can handle it optimally. */
1781 return false;
1785 gcc_assert (TREE_CODE (nit) == SSA_NAME);
1787 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1788 iv with base 0 and step 1 that is incremented in the latch, like this:
1790 <bb header>:
1791 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1793 if (iv_1 < nit)
1794 goto <bb latch>;
1795 else
1796 goto <bb exit>;
1798 <bb latch>:
1799 iv_2 = iv_1 + 1;
1800 goto <bb header>;
1802 The range of iv_1 is [0, nit]. The latch edge is taken for
1803 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1804 number of latch executions is equal to nit.
1806 The function max_loop_iterations gives us the maximum number of latch
1807 executions, so it gives us the maximum value of nit. */
1808 widest_int nit_max;
1809 if (!max_loop_iterations (loop, &nit_max))
1810 return false;
1812 /* Check if nit + 1 overflows. */
1813 widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
1814 if (!wi::lts_p (nit_max, type_max))
1815 return false;
1817 gimple def = SSA_NAME_DEF_STMT (nit);
1819 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
1820 if (def
1821 && is_gimple_assign (def)
1822 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1824 tree op1 = gimple_assign_rhs1 (def);
1825 tree op2 = gimple_assign_rhs2 (def);
1826 if (integer_minus_onep (op1))
1827 alt_bound = op2;
1828 else if (integer_minus_onep (op2))
1829 alt_bound = op1;
1832 /* If not found, insert nit + 1. */
1833 if (alt_bound == NULL_TREE)
1835 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1836 build_int_cst_type (nit_type, 1));
1838 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1840 alt_bound
1841 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1842 GSI_CONTINUE_LINKING);
1845 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1846 return true;
1849 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1850 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1851 LOOP. */
1853 static void
1854 transform_to_exit_first_loop (struct loop *loop,
1855 reduction_info_table_type *reduction_list,
1856 tree nit)
1858 basic_block *bbs, *nbbs, ex_bb, orig_header;
1859 unsigned n;
1860 bool ok;
1861 edge exit = single_dom_exit (loop), hpred;
1862 tree control, control_name, res, t;
1863 gphi *phi, *nphi;
1864 gassign *stmt;
1865 gcond *cond_stmt, *cond_nit;
1866 tree nit_1;
1868 split_block_after_labels (loop->header);
1869 orig_header = single_succ (loop->header);
1870 hpred = single_succ_edge (loop->header);
1872 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1873 control = gimple_cond_lhs (cond_stmt);
1874 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1876 /* Make sure that we have phi nodes on exit for all loop header phis
1877 (create_parallel_loop requires that). */
1878 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1879 !gsi_end_p (gsi);
1880 gsi_next (&gsi))
1882 phi = gsi.phi ();
1883 res = PHI_RESULT (phi);
1884 t = copy_ssa_name (res, phi);
1885 SET_PHI_RESULT (phi, t);
1886 nphi = create_phi_node (res, orig_header);
1887 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1889 if (res == control)
1891 gimple_cond_set_lhs (cond_stmt, t);
1892 update_stmt (cond_stmt);
1893 control = t;
1897 bbs = get_loop_body_in_dom_order (loop);
1899 for (n = 0; bbs[n] != exit->src; n++)
1900 continue;
1901 nbbs = XNEWVEC (basic_block, n);
1902 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1903 bbs + 1, n, nbbs);
1904 gcc_assert (ok);
1905 free (bbs);
1906 ex_bb = nbbs[0];
1907 free (nbbs);
1909 /* Other than reductions, the only gimple reg that should be copied
1910 out of the loop is the control variable. */
1911 exit = single_dom_exit (loop);
1912 control_name = NULL_TREE;
1913 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1914 !gsi_end_p (gsi); )
1916 phi = gsi.phi ();
1917 res = PHI_RESULT (phi);
1918 if (virtual_operand_p (res))
1920 gsi_next (&gsi);
1921 continue;
1924 /* Check if it is a part of reduction. If it is,
1925 keep the phi at the reduction's keep_res field. The
1926 PHI_RESULT of this phi is the resulting value of the reduction
1927 variable when exiting the loop. */
1929 if (reduction_list->elements () > 0)
1931 struct reduction_info *red;
1933 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1934 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1935 if (red)
1937 red->keep_res = phi;
1938 gsi_next (&gsi);
1939 continue;
1942 gcc_assert (control_name == NULL_TREE
1943 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1944 control_name = res;
1945 remove_phi_node (&gsi, false);
1947 gcc_assert (control_name != NULL_TREE);
1949 /* Initialize the control variable to number of iterations
1950 according to the rhs of the exit condition. */
1951 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1952 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1953 nit_1 = gimple_cond_rhs (cond_nit);
1954 nit_1 = force_gimple_operand_gsi (&gsi,
1955 fold_convert (TREE_TYPE (control_name), nit_1),
1956 false, NULL_TREE, false, GSI_SAME_STMT);
1957 stmt = gimple_build_assign (control_name, nit_1);
1958 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1961 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1962 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1963 NEW_DATA is the variable that should be initialized from the argument
1964 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1965 basic block containing GIMPLE_OMP_PARALLEL tree. */
1967 static basic_block
1968 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1969 tree new_data, unsigned n_threads, location_t loc)
1971 gimple_stmt_iterator gsi;
1972 basic_block bb, paral_bb, for_bb, ex_bb;
1973 tree t, param;
1974 gomp_parallel *omp_par_stmt;
1975 gimple omp_return_stmt1, omp_return_stmt2;
1976 gimple phi;
1977 gcond *cond_stmt;
1978 gomp_for *for_stmt;
1979 gomp_continue *omp_cont_stmt;
1980 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1981 edge exit, nexit, guard, end, e;
1983 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1984 bb = loop_preheader_edge (loop)->src;
1985 paral_bb = single_pred (bb);
1986 gsi = gsi_last_bb (paral_bb);
1988 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1989 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1990 = build_int_cst (integer_type_node, n_threads);
1991 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1992 gimple_set_location (omp_par_stmt, loc);
1994 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
1996 /* Initialize NEW_DATA. */
1997 if (data)
1999 gassign *assign_stmt;
2001 gsi = gsi_after_labels (bb);
2003 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2004 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2005 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2007 assign_stmt = gimple_build_assign (new_data,
2008 fold_convert (TREE_TYPE (new_data), param));
2009 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2012 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2013 bb = split_loop_exit_edge (single_dom_exit (loop));
2014 gsi = gsi_last_bb (bb);
2015 omp_return_stmt1 = gimple_build_omp_return (false);
2016 gimple_set_location (omp_return_stmt1, loc);
2017 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2019 /* Extract data for GIMPLE_OMP_FOR. */
2020 gcc_assert (loop->header == single_dom_exit (loop)->src);
2021 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2023 cvar = gimple_cond_lhs (cond_stmt);
2024 cvar_base = SSA_NAME_VAR (cvar);
2025 phi = SSA_NAME_DEF_STMT (cvar);
2026 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2027 initvar = copy_ssa_name (cvar);
2028 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2029 initvar);
2030 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2032 gsi = gsi_last_nondebug_bb (loop->latch);
2033 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2034 gsi_remove (&gsi, true);
2036 /* Prepare cfg. */
2037 for_bb = split_edge (loop_preheader_edge (loop));
2038 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2039 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2040 gcc_assert (exit == single_dom_exit (loop));
2042 guard = make_edge (for_bb, ex_bb, 0);
2043 single_succ_edge (loop->latch)->flags = 0;
2044 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
2045 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2046 !gsi_end_p (gpi); gsi_next (&gpi))
2048 source_location locus;
2049 tree def;
2050 gphi *phi = gpi.phi ();
2051 gphi *stmt;
2053 stmt = as_a <gphi *> (
2054 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
2056 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2057 locus = gimple_phi_arg_location_from_edge (stmt,
2058 loop_preheader_edge (loop));
2059 add_phi_arg (phi, def, guard, locus);
2061 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2062 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2063 add_phi_arg (phi, def, end, locus);
2065 e = redirect_edge_and_branch (exit, nexit->dest);
2066 PENDING_STMT (e) = NULL;
2068 /* Emit GIMPLE_OMP_FOR. */
2069 gimple_cond_set_lhs (cond_stmt, cvar_base);
2070 type = TREE_TYPE (cvar);
2071 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2072 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2074 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2075 gimple_set_location (for_stmt, loc);
2076 gimple_omp_for_set_index (for_stmt, 0, initvar);
2077 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2078 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2079 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2080 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2081 cvar_base,
2082 build_int_cst (type, 1)));
2084 gsi = gsi_last_bb (for_bb);
2085 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2086 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2088 /* Emit GIMPLE_OMP_CONTINUE. */
2089 gsi = gsi_last_bb (loop->latch);
2090 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2091 gimple_set_location (omp_cont_stmt, loc);
2092 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2093 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2095 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2096 gsi = gsi_last_bb (ex_bb);
2097 omp_return_stmt2 = gimple_build_omp_return (true);
2098 gimple_set_location (omp_return_stmt2, loc);
2099 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2101 /* After the above dom info is hosed. Re-compute it. */
2102 free_dominance_info (CDI_DOMINATORS);
2103 calculate_dominance_info (CDI_DOMINATORS);
2105 return paral_bb;
2108 /* Generates code to execute the iterations of LOOP in N_THREADS
2109 threads in parallel.
2111 NITER describes number of iterations of LOOP.
2112 REDUCTION_LIST describes the reductions existent in the LOOP. */
2114 static void
2115 gen_parallel_loop (struct loop *loop,
2116 reduction_info_table_type *reduction_list,
2117 unsigned n_threads, struct tree_niter_desc *niter)
2119 tree many_iterations_cond, type, nit;
2120 tree arg_struct, new_arg_struct;
2121 gimple_seq stmts;
2122 edge entry, exit;
2123 struct clsn_data clsn_data;
2124 unsigned prob;
2125 location_t loc;
2126 gimple cond_stmt;
2127 unsigned int m_p_thread=2;
2129 /* From
2131 ---------------------------------------------------------------------
2132 loop
2134 IV = phi (INIT, IV + STEP)
2135 BODY1;
2136 if (COND)
2137 break;
2138 BODY2;
2140 ---------------------------------------------------------------------
2142 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2143 we generate the following code:
2145 ---------------------------------------------------------------------
2147 if (MAY_BE_ZERO
2148 || NITER < MIN_PER_THREAD * N_THREADS)
2149 goto original;
2151 BODY1;
2152 store all local loop-invariant variables used in body of the loop to DATA.
2153 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2154 load the variables from DATA.
2155 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2156 BODY2;
2157 BODY1;
2158 GIMPLE_OMP_CONTINUE;
2159 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2160 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2161 goto end;
2163 original:
2164 loop
2166 IV = phi (INIT, IV + STEP)
2167 BODY1;
2168 if (COND)
2169 break;
2170 BODY2;
2173 end:
2177 /* Create two versions of the loop -- in the old one, we know that the
2178 number of iterations is large enough, and we will transform it into the
2179 loop that will be split to loop_fn, the new one will be used for the
2180 remaining iterations. */
2182 /* We should compute a better number-of-iterations value for outer loops.
2183 That is, if we have
2185 for (i = 0; i < n; ++i)
2186 for (j = 0; j < m; ++j)
2189 we should compute nit = n * m, not nit = n.
2190 Also may_be_zero handling would need to be adjusted. */
2192 type = TREE_TYPE (niter->niter);
2193 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2194 NULL_TREE);
2195 if (stmts)
2196 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2198 if (loop->inner)
2199 m_p_thread=2;
2200 else
2201 m_p_thread=MIN_PER_THREAD;
2203 many_iterations_cond =
2204 fold_build2 (GE_EXPR, boolean_type_node,
2205 nit, build_int_cst (type, m_p_thread * n_threads));
2207 many_iterations_cond
2208 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2209 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2210 many_iterations_cond);
2211 many_iterations_cond
2212 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2213 if (stmts)
2214 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2215 if (!is_gimple_condexpr (many_iterations_cond))
2217 many_iterations_cond
2218 = force_gimple_operand (many_iterations_cond, &stmts,
2219 true, NULL_TREE);
2220 if (stmts)
2221 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2224 initialize_original_copy_tables ();
2226 /* We assume that the loop usually iterates a lot. */
2227 prob = 4 * REG_BR_PROB_BASE / 5;
2228 loop_version (loop, many_iterations_cond, NULL,
2229 prob, prob, REG_BR_PROB_BASE - prob, true);
2230 update_ssa (TODO_update_ssa);
2231 free_original_copy_tables ();
2233 /* Base all the induction variables in LOOP on a single control one. */
2234 canonicalize_loop_ivs (loop, &nit, true);
2236 /* Ensure that the exit condition is the first statement in the loop.
2237 The common case is that latch of the loop is empty (apart from the
2238 increment) and immediately follows the loop exit test. Attempt to move the
2239 entry of the loop directly before the exit check and increase the number of
2240 iterations of the loop by one. */
2241 if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2243 /* Fall back on the method that handles more cases, but duplicates the
2244 loop body: move the exit condition of LOOP to the beginning of its
2245 header, and duplicate the part of the last iteration that gets disabled
2246 to the exit of the loop. */
2247 transform_to_exit_first_loop (loop, reduction_list, nit);
2250 /* Generate initializations for reductions. */
2251 if (reduction_list->elements () > 0)
2252 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2254 /* Eliminate the references to local variables from the loop. */
2255 gcc_assert (single_exit (loop));
2256 entry = loop_preheader_edge (loop);
2257 exit = single_dom_exit (loop);
2259 eliminate_local_variables (entry, exit);
2260 /* In the old loop, move all variables non-local to the loop to a structure
2261 and back, and create separate decls for the variables used in loop. */
2262 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2263 &new_arg_struct, &clsn_data);
2265 /* Create the parallel constructs. */
2266 loc = UNKNOWN_LOCATION;
2267 cond_stmt = last_stmt (loop->header);
2268 if (cond_stmt)
2269 loc = gimple_location (cond_stmt);
2270 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2271 new_arg_struct, n_threads, loc);
2272 if (reduction_list->elements () > 0)
2273 create_call_for_reduction (loop, reduction_list, &clsn_data);
2275 scev_reset ();
2277 /* Cancel the loop (it is simpler to do it here rather than to teach the
2278 expander to do it). */
2279 cancel_loop_tree (loop);
2281 /* Free loop bound estimations that could contain references to
2282 removed statements. */
2283 FOR_EACH_LOOP (loop, 0)
2284 free_numbers_of_iterations_estimates_loop (loop);
2287 /* Returns true when LOOP contains vector phi nodes. */
2289 static bool
2290 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2292 unsigned i;
2293 basic_block *bbs = get_loop_body_in_dom_order (loop);
2294 gphi_iterator gsi;
2295 bool res = true;
2297 for (i = 0; i < loop->num_nodes; i++)
2298 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2299 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2300 goto end;
2302 res = false;
2303 end:
2304 free (bbs);
2305 return res;
2308 /* Create a reduction_info struct, initialize it with REDUC_STMT
2309 and PHI, insert it to the REDUCTION_LIST. */
2311 static void
2312 build_new_reduction (reduction_info_table_type *reduction_list,
2313 gimple reduc_stmt, gphi *phi)
2315 reduction_info **slot;
2316 struct reduction_info *new_reduction;
2318 gcc_assert (reduc_stmt);
2320 if (dump_file && (dump_flags & TDF_DETAILS))
2322 fprintf (dump_file,
2323 "Detected reduction. reduction stmt is: \n");
2324 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2325 fprintf (dump_file, "\n");
2328 new_reduction = XCNEW (struct reduction_info);
2330 new_reduction->reduc_stmt = reduc_stmt;
2331 new_reduction->reduc_phi = phi;
2332 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2333 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
2334 slot = reduction_list->find_slot (new_reduction, INSERT);
2335 *slot = new_reduction;
2338 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2341 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2343 struct reduction_info *const red = *slot;
2344 gimple_set_uid (red->reduc_phi, red->reduc_version);
2345 return 1;
2348 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2350 static void
2351 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2353 gphi_iterator gsi;
2354 loop_vec_info simple_loop_info;
2356 simple_loop_info = vect_analyze_loop_form (loop);
2358 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2360 gphi *phi = gsi.phi ();
2361 affine_iv iv;
2362 tree res = PHI_RESULT (phi);
2363 bool double_reduc;
2365 if (virtual_operand_p (res))
2366 continue;
2368 if (!simple_iv (loop, loop, res, &iv, true)
2369 && simple_loop_info)
2371 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2372 phi, true,
2373 &double_reduc);
2374 if (reduc_stmt && !double_reduc)
2375 build_new_reduction (reduction_list, reduc_stmt, phi);
2378 destroy_loop_vec_info (simple_loop_info, true);
2380 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2381 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2382 only now. */
2383 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2386 /* Try to initialize NITER for code generation part. */
2388 static bool
2389 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2391 edge exit = single_dom_exit (loop);
2393 gcc_assert (exit);
2395 /* We need to know # of iterations, and there should be no uses of values
2396 defined inside loop outside of it, unless the values are invariants of
2397 the loop. */
2398 if (!number_of_iterations_exit (loop, exit, niter, false))
2400 if (dump_file && (dump_flags & TDF_DETAILS))
2401 fprintf (dump_file, " FAILED: number of iterations not known\n");
2402 return false;
2405 return true;
2408 /* Try to initialize REDUCTION_LIST for code generation part.
2409 REDUCTION_LIST describes the reductions. */
2411 static bool
2412 try_create_reduction_list (loop_p loop,
2413 reduction_info_table_type *reduction_list)
2415 edge exit = single_dom_exit (loop);
2416 gphi_iterator gsi;
2418 gcc_assert (exit);
2420 gather_scalar_reductions (loop, reduction_list);
2423 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2425 gphi *phi = gsi.phi ();
2426 struct reduction_info *red;
2427 imm_use_iterator imm_iter;
2428 use_operand_p use_p;
2429 gimple reduc_phi;
2430 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2432 if (!virtual_operand_p (val))
2434 if (dump_file && (dump_flags & TDF_DETAILS))
2436 fprintf (dump_file, "phi is ");
2437 print_gimple_stmt (dump_file, phi, 0, 0);
2438 fprintf (dump_file, "arg of phi to exit: value ");
2439 print_generic_expr (dump_file, val, 0);
2440 fprintf (dump_file, " used outside loop\n");
2441 fprintf (dump_file,
2442 " checking if it a part of reduction pattern: \n");
2444 if (reduction_list->elements () == 0)
2446 if (dump_file && (dump_flags & TDF_DETAILS))
2447 fprintf (dump_file,
2448 " FAILED: it is not a part of reduction.\n");
2449 return false;
2451 reduc_phi = NULL;
2452 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2454 if (!gimple_debug_bind_p (USE_STMT (use_p))
2455 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2457 reduc_phi = USE_STMT (use_p);
2458 break;
2461 red = reduction_phi (reduction_list, reduc_phi);
2462 if (red == NULL)
2464 if (dump_file && (dump_flags & TDF_DETAILS))
2465 fprintf (dump_file,
2466 " FAILED: it is not a part of reduction.\n");
2467 return false;
2469 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, "reduction phi is ");
2472 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2473 fprintf (dump_file, "reduction stmt is ");
2474 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2479 /* The iterations of the loop may communicate only through bivs whose
2480 iteration space can be distributed efficiently. */
2481 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2483 gphi *phi = gsi.phi ();
2484 tree def = PHI_RESULT (phi);
2485 affine_iv iv;
2487 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2489 struct reduction_info *red;
2491 red = reduction_phi (reduction_list, phi);
2492 if (red == NULL)
2494 if (dump_file && (dump_flags & TDF_DETAILS))
2495 fprintf (dump_file,
2496 " FAILED: scalar dependency between iterations\n");
2497 return false;
2503 return true;
2506 /* Detect parallel loops and generate parallel code using libgomp
2507 primitives. Returns true if some loop was parallelized, false
2508 otherwise. */
2510 static bool
2511 parallelize_loops (void)
2513 unsigned n_threads = flag_tree_parallelize_loops;
2514 bool changed = false;
2515 struct loop *loop;
2516 struct tree_niter_desc niter_desc;
2517 struct obstack parloop_obstack;
2518 HOST_WIDE_INT estimated;
2519 source_location loop_loc;
2521 /* Do not parallelize loops in the functions created by parallelization. */
2522 if (parallelized_function_p (cfun->decl))
2523 return false;
2524 if (cfun->has_nonlocal_label)
2525 return false;
2527 gcc_obstack_init (&parloop_obstack);
2528 reduction_info_table_type reduction_list (10);
2529 init_stmt_vec_info_vec ();
2531 FOR_EACH_LOOP (loop, 0)
2533 reduction_list.empty ();
2534 if (dump_file && (dump_flags & TDF_DETAILS))
2536 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2537 if (loop->inner)
2538 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2539 else
2540 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2543 /* If we use autopar in graphite pass, we use its marked dependency
2544 checking results. */
2545 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2547 if (dump_file && (dump_flags & TDF_DETAILS))
2548 fprintf (dump_file, "loop is not parallel according to graphite\n");
2549 continue;
2552 if (!single_dom_exit (loop))
2555 if (dump_file && (dump_flags & TDF_DETAILS))
2556 fprintf (dump_file, "loop is !single_dom_exit\n");
2558 continue;
2561 if (/* And of course, the loop must be parallelizable. */
2562 !can_duplicate_loop_p (loop)
2563 || loop_has_blocks_with_irreducible_flag (loop)
2564 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2565 /* FIXME: the check for vector phi nodes could be removed. */
2566 || loop_has_vector_phi_nodes (loop))
2567 continue;
2569 estimated = estimated_stmt_executions_int (loop);
2570 if (estimated == -1)
2571 estimated = max_stmt_executions_int (loop);
2572 /* FIXME: Bypass this check as graphite doesn't update the
2573 count and frequency correctly now. */
2574 if (!flag_loop_parallelize_all
2575 && ((estimated != -1
2576 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2577 /* Do not bother with loops in cold areas. */
2578 || optimize_loop_nest_for_size_p (loop)))
2579 continue;
2581 if (!try_get_loop_niter (loop, &niter_desc))
2582 continue;
2584 if (!try_create_reduction_list (loop, &reduction_list))
2585 continue;
2587 if (!flag_loop_parallelize_all
2588 && !loop_parallel_p (loop, &parloop_obstack))
2589 continue;
2591 changed = true;
2592 if (dump_file && (dump_flags & TDF_DETAILS))
2594 if (loop->inner)
2595 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2596 else
2597 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2598 loop_loc = find_loop_location (loop);
2599 if (loop_loc != UNKNOWN_LOCATION)
2600 fprintf (dump_file, "\nloop at %s:%d: ",
2601 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2603 gen_parallel_loop (loop, &reduction_list,
2604 n_threads, &niter_desc);
2607 free_stmt_vec_info_vec ();
2608 obstack_free (&parloop_obstack, NULL);
2610 /* Parallelization will cause new function calls to be inserted through
2611 which local variables will escape. Reset the points-to solution
2612 for ESCAPED. */
2613 if (changed)
2614 pt_solution_reset (&cfun->gimple_df->escaped);
2616 return changed;
2619 /* Parallelization. */
2621 namespace {
2623 const pass_data pass_data_parallelize_loops =
2625 GIMPLE_PASS, /* type */
2626 "parloops", /* name */
2627 OPTGROUP_LOOP, /* optinfo_flags */
2628 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2629 ( PROP_cfg | PROP_ssa ), /* properties_required */
2630 0, /* properties_provided */
2631 0, /* properties_destroyed */
2632 0, /* todo_flags_start */
2633 0, /* todo_flags_finish */
2636 class pass_parallelize_loops : public gimple_opt_pass
2638 public:
2639 pass_parallelize_loops (gcc::context *ctxt)
2640 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2643 /* opt_pass methods: */
2644 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2645 virtual unsigned int execute (function *);
2647 }; // class pass_parallelize_loops
2649 unsigned
2650 pass_parallelize_loops::execute (function *fun)
2652 if (number_of_loops (fun) <= 1)
2653 return 0;
2655 if (parallelize_loops ())
2657 fun->curr_properties &= ~(PROP_gimple_eomp);
2658 return TODO_update_ssa;
2661 return 0;
2664 } // anon namespace
2666 gimple_opt_pass *
2667 make_pass_parallelize_loops (gcc::context *ctxt)
2669 return new pass_parallelize_loops (ctxt);