Merge from mainline (168000:168310).
[official-gcc/graphite-test-results.git] / gcc / tree-parloops.c
blob9ece8879e2ef26929ca528a793fb8adccd12fc0e
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5 Zdenek Dvorak <dvorakz@suse.cz>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree-flow.h"
27 #include "cfgloop.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-pass.h"
32 #include "langhooks.h"
33 #include "tree-vectorizer.h"
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
42 The most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common scalar dependence patterns (accumulation, ...)
58 -- handling of non-innermost loops */
61 Reduction handling:
62 currently we use vect_force_simple_reduction() to detect reduction patterns.
63 The code transformation will be introduced by an example.
66 parloop
68 int sum=1;
70 for (i = 0; i < N; i++)
72 x[i] = i + 3;
73 sum+=x[i];
77 gimple-like code:
78 header_bb:
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
82 D.1795_8 = i_28 + 3;
83 x[i_28] = D.1795_8;
84 sum_11 = D.1795_8 + sum_29;
85 i_12 = i_28 + 1;
86 if (N_6(D) > i_12)
87 goto header_bb;
90 exit_bb:
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
96 after reduction transformation (only relevant parts):
98 parloop
101 ....
104 # Storing the initial value given by the user. #
106 .paral_data_store.32.sum.27 = 1;
108 #pragma omp parallel num_threads(4)
110 #pragma omp for schedule(static)
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
116 # sum.27_29 = PHI <sum.27_11, 0>
118 sum.27_11 = D.1827_8 + sum.27_29;
120 GIMPLE_OMP_CONTINUE
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
124 GIMPLE_OMP_RETURN
126 # Creating the atomic operation is done at
127 create_call_for_reduction_1() #
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
134 GIMPLE_OMP_RETURN
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
138 The value computed by the threads is loaded from the
139 shared struct. #
142 .paral_data_load.33_52 = &.paral_data_store.32;
143 sum_37 = .paral_data_load.33_52->sum.27;
144 sum_43 = D.1795_41 + sum_37;
146 exit bb:
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
160 /* Element of the hashtable, representing a
161 reduction in the current loop. */
162 struct reduction_info
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
170 of the reduction variable when existing the loop. */
171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
173 tree init; /* reduction initialization value. */
174 gimple new_phi; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
180 /* Equality and hash functions for hashtab code. */
182 static int
183 reduction_info_eq (const void *aa, const void *bb)
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
188 return (a->reduc_phi == b->reduc_phi);
191 static hashval_t
192 reduction_info_hash (const void *aa)
194 const struct reduction_info *a = (const struct reduction_info *) aa;
196 return a->reduc_version;
199 static struct reduction_info *
200 reduction_phi (htab_t reduction_list, gimple phi)
202 struct reduction_info tmpred, *red;
204 if (htab_elements (reduction_list) == 0)
205 return NULL;
207 tmpred.reduc_phi = phi;
208 tmpred.reduc_version = gimple_uid (phi);
209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
211 return red;
214 /* Element of hashtable of names to copy. */
216 struct name_to_copy_elt
218 unsigned version; /* The version of the name to copy. */
219 tree new_name; /* The new name used in the copy. */
220 tree field; /* The field of the structure used to pass the
221 value. */
224 /* Equality and hash functions for hashtab code. */
226 static int
227 name_to_copy_elt_eq (const void *aa, const void *bb)
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
232 return a->version == b->version;
235 static hashval_t
236 name_to_copy_elt_hash (const void *aa)
238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
240 return (hashval_t) a->version;
244 /* Data dependency analysis. Returns true if the iterations of LOOP
245 are independent on each other (that is, if we can execute them
246 in parallel). */
248 static bool
249 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
251 VEC (loop_p, heap) *loop_nest;
252 VEC (ddr_p, heap) *dependence_relations;
253 VEC (data_reference_p, heap) *datarefs;
254 lambda_trans_matrix trans;
255 bool ret = false;
257 if (dump_file && (dump_flags & TDF_DETAILS))
259 fprintf (dump_file, "Considering loop %d\n", loop->num);
260 if (!loop->inner)
261 fprintf (dump_file, "loop is innermost\n");
262 else
263 fprintf (dump_file, "loop NOT innermost\n");
266 /* Check for problems with dependences. If the loop can be reversed,
267 the iterations are independent. */
268 datarefs = VEC_alloc (data_reference_p, heap, 10);
269 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
270 loop_nest = VEC_alloc (loop_p, heap, 3);
271 compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
272 &dependence_relations);
273 if (dump_file && (dump_flags & TDF_DETAILS))
274 dump_data_dependence_relations (dump_file, dependence_relations);
276 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
277 LTM_MATRIX (trans)[0][0] = -1;
279 if (lambda_transform_legal_p (trans, 1, dependence_relations))
281 ret = true;
282 if (dump_file && (dump_flags & TDF_DETAILS))
283 fprintf (dump_file, " SUCCESS: may be parallelized\n");
285 else if (dump_file && (dump_flags & TDF_DETAILS))
286 fprintf (dump_file,
287 " FAILED: data dependencies exist across iterations\n");
289 VEC_free (loop_p, heap, loop_nest);
290 free_dependence_relations (dependence_relations);
291 free_data_refs (datarefs);
293 return ret;
296 /* Return true when LOOP contains basic blocks marked with the
297 BB_IRREDUCIBLE_LOOP flag. */
299 static inline bool
300 loop_has_blocks_with_irreducible_flag (struct loop *loop)
302 unsigned i;
303 basic_block *bbs = get_loop_body_in_dom_order (loop);
304 bool res = true;
306 for (i = 0; i < loop->num_nodes; i++)
307 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
308 goto end;
310 res = false;
311 end:
312 free (bbs);
313 return res;
316 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
317 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
318 to their addresses that can be reused. The address of OBJ is known to
319 be invariant in the whole function. Other needed statements are placed
320 right before GSI. */
322 static tree
323 take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
324 gimple_stmt_iterator *gsi)
326 int uid;
327 void **dslot;
328 struct int_tree_map ielt, *nielt;
329 tree *var_p, name, bvar, addr;
330 gimple stmt;
331 gimple_seq stmts;
333 /* Since the address of OBJ is invariant, the trees may be shared.
334 Avoid rewriting unrelated parts of the code. */
335 obj = unshare_expr (obj);
336 for (var_p = &obj;
337 handled_component_p (*var_p);
338 var_p = &TREE_OPERAND (*var_p, 0))
339 continue;
341 /* Canonicalize the access to base on a MEM_REF. */
342 if (DECL_P (*var_p))
343 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
345 /* Assign a canonical SSA name to the address of the base decl used
346 in the address and share it for all accesses and addresses based
347 on it. */
348 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
349 ielt.uid = uid;
350 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
351 if (!*dslot)
353 if (gsi == NULL)
354 return NULL;
355 addr = TREE_OPERAND (*var_p, 0);
356 bvar = create_tmp_var (TREE_TYPE (addr),
357 get_name (TREE_OPERAND
358 (TREE_OPERAND (*var_p, 0), 0)));
359 add_referenced_var (bvar);
360 stmt = gimple_build_assign (bvar, addr);
361 name = make_ssa_name (bvar, stmt);
362 gimple_assign_set_lhs (stmt, name);
363 gsi_insert_on_edge_immediate (entry, stmt);
365 nielt = XNEW (struct int_tree_map);
366 nielt->uid = uid;
367 nielt->to = name;
368 *dslot = nielt;
370 else
371 name = ((struct int_tree_map *) *dslot)->to;
373 /* Express the address in terms of the canonical SSA name. */
374 TREE_OPERAND (*var_p, 0) = name;
375 if (gsi == NULL)
376 return build_fold_addr_expr_with_type (obj, type);
378 name = force_gimple_operand (build_addr (obj, current_function_decl),
379 &stmts, true, NULL_TREE);
380 if (!gimple_seq_empty_p (stmts))
381 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
383 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
385 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
386 NULL_TREE);
387 if (!gimple_seq_empty_p (stmts))
388 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
391 return name;
394 /* Callback for htab_traverse. Create the initialization statement
395 for reduction described in SLOT, and place it at the preheader of
396 the loop described in DATA. */
398 static int
399 initialize_reductions (void **slot, void *data)
401 tree init, c;
402 tree bvar, type, arg;
403 edge e;
405 struct reduction_info *const reduc = (struct reduction_info *) *slot;
406 struct loop *loop = (struct loop *) data;
408 /* Create initialization in preheader:
409 reduction_variable = initialization value of reduction. */
411 /* In the phi node at the header, replace the argument coming
412 from the preheader with the reduction initialization value. */
414 /* Create a new variable to initialize the reduction. */
415 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
416 bvar = create_tmp_var (type, "reduction");
417 add_referenced_var (bvar);
419 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
420 OMP_CLAUSE_REDUCTION);
421 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
422 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
424 init = omp_reduction_init (c, TREE_TYPE (bvar));
425 reduc->init = init;
427 /* Replace the argument representing the initialization value
428 with the initialization value for the reduction (neutral
429 element for the particular operation, e.g. 0 for PLUS_EXPR,
430 1 for MULT_EXPR, etc).
431 Keep the old value in a new variable "reduction_initial",
432 that will be taken in consideration after the parallel
433 computing is done. */
435 e = loop_preheader_edge (loop);
436 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
437 /* Create new variable to hold the initial value. */
439 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
440 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
441 reduc->initial_value = arg;
442 return 1;
445 struct elv_data
447 struct walk_stmt_info info;
448 edge entry;
449 htab_t decl_address;
450 gimple_stmt_iterator *gsi;
451 bool changed;
452 bool reset;
455 /* Eliminates references to local variables in *TP out of the single
456 entry single exit region starting at DTA->ENTRY.
457 DECL_ADDRESS contains addresses of the references that had their
458 address taken already. If the expression is changed, CHANGED is
459 set to true. Callback for walk_tree. */
461 static tree
462 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
464 struct elv_data *const dta = (struct elv_data *) data;
465 tree t = *tp, var, addr, addr_type, type, obj;
467 if (DECL_P (t))
469 *walk_subtrees = 0;
471 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
472 return NULL_TREE;
474 type = TREE_TYPE (t);
475 addr_type = build_pointer_type (type);
476 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
477 dta->gsi);
478 if (dta->gsi == NULL && addr == NULL_TREE)
480 dta->reset = true;
481 return NULL_TREE;
484 *tp = build_simple_mem_ref (addr);
486 dta->changed = true;
487 return NULL_TREE;
490 if (TREE_CODE (t) == ADDR_EXPR)
492 /* ADDR_EXPR may appear in two contexts:
493 -- as a gimple operand, when the address taken is a function invariant
494 -- as gimple rhs, when the resulting address in not a function
495 invariant
496 We do not need to do anything special in the latter case (the base of
497 the memory reference whose address is taken may be replaced in the
498 DECL_P case). The former case is more complicated, as we need to
499 ensure that the new address is still a gimple operand. Thus, it
500 is not sufficient to replace just the base of the memory reference --
501 we need to move the whole computation of the address out of the
502 loop. */
503 if (!is_gimple_val (t))
504 return NULL_TREE;
506 *walk_subtrees = 0;
507 obj = TREE_OPERAND (t, 0);
508 var = get_base_address (obj);
509 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
510 return NULL_TREE;
512 addr_type = TREE_TYPE (t);
513 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
514 dta->gsi);
515 if (dta->gsi == NULL && addr == NULL_TREE)
517 dta->reset = true;
518 return NULL_TREE;
520 *tp = addr;
522 dta->changed = true;
523 return NULL_TREE;
526 if (!EXPR_P (t))
527 *walk_subtrees = 0;
529 return NULL_TREE;
532 /* Moves the references to local variables in STMT at *GSI out of the single
533 entry single exit region starting at ENTRY. DECL_ADDRESS contains
534 addresses of the references that had their address taken
535 already. */
537 static void
538 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
539 htab_t decl_address)
541 struct elv_data dta;
542 gimple stmt = gsi_stmt (*gsi);
544 memset (&dta.info, '\0', sizeof (dta.info));
545 dta.entry = entry;
546 dta.decl_address = decl_address;
547 dta.changed = false;
548 dta.reset = false;
550 if (gimple_debug_bind_p (stmt))
552 dta.gsi = NULL;
553 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
554 eliminate_local_variables_1, &dta.info, NULL);
555 if (dta.reset)
557 gimple_debug_bind_reset_value (stmt);
558 dta.changed = true;
561 else
563 dta.gsi = gsi;
564 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
567 if (dta.changed)
568 update_stmt (stmt);
571 /* Eliminates the references to local variables from the single entry
572 single exit region between the ENTRY and EXIT edges.
574 This includes:
575 1) Taking address of a local variable -- these are moved out of the
576 region (and temporary variable is created to hold the address if
577 necessary).
579 2) Dereferencing a local variable -- these are replaced with indirect
580 references. */
582 static void
583 eliminate_local_variables (edge entry, edge exit)
585 basic_block bb;
586 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
587 unsigned i;
588 gimple_stmt_iterator gsi;
589 bool has_debug_stmt = false;
590 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
591 free);
592 basic_block entry_bb = entry->src;
593 basic_block exit_bb = exit->dest;
595 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
597 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
598 if (bb != entry_bb && bb != exit_bb)
599 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
600 if (gimple_debug_bind_p (gsi_stmt (gsi)))
601 has_debug_stmt = true;
602 else
603 eliminate_local_variables_stmt (entry, &gsi, decl_address);
605 if (has_debug_stmt)
606 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
607 if (bb != entry_bb && bb != exit_bb)
608 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
609 if (gimple_debug_bind_p (gsi_stmt (gsi)))
610 eliminate_local_variables_stmt (entry, &gsi, decl_address);
612 htab_delete (decl_address);
613 VEC_free (basic_block, heap, body);
616 /* Returns true if expression EXPR is not defined between ENTRY and
617 EXIT, i.e. if all its operands are defined outside of the region. */
619 static bool
620 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
622 basic_block entry_bb = entry->src;
623 basic_block exit_bb = exit->dest;
624 basic_block def_bb;
626 if (is_gimple_min_invariant (expr))
627 return true;
629 if (TREE_CODE (expr) == SSA_NAME)
631 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
632 if (def_bb
633 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
634 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
635 return false;
637 return true;
640 return false;
643 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
644 The copies are stored to NAME_COPIES, if NAME was already duplicated,
645 its duplicate stored in NAME_COPIES is returned.
647 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
648 duplicated, storing the copies in DECL_COPIES. */
650 static tree
651 separate_decls_in_region_name (tree name,
652 htab_t name_copies, htab_t decl_copies,
653 bool copy_name_p)
655 tree copy, var, var_copy;
656 unsigned idx, uid, nuid;
657 struct int_tree_map ielt, *nielt;
658 struct name_to_copy_elt elt, *nelt;
659 void **slot, **dslot;
661 if (TREE_CODE (name) != SSA_NAME)
662 return name;
664 idx = SSA_NAME_VERSION (name);
665 elt.version = idx;
666 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
667 copy_name_p ? INSERT : NO_INSERT);
668 if (slot && *slot)
669 return ((struct name_to_copy_elt *) *slot)->new_name;
671 var = SSA_NAME_VAR (name);
672 uid = DECL_UID (var);
673 ielt.uid = uid;
674 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
675 if (!*dslot)
677 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
678 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
679 add_referenced_var (var_copy);
680 nielt = XNEW (struct int_tree_map);
681 nielt->uid = uid;
682 nielt->to = var_copy;
683 *dslot = nielt;
685 /* Ensure that when we meet this decl next time, we won't duplicate
686 it again. */
687 nuid = DECL_UID (var_copy);
688 ielt.uid = nuid;
689 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
690 gcc_assert (!*dslot);
691 nielt = XNEW (struct int_tree_map);
692 nielt->uid = nuid;
693 nielt->to = var_copy;
694 *dslot = nielt;
696 else
697 var_copy = ((struct int_tree_map *) *dslot)->to;
699 if (copy_name_p)
701 copy = duplicate_ssa_name (name, NULL);
702 nelt = XNEW (struct name_to_copy_elt);
703 nelt->version = idx;
704 nelt->new_name = copy;
705 nelt->field = NULL_TREE;
706 *slot = nelt;
708 else
710 gcc_assert (!slot);
711 copy = name;
714 SSA_NAME_VAR (copy) = var_copy;
715 return copy;
718 /* Finds the ssa names used in STMT that are defined outside the
719 region between ENTRY and EXIT and replaces such ssa names with
720 their duplicates. The duplicates are stored to NAME_COPIES. Base
721 decls of all ssa names used in STMT (including those defined in
722 LOOP) are replaced with the new temporary variables; the
723 replacement decls are stored in DECL_COPIES. */
725 static void
726 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
727 htab_t name_copies, htab_t decl_copies)
729 use_operand_p use;
730 def_operand_p def;
731 ssa_op_iter oi;
732 tree name, copy;
733 bool copy_name_p;
735 mark_virtual_ops_for_renaming (stmt);
737 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
739 name = DEF_FROM_PTR (def);
740 gcc_assert (TREE_CODE (name) == SSA_NAME);
741 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
742 false);
743 gcc_assert (copy == name);
746 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
748 name = USE_FROM_PTR (use);
749 if (TREE_CODE (name) != SSA_NAME)
750 continue;
752 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
753 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
754 copy_name_p);
755 SET_USE (use, copy);
759 /* Finds the ssa names used in STMT that are defined outside the
760 region between ENTRY and EXIT and replaces such ssa names with
761 their duplicates. The duplicates are stored to NAME_COPIES. Base
762 decls of all ssa names used in STMT (including those defined in
763 LOOP) are replaced with the new temporary variables; the
764 replacement decls are stored in DECL_COPIES. */
766 static bool
767 separate_decls_in_region_debug_bind (gimple stmt,
768 htab_t name_copies, htab_t decl_copies)
770 use_operand_p use;
771 ssa_op_iter oi;
772 tree var, name;
773 struct int_tree_map ielt;
774 struct name_to_copy_elt elt;
775 void **slot, **dslot;
777 var = gimple_debug_bind_get_var (stmt);
778 if (TREE_CODE (var) == DEBUG_EXPR_DECL)
779 return true;
780 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
781 ielt.uid = DECL_UID (var);
782 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
783 if (!dslot)
784 return true;
785 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
787 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
789 name = USE_FROM_PTR (use);
790 if (TREE_CODE (name) != SSA_NAME)
791 continue;
793 elt.version = SSA_NAME_VERSION (name);
794 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
795 if (!slot)
797 gimple_debug_bind_reset_value (stmt);
798 update_stmt (stmt);
799 break;
802 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
805 return false;
808 /* Callback for htab_traverse. Adds a field corresponding to the reduction
809 specified in SLOT. The type is passed in DATA. */
811 static int
812 add_field_for_reduction (void **slot, void *data)
815 struct reduction_info *const red = (struct reduction_info *) *slot;
816 tree const type = (tree) data;
817 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
818 tree field = build_decl (gimple_location (red->reduc_stmt),
819 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
821 insert_field_into_struct (type, field);
823 red->field = field;
825 return 1;
828 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
829 described in SLOT. The type is passed in DATA. */
831 static int
832 add_field_for_name (void **slot, void *data)
834 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
835 tree type = (tree) data;
836 tree name = ssa_name (elt->version);
837 tree var = SSA_NAME_VAR (name);
838 tree field = build_decl (DECL_SOURCE_LOCATION (var),
839 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
841 insert_field_into_struct (type, field);
842 elt->field = field;
844 return 1;
847 /* Callback for htab_traverse. A local result is the intermediate result
848 computed by a single
849 thread, or the initial value in case no iteration was executed.
850 This function creates a phi node reflecting these values.
851 The phi's result will be stored in NEW_PHI field of the
852 reduction's data structure. */
854 static int
855 create_phi_for_local_result (void **slot, void *data)
857 struct reduction_info *const reduc = (struct reduction_info *) *slot;
858 const struct loop *const loop = (const struct loop *) data;
859 edge e;
860 gimple new_phi;
861 basic_block store_bb;
862 tree local_res;
863 source_location locus;
865 /* STORE_BB is the block where the phi
866 should be stored. It is the destination of the loop exit.
867 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
868 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
870 /* STORE_BB has two predecessors. One coming from the loop
871 (the reduction's result is computed at the loop),
872 and another coming from a block preceding the loop,
873 when no iterations
874 are executed (the initial value should be taken). */
875 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
876 e = EDGE_PRED (store_bb, 1);
877 else
878 e = EDGE_PRED (store_bb, 0);
879 local_res
880 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
881 NULL);
882 locus = gimple_location (reduc->reduc_stmt);
883 new_phi = create_phi_node (local_res, store_bb);
884 SSA_NAME_DEF_STMT (local_res) = new_phi;
885 add_phi_arg (new_phi, reduc->init, e, locus);
886 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
887 FALLTHRU_EDGE (loop->latch), locus);
888 reduc->new_phi = new_phi;
890 return 1;
893 struct clsn_data
895 tree store;
896 tree load;
898 basic_block store_bb;
899 basic_block load_bb;
902 /* Callback for htab_traverse. Create an atomic instruction for the
903 reduction described in SLOT.
904 DATA annotates the place in memory the atomic operation relates to,
905 and the basic block it needs to be generated in. */
907 static int
908 create_call_for_reduction_1 (void **slot, void *data)
910 struct reduction_info *const reduc = (struct reduction_info *) *slot;
911 struct clsn_data *const clsn_data = (struct clsn_data *) data;
912 gimple_stmt_iterator gsi;
913 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
914 tree load_struct;
915 basic_block bb;
916 basic_block new_bb;
917 edge e;
918 tree t, addr, ref, x;
919 tree tmp_load, name;
920 gimple load;
922 load_struct = build_simple_mem_ref (clsn_data->load);
923 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
925 addr = build_addr (t, current_function_decl);
927 /* Create phi node. */
928 bb = clsn_data->load_bb;
930 e = split_block (bb, t);
931 new_bb = e->dest;
933 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
934 add_referenced_var (tmp_load);
935 tmp_load = make_ssa_name (tmp_load, NULL);
936 load = gimple_build_omp_atomic_load (tmp_load, addr);
937 SSA_NAME_DEF_STMT (tmp_load) = load;
938 gsi = gsi_start_bb (new_bb);
939 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
941 e = split_block (new_bb, load);
942 new_bb = e->dest;
943 gsi = gsi_start_bb (new_bb);
944 ref = tmp_load;
945 x = fold_build2 (reduc->reduction_code,
946 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
947 PHI_RESULT (reduc->new_phi));
949 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
950 GSI_CONTINUE_LINKING);
952 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
953 return 1;
956 /* Create the atomic operation at the join point of the threads.
957 REDUCTION_LIST describes the reductions in the LOOP.
958 LD_ST_DATA describes the shared data structure where
959 shared data is stored in and loaded from. */
960 static void
961 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
962 struct clsn_data *ld_st_data)
964 htab_traverse (reduction_list, create_phi_for_local_result, loop);
965 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
966 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
967 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
970 /* Callback for htab_traverse. Loads the final reduction value at the
971 join point of all threads, and inserts it in the right place. */
973 static int
974 create_loads_for_reductions (void **slot, void *data)
976 struct reduction_info *const red = (struct reduction_info *) *slot;
977 struct clsn_data *const clsn_data = (struct clsn_data *) data;
978 gimple stmt;
979 gimple_stmt_iterator gsi;
980 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
981 tree load_struct;
982 tree name;
983 tree x;
985 gsi = gsi_after_labels (clsn_data->load_bb);
986 load_struct = build_simple_mem_ref (clsn_data->load);
987 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
988 NULL_TREE);
990 x = load_struct;
991 name = PHI_RESULT (red->keep_res);
992 stmt = gimple_build_assign (name, x);
993 SSA_NAME_DEF_STMT (name) = stmt;
995 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
997 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
998 !gsi_end_p (gsi); gsi_next (&gsi))
999 if (gsi_stmt (gsi) == red->keep_res)
1001 remove_phi_node (&gsi, false);
1002 return 1;
1004 gcc_unreachable ();
1007 /* Load the reduction result that was stored in LD_ST_DATA.
1008 REDUCTION_LIST describes the list of reductions that the
1009 loads should be generated for. */
1010 static void
1011 create_final_loads_for_reduction (htab_t reduction_list,
1012 struct clsn_data *ld_st_data)
1014 gimple_stmt_iterator gsi;
1015 tree t;
1016 gimple stmt;
1018 gsi = gsi_after_labels (ld_st_data->load_bb);
1019 t = build_fold_addr_expr (ld_st_data->store);
1020 stmt = gimple_build_assign (ld_st_data->load, t);
1022 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1023 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1025 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1029 /* Callback for htab_traverse. Store the neutral value for the
1030 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1031 1 for MULT_EXPR, etc. into the reduction field.
1032 The reduction is specified in SLOT. The store information is
1033 passed in DATA. */
1035 static int
1036 create_stores_for_reduction (void **slot, void *data)
1038 struct reduction_info *const red = (struct reduction_info *) *slot;
1039 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1040 tree t;
1041 gimple stmt;
1042 gimple_stmt_iterator gsi;
1043 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1045 gsi = gsi_last_bb (clsn_data->store_bb);
1046 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1047 stmt = gimple_build_assign (t, red->initial_value);
1048 mark_virtual_ops_for_renaming (stmt);
1049 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1051 return 1;
1054 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1055 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1056 specified in SLOT. */
1058 static int
1059 create_loads_and_stores_for_name (void **slot, void *data)
1061 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1062 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1063 tree t;
1064 gimple stmt;
1065 gimple_stmt_iterator gsi;
1066 tree type = TREE_TYPE (elt->new_name);
1067 tree load_struct;
1069 gsi = gsi_last_bb (clsn_data->store_bb);
1070 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1071 stmt = gimple_build_assign (t, ssa_name (elt->version));
1072 mark_virtual_ops_for_renaming (stmt);
1073 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1075 gsi = gsi_last_bb (clsn_data->load_bb);
1076 load_struct = build_simple_mem_ref (clsn_data->load);
1077 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1078 stmt = gimple_build_assign (elt->new_name, t);
1079 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1080 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1082 return 1;
1085 /* Moves all the variables used in LOOP and defined outside of it (including
1086 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1087 name) to a structure created for this purpose. The code
1089 while (1)
1091 use (a);
1092 use (b);
1095 is transformed this way:
1097 bb0:
1098 old.a = a;
1099 old.b = b;
1101 bb1:
1102 a' = new->a;
1103 b' = new->b;
1104 while (1)
1106 use (a');
1107 use (b');
1110 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1111 pointer `new' is intentionally not initialized (the loop will be split to a
1112 separate function later, and `new' will be initialized from its arguments).
1113 LD_ST_DATA holds information about the shared data structure used to pass
1114 information among the threads. It is initialized here, and
1115 gen_parallel_loop will pass it to create_call_for_reduction that
1116 needs this information. REDUCTION_LIST describes the reductions
1117 in LOOP. */
1119 static void
1120 separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1121 tree *arg_struct, tree *new_arg_struct,
1122 struct clsn_data *ld_st_data)
1125 basic_block bb1 = split_edge (entry);
1126 basic_block bb0 = single_pred (bb1);
1127 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1128 name_to_copy_elt_eq, free);
1129 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1130 free);
1131 unsigned i;
1132 tree type, type_name, nvar;
1133 gimple_stmt_iterator gsi;
1134 struct clsn_data clsn_data;
1135 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1136 basic_block bb;
1137 basic_block entry_bb = bb1;
1138 basic_block exit_bb = exit->dest;
1139 bool has_debug_stmt = false;
/* ENTRY was just split above; the region is now entered through the
   single successor edge of the new block BB1.  */
1141 entry = single_succ_edge (entry_bb);
1142 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
/* First pass over the region's phis and real statements; debug binds are
   only noted here and handled in a second pass below.  */
1144 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1146 if (bb != entry_bb && bb != exit_bb)
1148 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1149 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1150 name_copies, decl_copies);
1152 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1154 gimple stmt = gsi_stmt (gsi);
1156 if (is_gimple_debug (stmt))
1157 has_debug_stmt = true;
1158 else
1159 separate_decls_in_region_stmt (entry, exit, stmt,
1160 name_copies, decl_copies);
1165 /* Now process debug bind stmts. We must not create decls while
1166 processing debug stmts, so we defer their processing so as to
1167 make sure we will have debug info for as many variables as
1168 possible (all of those that were dealt with in the loop above),
1169 and discard those for which we know there's nothing we can
1170 do. */
1171 if (has_debug_stmt)
1172 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1173 if (bb != entry_bb && bb != exit_bb)
1175 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1177 gimple stmt = gsi_stmt (gsi);
1179 if (gimple_debug_bind_p (stmt))
1181 if (separate_decls_in_region_debug_bind (stmt,
1182 name_copies,
1183 decl_copies))
1185 gsi_remove (&gsi, true);
1186 continue;
1190 gsi_next (&gsi);
1194 VEC_free (basic_block, heap, body);
1196 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1198 /* It may happen that there is nothing to copy (if there are only
1199 loop carried and external variables in the loop). */
1200 *arg_struct = NULL;
1201 *new_arg_struct = NULL;
1203 else
1205 /* Create the type for the structure to store the ssa names to. */
1206 type = lang_hooks.types.make_type (RECORD_TYPE);
1207 type_name = build_decl (UNKNOWN_LOCATION,
1208 TYPE_DECL, create_tmp_var_name (".paral_data"),
1209 type);
1210 TYPE_NAME (type) = type_name;
1212 htab_traverse (name_copies, add_field_for_name, type);
1213 if (reduction_list && htab_elements (reduction_list) > 0)
1215 /* Create the fields for reductions. */
1216 htab_traverse (reduction_list, add_field_for_reduction,
1217 type);
1219 layout_type (type);
1221 /* Create the loads and stores. */
1222 *arg_struct = create_tmp_var (type, ".paral_data_store");
1223 add_referenced_var (*arg_struct);
1224 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1225 add_referenced_var (nvar);
1226 *new_arg_struct = make_ssa_name (nvar, NULL);
1228 ld_st_data->store = *arg_struct;
1229 ld_st_data->load = *new_arg_struct;
1230 ld_st_data->store_bb = bb0;
1231 ld_st_data->load_bb = bb1;
1233 htab_traverse (name_copies, create_loads_and_stores_for_name,
1234 ld_st_data);
1236 /* Load the calculation from memory (after the join of the threads). */
1238 if (reduction_list && htab_elements (reduction_list) > 0)
1240 htab_traverse (reduction_list, create_stores_for_reduction,
1241 ld_st_data);
/* create_final_loads_for_reduction only uses LOAD, LOAD_BB and STORE,
   so CLSN_DATA.STORE_BB is deliberately left unset here.  */
1242 clsn_data.load = make_ssa_name (nvar, NULL);
1243 clsn_data.load_bb = exit->dest;
1244 clsn_data.store = ld_st_data->store;
1245 create_final_loads_for_reduction (reduction_list, &clsn_data);
1249 htab_delete (decl_copies);
1250 htab_delete (name_copies);
1253 /* Bitmap containing uids of functions created by parallelization. We cannot
1254 allocate it from the default obstack, as it must live across compilation
1255 of several functions; we make it gc allocated instead. */
/* Allocated lazily in create_loop_fn; queried by parallelized_function_p.  */
1257 static GTY(()) bitmap parallelized_functions;
1259 /* Returns true if FN was created by create_loop_fn. */
1261 static bool
1262 parallelized_function_p (tree fn)
1264 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1265 return false;
1267 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1270 /* Creates and returns an empty function that will receive the body of
1271 a parallelized loop. */
1273 static tree
1274 create_loop_fn (location_t loc)
1276 char buf[100];
1277 char *tname;
1278 tree decl, type, name, t;
1279 struct function *act_cfun = cfun;
1280 static unsigned loopfn_num;
1282 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1283 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1284 clean_symbol_name (tname);
1285 name = get_identifier (tname);
1286 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1288 decl = build_decl (loc, FUNCTION_DECL, name, type);
1289 if (!parallelized_functions)
1290 parallelized_functions = BITMAP_GGC_ALLOC ();
1291 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1293 TREE_STATIC (decl) = 1;
1294 TREE_USED (decl) = 1;
1295 DECL_ARTIFICIAL (decl) = 1;
1296 DECL_IGNORED_P (decl) = 0;
1297 TREE_PUBLIC (decl) = 0;
1298 DECL_UNINLINABLE (decl) = 1;
1299 DECL_EXTERNAL (decl) = 0;
1300 DECL_CONTEXT (decl) = NULL_TREE;
1301 DECL_INITIAL (decl) = make_node (BLOCK);
1303 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1304 DECL_ARTIFICIAL (t) = 1;
1305 DECL_IGNORED_P (t) = 1;
1306 DECL_RESULT (decl) = t;
1308 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1309 ptr_type_node);
1310 DECL_ARTIFICIAL (t) = 1;
1311 DECL_ARG_TYPE (t) = ptr_type_node;
1312 DECL_CONTEXT (t) = decl;
1313 TREE_USED (t) = 1;
1314 DECL_ARGUMENTS (decl) = t;
1316 allocate_struct_function (decl, false);
1318 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1319 it. */
1320 set_cfun (act_cfun);
1322 return decl;
1325 /* Moves the exit condition of LOOP to the beginning of its header, and
1326 duplicates the part of the last iteration that gets disabled to the
1327 exit of the loop. NIT is the number of iterations of the loop
1328 (used to initialize the variables in the duplicated part).
1330 TODO: the common case is that latch of the loop is empty and immediately
1331 follows the loop exit. In this case, it would be better not to copy the
1332 body of the loop, but only move the entry of the loop directly before the
1333 exit check and increase the number of iterations of the loop by one.
1334 This may need some additional preconditioning in case NIT = ~0.
1335 REDUCTION_LIST describes the reductions in LOOP. */
1337 static void
1338 transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1340 basic_block *bbs, *nbbs, ex_bb, orig_header;
1341 unsigned n;
1342 bool ok;
1343 edge exit = single_dom_exit (loop), hpred;
1344 tree control, control_name, res, t;
1345 gimple phi, nphi, cond_stmt, stmt, cond_nit;
1346 gimple_stmt_iterator gsi;
1347 tree nit_1;
/* Split the header after its labels; the non-phi statements end up in
   ORIG_HEADER, leaving the header with only labels and phi nodes.  */
1349 split_block_after_labels (loop->header);
1350 orig_header = single_succ (loop->header);
1351 hpred = single_succ_edge (loop->header);
1353 cond_stmt = last_stmt (exit->src);
1354 control = gimple_cond_lhs (cond_stmt);
1355 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1357 /* Make sure that we have phi nodes on exit for all loop header phis
1358 (create_parallel_loop requires that). */
1359 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1361 phi = gsi_stmt (gsi);
1362 res = PHI_RESULT (phi);
1363 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1364 SET_PHI_RESULT (phi, t);
1365 nphi = create_phi_node (res, orig_header);
1366 SSA_NAME_DEF_STMT (res) = nphi;
1367 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
/* If this phi defines the loop control variable, the exit test must
   now use the renamed header value T.  */
1369 if (res == control)
1371 gimple_cond_set_lhs (cond_stmt, t);
1372 update_stmt (cond_stmt);
1373 control = t;
/* BBS is the loop body in dominance order; bbs[0] is the header, which
   is skipped when duplicating the tail of the last iteration.  */
1376 bbs = get_loop_body_in_dom_order (loop);
1378 for (n = 0; bbs[n] != loop->latch; n++)
1379 continue;
1380 nbbs = XNEWVEC (basic_block, n);
1381 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1382 bbs + 1, n, nbbs);
1383 gcc_assert (ok);
1384 free (bbs);
1385 ex_bb = nbbs[0];
1386 free (nbbs);
1388 /* Other than reductions, the only gimple reg that should be copied
1389 out of the loop is the control variable. */
1391 control_name = NULL_TREE;
1392 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1394 phi = gsi_stmt (gsi);
1395 res = PHI_RESULT (phi);
1396 if (!is_gimple_reg (res))
1398 gsi_next (&gsi);
1399 continue;
1402 /* Check if it is a part of reduction. If it is,
1403 keep the phi at the reduction's keep_res field. The
1404 PHI_RESULT of this phi is the resulting value of the reduction
1405 variable when exiting the loop. */
1407 exit = single_dom_exit (loop);
1409 if (htab_elements (reduction_list) > 0)
1411 struct reduction_info *red;
1413 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1414 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1415 if (red)
1417 red->keep_res = phi;
1418 gsi_next (&gsi);
1419 continue;
/* Not a reduction, so it must be the (sole) control variable copy.  */
1422 gcc_assert (control_name == NULL_TREE
1423 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1424 control_name = res;
1425 remove_phi_node (&gsi, false);
1427 gcc_assert (control_name != NULL_TREE);
1429 /* Initialize the control variable to number of iterations
1430 according to the rhs of the exit condition. */
1431 gsi = gsi_after_labels (ex_bb);
1432 cond_nit = last_stmt (exit->src);
1433 nit_1 = gimple_cond_rhs (cond_nit);
1434 nit_1 = force_gimple_operand_gsi (&gsi,
1435 fold_convert (TREE_TYPE (control_name), nit_1),
1436 false, NULL_TREE, false, GSI_SAME_STMT);
1437 stmt = gimple_build_assign (control_name, nit_1);
1438 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1439 SSA_NAME_DEF_STMT (control_name) = stmt;
1442 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1443 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1444 NEW_DATA is the variable that should be initialized from the argument
1445 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1446 basic block containing GIMPLE_OMP_PARALLEL tree. */
1448 static basic_block
1449 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1450 tree new_data, unsigned n_threads, location_t loc)
1452 gimple_stmt_iterator gsi;
1453 basic_block bb, paral_bb, for_bb, ex_bb;
1454 tree t, param;
1455 gimple stmt, for_stmt, phi, cond_stmt;
1456 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1457 edge exit, nexit, guard, end, e;
1459 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1460 bb = loop_preheader_edge (loop)->src;
1461 paral_bb = single_pred (bb);
1462 gsi = gsi_last_bb (paral_bb);
1464 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1465 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1466 = build_int_cst (integer_type_node, n_threads);
1467 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1468 gimple_set_location (stmt, loc);
1470 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1472 /* Initialize NEW_DATA. */
1473 if (data)
1475 gsi = gsi_after_labels (bb);
1477 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1478 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1479 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1480 SSA_NAME_DEF_STMT (param) = stmt;
1482 stmt = gimple_build_assign (new_data,
1483 fold_convert (TREE_TYPE (new_data), param));
1484 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1485 SSA_NAME_DEF_STMT (new_data) = stmt;
1488 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1489 bb = split_loop_exit_edge (single_dom_exit (loop));
1490 gsi = gsi_last_bb (bb);
1491 stmt = gimple_build_omp_return (false);
1492 gimple_set_location (stmt, loc);
1493 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1495 /* Extract data for GIMPLE_OMP_FOR. */
1496 gcc_assert (loop->header == single_dom_exit (loop)->src);
1497 cond_stmt = last_stmt (loop->header);
1499 cvar = gimple_cond_lhs (cond_stmt);
1500 cvar_base = SSA_NAME_VAR (cvar);
1501 phi = SSA_NAME_DEF_STMT (cvar);
1502 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
/* INITVAR replaces the preheader value of the IV; the GIMPLE_OMP_FOR
   built below is set as its defining statement (per-thread index).  */
1503 initvar = make_ssa_name (cvar_base, NULL);
1504 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1505 initvar);
1506 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
/* Remove the increment of the control variable; the GIMPLE_OMP_CONTINUE
   emitted below takes over the definition of CVAR_NEXT.  */
1508 gsi = gsi_last_nondebug_bb (loop->latch);
1509 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1510 gsi_remove (&gsi, true);
1512 /* Prepare cfg. */
1513 for_bb = split_edge (loop_preheader_edge (loop));
1514 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1515 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1516 gcc_assert (exit == single_dom_exit (loop));
/* NOTE(review): the GUARD edge from FOR_BB to EX_BB presumably covers the
   case where this thread executes zero iterations — confirm.  */
1518 guard = make_edge (for_bb, ex_bb, 0);
1519 single_succ_edge (loop->latch)->flags = 0;
1520 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1521 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1523 source_location locus;
1524 tree def;
1525 phi = gsi_stmt (gsi);
1526 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1528 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1529 locus = gimple_phi_arg_location_from_edge (stmt,
1530 loop_preheader_edge (loop));
1531 add_phi_arg (phi, def, guard, locus);
1533 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1534 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1535 add_phi_arg (phi, def, end, locus);
1537 e = redirect_edge_and_branch (exit, nexit->dest);
1538 PENDING_STMT (e) = NULL;
1540 /* Emit GIMPLE_OMP_FOR. */
1541 gimple_cond_set_lhs (cond_stmt, cvar_base);
1542 type = TREE_TYPE (cvar);
1543 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1544 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1546 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1547 gimple_set_location (for_stmt, loc);
1548 gimple_omp_for_set_index (for_stmt, 0, initvar);
1549 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1550 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1551 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1552 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1553 cvar_base,
1554 build_int_cst (type, 1)));
1556 gsi = gsi_last_bb (for_bb);
1557 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1558 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1560 /* Emit GIMPLE_OMP_CONTINUE. */
1561 gsi = gsi_last_bb (loop->latch);
1562 stmt = gimple_build_omp_continue (cvar_next, cvar);
1563 gimple_set_location (stmt, loc);
1564 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1565 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1567 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1568 gsi = gsi_last_bb (ex_bb);
1569 stmt = gimple_build_omp_return (true);
1570 gimple_set_location (stmt, loc);
1571 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1573 return paral_bb;
1576 /* Generates code to execute the iterations of LOOP in N_THREADS
1577 threads in parallel.
1579 NITER describes number of iterations of LOOP.
1580 REDUCTION_LIST describes the reductions existent in the LOOP. */
1582 static void
1583 gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1584 unsigned n_threads, struct tree_niter_desc *niter)
1586 loop_iterator li;
1587 tree many_iterations_cond, type, nit;
1588 tree arg_struct, new_arg_struct;
1589 gimple_seq stmts;
1590 basic_block parallel_head;
1591 edge entry, exit;
1592 struct clsn_data clsn_data;
1593 unsigned prob;
1594 location_t loc;
1595 gimple cond_stmt;
1597 /* From
1599 ---------------------------------------------------------------------
1600 loop
1602 IV = phi (INIT, IV + STEP)
1603 BODY1;
1604 if (COND)
1605 break;
1606 BODY2;
1608 ---------------------------------------------------------------------
1610 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1611 we generate the following code:
1613 ---------------------------------------------------------------------
1615 if (MAY_BE_ZERO
1616 || NITER < MIN_PER_THREAD * N_THREADS)
1617 goto original;
1619 BODY1;
1620 store all local loop-invariant variables used in body of the loop to DATA.
1621 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1622 load the variables from DATA.
1623 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1624 BODY2;
1625 BODY1;
1626 GIMPLE_OMP_CONTINUE;
1627 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1628 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1629 goto end;
1631 original:
1632 loop
1634 IV = phi (INIT, IV + STEP)
1635 BODY1;
1636 if (COND)
1637 break;
1638 BODY2;
1641 end:
1645 /* Create two versions of the loop -- in the old one, we know that the
1646 number of iterations is large enough, and we will transform it into the
1647 loop that will be split to loop_fn, the new one will be used for the
1648 remaining iterations. */
1650 type = TREE_TYPE (niter->niter);
1651 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1652 NULL_TREE);
1653 if (stmts)
1654 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
/* Guard: take the parallel version only if NITER is known non-zero and
   there are at least MIN_PER_THREAD iterations per requested thread.  */
1656 many_iterations_cond =
1657 fold_build2 (GE_EXPR, boolean_type_node,
1658 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1659 many_iterations_cond
1660 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1661 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1662 many_iterations_cond);
1663 many_iterations_cond
1664 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1665 if (stmts)
1666 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
/* If the condition did not gimplify to a form usable in a GIMPLE_COND,
   force it into a fresh ssa name.  */
1667 if (!is_gimple_condexpr (many_iterations_cond))
1669 many_iterations_cond
1670 = force_gimple_operand (many_iterations_cond, &stmts,
1671 true, NULL_TREE);
1672 if (stmts)
1673 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1676 initialize_original_copy_tables ();
1678 /* We assume that the loop usually iterates a lot. */
1679 prob = 4 * REG_BR_PROB_BASE / 5;
1680 loop_version (loop, many_iterations_cond, NULL,
1681 prob, prob, REG_BR_PROB_BASE - prob, true);
1682 update_ssa (TODO_update_ssa);
1683 free_original_copy_tables ();
1685 /* Base all the induction variables in LOOP on a single control one. */
1686 canonicalize_loop_ivs (loop, &nit, true);
1688 /* Ensure that the exit condition is the first statement in the loop. */
1689 transform_to_exit_first_loop (loop, reduction_list, nit);
1691 /* Generate initializations for reductions. */
1692 if (htab_elements (reduction_list) > 0)
1693 htab_traverse (reduction_list, initialize_reductions, loop);
1695 /* Eliminate the references to local variables from the loop. */
1696 gcc_assert (single_exit (loop));
1697 entry = loop_preheader_edge (loop);
1698 exit = single_dom_exit (loop);
1700 eliminate_local_variables (entry, exit);
1701 /* In the old loop, move all variables non-local to the loop to a structure
1702 and back, and create separate decls for the variables used in loop. */
1703 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1704 &new_arg_struct, &clsn_data);
1706 /* Create the parallel constructs. */
1707 loc = UNKNOWN_LOCATION;
1708 cond_stmt = last_stmt (loop->header);
1709 if (cond_stmt)
1710 loc = gimple_location (cond_stmt);
1711 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1712 new_arg_struct, n_threads, loc);
1713 if (htab_elements (reduction_list) > 0)
1714 create_call_for_reduction (loop, reduction_list, &clsn_data);
1716 scev_reset ();
1718 /* Cancel the loop (it is simpler to do it here rather than to teach the
1719 expander to do it). */
1720 cancel_loop_tree (loop);
1722 /* Free loop bound estimations that could contain references to
1723 removed statements. */
1724 FOR_EACH_LOOP (li, loop, 0)
1725 free_numbers_of_iterations_estimates_loop (loop);
1727 /* Expand the parallel constructs. We do it directly here instead of running
1728 a separate expand_omp pass, since it is more efficient, and less likely to
1729 cause troubles with further analyses not being able to deal with the
1730 OMP trees. */
1732 omp_expand_local (parallel_head);
1735 /* Returns true when LOOP contains vector phi nodes. */
1737 static bool
1738 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1740 unsigned i;
1741 basic_block *bbs = get_loop_body_in_dom_order (loop);
1742 gimple_stmt_iterator gsi;
1743 bool res = true;
1745 for (i = 0; i < loop->num_nodes; i++)
1746 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1747 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1748 goto end;
1750 res = false;
1751 end:
1752 free (bbs);
1753 return res;
1756 /* Create a reduction_info struct, initialize it with REDUC_STMT
1757 and PHI, insert it to the REDUCTION_LIST. */
1759 static void
1760 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1762 PTR *slot;
1763 struct reduction_info *new_reduction;
1765 gcc_assert (reduc_stmt);
1767 if (dump_file && (dump_flags & TDF_DETAILS))
1769 fprintf (dump_file,
1770 "Detected reduction. reduction stmt is: \n");
1771 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1772 fprintf (dump_file, "\n");
1775 new_reduction = XCNEW (struct reduction_info);
1777 new_reduction->reduc_stmt = reduc_stmt;
1778 new_reduction->reduc_phi = phi;
1779 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1780 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1781 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1782 *slot = new_reduction;
1785 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1787 static int
1788 set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1790 struct reduction_info *const red = (struct reduction_info *) *slot;
1791 gimple_set_uid (red->reduc_phi, red->reduc_version);
1792 return 1;
1795 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1797 static void
1798 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1800 gimple_stmt_iterator gsi;
1801 loop_vec_info simple_loop_info;
1803 vect_dump = NULL;
1804 simple_loop_info = vect_analyze_loop_form (loop);
1806 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1808 gimple phi = gsi_stmt (gsi);
1809 affine_iv iv;
1810 tree res = PHI_RESULT (phi);
1811 bool double_reduc;
1813 if (!is_gimple_reg (res))
1814 continue;
/* A header phi that is not a simple iv is a reduction candidate; let the
   vectorizer's reduction detection classify it.  */
1816 if (!simple_iv (loop, loop, res, &iv, true)
1817 && simple_loop_info)
1819 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1820 phi, true,
1821 &double_reduc)
1822 if (reduc_stmt && !double_reduc)
1823 build_new_reduction (reduction_list, reduc_stmt, phi);
/* NOTE(review): SIMPLE_LOOP_INFO may be NULL when vect_analyze_loop_form
   fails; destroy_loop_vec_info is presumably NULL-safe — confirm.  */
1826 destroy_loop_vec_info (simple_loop_info, true);
1828 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1829 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1830 only now. */
1831 htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
1834 /* Try to initialize NITER for code generation part. */
1836 static bool
1837 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1839 edge exit = single_dom_exit (loop);
1841 gcc_assert (exit);
1843 /* We need to know # of iterations, and there should be no uses of values
1844 defined inside loop outside of it, unless the values are invariants of
1845 the loop. */
1846 if (!number_of_iterations_exit (loop, exit, niter, false))
1848 if (dump_file && (dump_flags & TDF_DETAILS))
1849 fprintf (dump_file, " FAILED: number of iterations not known\n");
1850 return false;
1853 return true;
1856 /* Try to initialize REDUCTION_LIST for code generation part.
1857 REDUCTION_LIST describes the reductions. */
1859 static bool
1860 try_create_reduction_list (loop_p loop, htab_t reduction_list)
1862 edge exit = single_dom_exit (loop);
1863 gimple_stmt_iterator gsi;
1865 gcc_assert (exit);
1867 gather_scalar_reductions (loop, reduction_list);
/* Every gimple reg live across the loop exit must belong to a reduction we
   detected; otherwise the loop cannot be parallelized.  */
1870 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1872 gimple phi = gsi_stmt (gsi);
1873 struct reduction_info *red;
1874 imm_use_iterator imm_iter;
1875 use_operand_p use_p;
1876 gimple reduc_phi;
1877 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1879 if (is_gimple_reg (val))
1881 if (dump_file && (dump_flags & TDF_DETAILS))
1883 fprintf (dump_file, "phi is ");
1884 print_gimple_stmt (dump_file, phi, 0, 0);
1885 fprintf (dump_file, "arg of phi to exit: value ");
1886 print_generic_expr (dump_file, val, 0);
1887 fprintf (dump_file, " used outside loop\n");
1888 fprintf (dump_file,
1889 " checking if it a part of reduction pattern: \n");
1891 if (htab_elements (reduction_list) == 0)
1893 if (dump_file && (dump_flags & TDF_DETAILS))
1894 fprintf (dump_file,
1895 " FAILED: it is not a part of reduction.\n");
1896 return false;
/* Find the reduction phi inside the loop that feeds this exit value.  */
1898 reduc_phi = NULL;
1899 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
1901 if (!gimple_debug_bind_p (USE_STMT (use_p))
1902 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
1904 reduc_phi = USE_STMT (use_p);
1905 break;
1908 red = reduction_phi (reduction_list, reduc_phi);
1909 if (red == NULL)
1911 if (dump_file && (dump_flags & TDF_DETAILS))
1912 fprintf (dump_file,
1913 " FAILED: it is not a part of reduction.\n");
1914 return false;
1916 if (dump_file && (dump_flags & TDF_DETAILS))
1918 fprintf (dump_file, "reduction phi is ");
1919 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
1920 fprintf (dump_file, "reduction stmt is ");
1921 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
1926 /* The iterations of the loop may communicate only through bivs whose
1927 iteration space can be distributed efficiently. */
1928 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1930 gimple phi = gsi_stmt (gsi);
1931 tree def = PHI_RESULT (phi);
1932 affine_iv iv;
1934 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
1936 struct reduction_info *red;
1938 red = reduction_phi (reduction_list, phi);
1939 if (red == NULL)
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 fprintf (dump_file,
1943 " FAILED: scalar dependency between iterations\n");
1944 return false;
1950 return true;
1953 /* Detect parallel loops and generate parallel code using libgomp
1954 primitives. Returns true if some loop was parallelized, false
1955 otherwise. */
1957 bool
1958 parallelize_loops (void)
1960 unsigned n_threads = flag_tree_parallelize_loops;
1961 bool changed = false;
1962 struct loop *loop;
1963 struct tree_niter_desc niter_desc;
1964 loop_iterator li;
1965 htab_t reduction_list;
1966 struct obstack parloop_obstack;
1967 HOST_WIDE_INT estimated;
1968 LOC loop_loc;
1970 /* Do not parallelize loops in the functions created by parallelization. */
1971 if (parallelized_function_p (cfun->decl))
1972 return false;
1973 if (cfun->has_nonlocal_label)
1974 return false;
1976 gcc_obstack_init (&parloop_obstack);
1977 reduction_list = htab_create (10, reduction_info_hash,
1978 reduction_info_eq, free);
/* NOTE(review): presumably required for the vect_analyze_loop_form calls
   inside gather_scalar_reductions; paired with free_stmt_vec_info_vec
   below — confirm.  */
1979 init_stmt_vec_info_vec ();
/* One reduction hash table is reused across all candidate loops; it is
   emptied at the top of each iteration.  */
1981 FOR_EACH_LOOP (li, loop, 0)
1983 htab_empty (reduction_list);
1984 if (dump_file && (dump_flags & TDF_DETAILS))
1986 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
1987 if (loop->inner)
1988 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
1989 else
1990 fprintf (dump_file, "loop %d is innermost\n",loop->num);
1993 /* If we use autopar in graphite pass, we use its marked dependency
1994 checking results. */
1995 if (flag_loop_parallelize_all && !loop->can_be_parallel)
1997 if (dump_file && (dump_flags & TDF_DETAILS))
1998 fprintf (dump_file, "loop is not parallel according to graphite\n");
1999 continue;
2002 if (!single_dom_exit (loop))
2005 if (dump_file && (dump_flags & TDF_DETAILS))
2006 fprintf (dump_file, "loop is !single_dom_exit\n");
2008 continue;
2011 if (/* And of course, the loop must be parallelizable. */
2012 !can_duplicate_loop_p (loop)
2013 || loop_has_blocks_with_irreducible_flag (loop)
2014 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2015 /* FIXME: the check for vector phi nodes could be removed. */
2016 || loop_has_vector_phi_nodes (loop))
2017 continue;
2018 estimated = estimated_loop_iterations_int (loop, false);
2019 /* FIXME: Bypass this check as graphite doesn't update the
2020 count and frequency correctly now. */
2021 if (!flag_loop_parallelize_all
2022 && ((estimated !=-1
2023 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2024 /* Do not bother with loops in cold areas. */
2025 || optimize_loop_nest_for_size_p (loop)))
2026 continue;
2028 if (!try_get_loop_niter (loop, &niter_desc))
2029 continue;
2031 if (!try_create_reduction_list (loop, reduction_list))
2032 continue;
2034 if (!flag_loop_parallelize_all
2035 && !loop_parallel_p (loop, &parloop_obstack))
2036 continue;
2038 changed = true;
2039 if (dump_file && (dump_flags & TDF_DETAILS))
2041 if (loop->inner)
2042 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2043 else
2044 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2045 loop_loc = find_loop_location (loop);
2046 if (loop_loc != UNKNOWN_LOC)
2047 fprintf (dump_file, "\nloop at %s:%d: ",
2048 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2050 gen_parallel_loop (loop, reduction_list,
2051 n_threads, &niter_desc);
/* Internal consistency checks after the transformation.  */
2052 verify_flow_info ();
2053 verify_dominators (CDI_DOMINATORS);
2054 verify_loop_structure ();
2055 verify_loop_closed_ssa (true);
2058 free_stmt_vec_info_vec ();
2059 htab_delete (reduction_list);
2060 obstack_free (&parloop_obstack, NULL);
2062 /* Parallelization will cause new function calls to be inserted through
2063 which local variables will escape. Reset the points-to solution
2064 for ESCAPED. */
2065 if (changed)
2066 pt_solution_reset (&cfun->gimple_df->escaped);
2068 return changed;
2071 #include "gt-tree-parloops.h"