mips.c (mips16_copy_fpr_return_value): New function, split out from...
[official-gcc.git] / gcc / tree-parloops.c
blobaca2b74c31a0d43307b1e149506f6aab23e0716c
1 /* Loop autoparallelization.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tree-flow.h"
30 #include "cfgloop.h"
31 #include "ggc.h"
32 #include "tree-data-ref.h"
33 #include "diagnostic.h"
34 #include "tree-pass.h"
35 #include "tree-scalar-evolution.h"
36 #include "hashtab.h"
37 #include "langhooks.h"
39 /* This pass tries to distribute iterations of loops into several threads.
40 The implementation is straightforward -- for each loop we test whether its
41 iterations are independent, and if it is the case (and some additional
42 conditions regarding profitability and correctness are satisfied), we
43 add OMP_PARALLEL and OMP_FOR codes and let omp expansion machinery do
44 its job.
46 The most of the complexity is in bringing the code into shape expected
47 by the omp expanders:
48 -- for OMP_FOR, ensuring that the loop has only one induction variable
49 and that the exit test is at the start of the loop body
50 -- for OMP_PARALLEL, replacing the references to local addressable
51 variables by accesses through pointers, and breaking up ssa chains
52 by storing the values incoming to the parallelized loop to a structure
53 passed to the new function as an argument (something similar is done
54 in omp gimplification, unfortunately only a small part of the code
55 can be shared).
57 TODO:
58 -- if there are several parallelizable loops in a function, it may be
59 possible to generate the threads just once (using synchronization to
60 ensure that cross-loop dependences are obeyed).
61 -- handling of common scalar dependence patterns (accumulation, ...)
62 -- handling of non-innermost loops */
64 /* Minimal number of iterations of a loop that should be executed in each
65 thread. */
66 #define MIN_PER_THREAD 100
68 /* Element of hashtable of names to copy. */
70 struct name_to_copy_elt
72 unsigned version; /* The version of the name to copy. */
73 tree new_name; /* The new name used in the copy. */
74 tree field; /* The field of the structure used to pass the
75 value. */
78 /* Equality and hash functions for hashtab code. */
80 static int
81 name_to_copy_elt_eq (const void *aa, const void *bb)
83 struct name_to_copy_elt *a = (struct name_to_copy_elt *) aa;
84 struct name_to_copy_elt *b = (struct name_to_copy_elt *) bb;
86 return a->version == b->version;
89 static hashval_t
90 name_to_copy_elt_hash (const void *aa)
92 struct name_to_copy_elt *a = (struct name_to_copy_elt *) aa;
94 return (hashval_t) a->version;
97 /* Returns true if the iterations of LOOP are independent on each other (that
98 is, if we can execute them in parallel), and if LOOP satisfies other
99 conditions that we need to be able to parallelize it. Description of number
100 of iterations is stored to NITER. */
102 static bool
103 loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter)
105 edge exit = single_dom_exit (loop);
106 VEC (ddr_p, heap) *dependence_relations;
107 VEC (data_reference_p, heap) *datarefs;
108 lambda_trans_matrix trans;
109 bool ret = false;
110 tree phi;
112 /* Only consider innermost loops with just one exit. The innermost-loop
113 restriction is not necessary, but it makes things simpler. */
114 if (loop->inner || !exit)
115 return false;
117 if (dump_file && (dump_flags & TDF_DETAILS))
118 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
120 /* We need to know # of iterations, and there should be no uses of values
121 defined inside loop outside of it, unless the values are invariants of
122 the loop. */
123 if (!number_of_iterations_exit (loop, exit, niter, false))
125 if (dump_file && (dump_flags & TDF_DETAILS))
126 fprintf (dump_file, " FAILED: number of iterations not known\n");
127 return false;
130 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
132 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
134 if (is_gimple_reg (val))
136 if (dump_file && (dump_flags & TDF_DETAILS))
137 fprintf (dump_file, " FAILED: value used outside loop\n");
138 return false;
142 /* The iterations of the loop may communicate only through bivs whose
143 iteration space can be distributed efficiently. */
144 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
146 tree def = PHI_RESULT (phi);
147 affine_iv iv;
149 if (is_gimple_reg (def)
150 && !simple_iv (loop, phi, def, &iv, true))
152 if (dump_file && (dump_flags & TDF_DETAILS))
153 fprintf (dump_file,
154 " FAILED: scalar dependency between iterations\n");
155 return false;
159 /* We need to version the loop to verify assumptions in runtime. */
160 if (!can_duplicate_loop_p (loop))
162 if (dump_file && (dump_flags & TDF_DETAILS))
163 fprintf (dump_file, " FAILED: cannot be duplicated\n");
164 return false;
167 /* Check for problems with dependences. If the loop can be reversed,
168 the iterations are independent. */
169 datarefs = VEC_alloc (data_reference_p, heap, 10);
170 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
171 compute_data_dependences_for_loop (loop, true, &datarefs,
172 &dependence_relations);
173 if (dump_file && (dump_flags & TDF_DETAILS))
174 dump_data_dependence_relations (dump_file, dependence_relations);
176 trans = lambda_trans_matrix_new (1, 1);
177 LTM_MATRIX (trans)[0][0] = -1;
179 if (lambda_transform_legal_p (trans, 1, dependence_relations))
181 ret = true;
182 if (dump_file && (dump_flags & TDF_DETAILS))
183 fprintf (dump_file, " SUCCESS: may be parallelized\n");
185 else if (dump_file && (dump_flags & TDF_DETAILS))
186 fprintf (dump_file, " FAILED: data dependencies exist across iterations\n");
188 free_dependence_relations (dependence_relations);
189 free_data_refs (datarefs);
191 return ret;
194 /* Assigns the address of VAR in TYPE to an ssa name, and returns this name.
195 The assignment statement is placed before LOOP. DECL_ADDRESS maps decls
196 to their addresses that can be reused. */
198 static tree
199 take_address_of (tree var, tree type, struct loop *loop, htab_t decl_address)
201 int uid = DECL_UID (var);
202 void **dslot;
203 struct int_tree_map ielt, *nielt;
204 tree name, bvar, stmt;
205 edge entry = loop_preheader_edge (loop);
207 ielt.uid = uid;
208 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
209 if (!*dslot)
211 bvar = create_tmp_var (type, get_name (var));
212 add_referenced_var (bvar);
213 stmt = build_gimple_modify_stmt (bvar,
214 fold_convert (type,
215 build_addr (var, current_function_decl)));
216 name = make_ssa_name (bvar, stmt);
217 GIMPLE_STMT_OPERAND (stmt, 0) = name;
218 bsi_insert_on_edge_immediate (entry, stmt);
220 nielt = XNEW (struct int_tree_map);
221 nielt->uid = uid;
222 nielt->to = name;
223 *dslot = nielt;
225 return name;
228 name = ((struct int_tree_map *) *dslot)->to;
229 if (TREE_TYPE (name) == type)
230 return name;
232 bvar = SSA_NAME_VAR (name);
233 stmt = build_gimple_modify_stmt (bvar,
234 fold_convert (type, name));
235 name = make_ssa_name (bvar, stmt);
236 GIMPLE_STMT_OPERAND (stmt, 0) = name;
237 bsi_insert_on_edge_immediate (entry, stmt);
239 return name;
242 /* Eliminates references to local variables in *TP out of LOOP. DECL_ADDRESS
243 contains addresses of the references that had their address taken already.
244 If the expression is changed, CHANGED is set to true. Callback for
245 walk_tree. */
247 struct elv_data
249 struct loop *loop;
250 htab_t decl_address;
251 bool changed;
254 static tree
255 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
257 struct elv_data *dta = data;
258 tree t = *tp, var, addr, addr_type, type;
260 if (DECL_P (t))
262 *walk_subtrees = 0;
264 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
265 return NULL_TREE;
267 type = TREE_TYPE (t);
268 addr_type = build_pointer_type (type);
269 addr = take_address_of (t, addr_type, dta->loop, dta->decl_address);
270 *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
272 dta->changed = true;
273 return NULL_TREE;
276 if (TREE_CODE (t) == ADDR_EXPR)
278 var = TREE_OPERAND (t, 0);
279 if (!DECL_P (var))
280 return NULL_TREE;
282 *walk_subtrees = 0;
283 if (!SSA_VAR_P (var) || DECL_EXTERNAL (var))
284 return NULL_TREE;
286 addr_type = TREE_TYPE (t);
287 addr = take_address_of (var, addr_type, dta->loop, dta->decl_address);
288 *tp = addr;
290 dta->changed = true;
291 return NULL_TREE;
294 if (!EXPR_P (t)
295 && !GIMPLE_STMT_P (t))
296 *walk_subtrees = 0;
298 return NULL_TREE;
301 /* Moves the references to local variables in STMT from LOOP. DECL_ADDRESS
302 contains addresses for the references for that we have already taken
303 them. */
305 static void
306 eliminate_local_variables_stmt (struct loop *loop, tree stmt,
307 htab_t decl_address)
309 struct elv_data dta;
311 dta.loop = loop;
312 dta.decl_address = decl_address;
313 dta.changed = false;
315 walk_tree (&stmt, eliminate_local_variables_1, &dta, NULL);
317 if (dta.changed)
318 update_stmt (stmt);
321 /* Eliminates the references to local variables from LOOP. This includes:
323 1) Taking address of a local variable -- these are moved out of the loop
324 (and temporary variable is created to hold the address if necessary).
325 2) Dereferencing a local variable -- these are replaced with indirect
326 references. */
328 static void
329 eliminate_local_variables (struct loop *loop)
331 basic_block bb, *body = get_loop_body (loop);
332 unsigned i;
333 block_stmt_iterator bsi;
334 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
335 free);
337 /* Find and rename the ssa names defined outside of loop. */
338 for (i = 0; i < loop->num_nodes; i++)
340 bb = body[i];
342 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
343 eliminate_local_variables_stmt (loop, bsi_stmt (bsi), decl_address);
346 htab_delete (decl_address);
349 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
350 The copies are stored to NAME_COPIES, if NAME was already duplicated,
351 its duplicate stored in NAME_COPIES is returned.
353 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
354 duplicated, storing the copies in DECL_COPIES. */
356 static tree
357 separate_decls_in_loop_name (tree name,
358 htab_t name_copies, htab_t decl_copies,
359 bool copy_name_p)
361 tree copy, var, var_copy;
362 unsigned idx, uid, nuid;
363 struct int_tree_map ielt, *nielt;
364 struct name_to_copy_elt elt, *nelt;
365 void **slot, **dslot;
367 if (TREE_CODE (name) != SSA_NAME)
368 return name;
370 idx = SSA_NAME_VERSION (name);
371 elt.version = idx;
372 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
373 copy_name_p ? INSERT : NO_INSERT);
374 if (slot && *slot)
375 return ((struct name_to_copy_elt *) *slot)->new_name;
377 var = SSA_NAME_VAR (name);
378 uid = DECL_UID (var);
379 ielt.uid = uid;
380 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
381 if (!*dslot)
383 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
384 add_referenced_var (var_copy);
385 nielt = XNEW (struct int_tree_map);
386 nielt->uid = uid;
387 nielt->to = var_copy;
388 *dslot = nielt;
390 /* Ensure that when we meet this decl next time, we won't duplicate
391 it again. */
392 nuid = DECL_UID (var_copy);
393 ielt.uid = nuid;
394 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
395 gcc_assert (!*dslot);
396 nielt = XNEW (struct int_tree_map);
397 nielt->uid = nuid;
398 nielt->to = var_copy;
399 *dslot = nielt;
401 else
402 var_copy = ((struct int_tree_map *) *dslot)->to;
404 if (copy_name_p)
406 copy = duplicate_ssa_name (name, NULL_TREE);
407 nelt = XNEW (struct name_to_copy_elt);
408 nelt->version = idx;
409 nelt->new_name = copy;
410 nelt->field = NULL_TREE;
411 *slot = nelt;
413 else
415 gcc_assert (!slot);
416 copy = name;
419 SSA_NAME_VAR (copy) = var_copy;
420 return copy;
423 /* Finds the ssa names used in STMT that are defined outside of LOOP and
424 replaces such ssa names with their duplicates. The duplicates are stored to
425 NAME_COPIES. Base decls of all ssa names used in STMT
426 (including those defined in LOOP) are replaced with the new temporary
427 variables; the replacement decls are stored in DECL_COPIES. */
429 static void
430 separate_decls_in_loop_stmt (struct loop *loop, tree stmt,
431 htab_t name_copies, htab_t decl_copies)
433 use_operand_p use;
434 def_operand_p def;
435 ssa_op_iter oi;
436 tree name, copy;
437 bool copy_name_p;
439 mark_virtual_ops_for_renaming (stmt);
441 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
443 name = DEF_FROM_PTR (def);
444 gcc_assert (TREE_CODE (name) == SSA_NAME);
445 copy = separate_decls_in_loop_name (name, name_copies, decl_copies,
446 false);
447 gcc_assert (copy == name);
450 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
452 name = USE_FROM_PTR (use);
453 if (TREE_CODE (name) != SSA_NAME)
454 continue;
456 copy_name_p = expr_invariant_in_loop_p (loop, name);
457 copy = separate_decls_in_loop_name (name, name_copies, decl_copies,
458 copy_name_p);
459 SET_USE (use, copy);
463 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
464 described in SLOT to the type passed in DATA. */
466 static int
467 add_field_for_name (void **slot, void *data)
469 struct name_to_copy_elt *elt = *slot;
470 tree type = data;
471 tree name = ssa_name (elt->version);
472 tree var = SSA_NAME_VAR (name);
473 tree field = build_decl (FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
475 insert_field_into_struct (type, field);
476 elt->field = field;
477 return 1;
480 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
481 store to a field of STORE in STORE_BB for the ssa name and its duplicate
482 specified in SLOT. */
484 struct clsn_data
486 tree store;
487 tree load;
489 basic_block store_bb;
490 basic_block load_bb;
493 static int
494 create_loads_and_stores_for_name (void **slot, void *data)
496 struct name_to_copy_elt *elt = *slot;
497 struct clsn_data *clsn_data = data;
498 tree stmt;
499 block_stmt_iterator bsi;
500 tree type = TREE_TYPE (elt->new_name);
501 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
502 tree load_struct;
504 bsi = bsi_last (clsn_data->store_bb);
505 stmt = build_gimple_modify_stmt (
506 build3 (COMPONENT_REF, type, clsn_data->store, elt->field,
507 NULL_TREE),
508 ssa_name (elt->version));
509 mark_virtual_ops_for_renaming (stmt);
510 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
512 bsi = bsi_last (clsn_data->load_bb);
513 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
514 stmt = build_gimple_modify_stmt (
515 elt->new_name,
516 build3 (COMPONENT_REF, type, load_struct, elt->field,
517 NULL_TREE));
518 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
519 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
521 return 1;
524 /* Moves all the variables used in LOOP and defined outside of it (including
525 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
526 name) to a structure created for this purpose. The code
528 while (1)
530 use (a);
531 use (b);
534 is transformed this way:
536 bb0:
537 old.a = a;
538 old.b = b;
540 bb1:
541 a' = new->a;
542 b' = new->b;
543 while (1)
545 use (a');
546 use (b');
549 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
550 pointer `new' is intentionally not initialized (the loop will be split to a
551 separate function later, and `new' will be initialized from its arguments).
554 static void
555 separate_decls_in_loop (struct loop *loop, tree *arg_struct,
556 tree *new_arg_struct)
558 basic_block bb1 = split_edge (loop_preheader_edge (loop));
559 basic_block bb0 = single_pred (bb1);
560 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
561 name_to_copy_elt_eq, free);
562 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
563 free);
564 basic_block bb, *body = get_loop_body (loop);
565 unsigned i;
566 tree phi, type, type_name, nvar;
567 block_stmt_iterator bsi;
568 struct clsn_data clsn_data;
570 /* Find and rename the ssa names defined outside of loop. */
571 for (i = 0; i < loop->num_nodes; i++)
573 bb = body[i];
575 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
576 separate_decls_in_loop_stmt (loop, phi, name_copies, decl_copies);
578 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
579 separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), name_copies,
580 decl_copies);
582 free (body);
584 if (htab_elements (name_copies) == 0)
586 /* It may happen that there is nothing to copy (if there are only
587 loop carried and external variables in the loop). */
588 *arg_struct = NULL;
589 *new_arg_struct = NULL;
591 else
593 /* Create the type for the structure to store the ssa names to. */
594 type = lang_hooks.types.make_type (RECORD_TYPE);
595 type_name = build_decl (TYPE_DECL, create_tmp_var_name (".paral_data"),
596 type);
597 TYPE_NAME (type) = type_name;
599 htab_traverse (name_copies, add_field_for_name, type);
600 layout_type (type);
602 /* Create the loads and stores. */
603 *arg_struct = create_tmp_var (type, ".paral_data_store");
604 add_referenced_var (*arg_struct);
605 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
606 add_referenced_var (nvar);
607 *new_arg_struct = make_ssa_name (nvar, NULL_TREE);
609 clsn_data.store = *arg_struct;
610 clsn_data.load = *new_arg_struct;
611 clsn_data.store_bb = bb0;
612 clsn_data.load_bb = bb1;
613 htab_traverse (name_copies, create_loads_and_stores_for_name,
614 &clsn_data);
617 htab_delete (decl_copies);
618 htab_delete (name_copies);
621 /* Bitmap containing uids of functions created by parallelization. We cannot
622 allocate it from the default obstack, as it must live across compilation
623 of several functions; we make it gc allocated instead. */
625 static GTY(()) bitmap parallelized_functions;
627 /* Returns true if FN was created by create_loop_fn. */
629 static bool
630 parallelized_function_p (tree fn)
632 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
633 return false;
635 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
638 /* Creates and returns an empty function that will receive the body of
639 a parallelized loop. */
641 static tree
642 create_loop_fn (void)
644 char buf[100];
645 char *tname;
646 tree decl, type, name, t;
647 struct function *act_cfun = cfun;
648 static unsigned loopfn_num;
650 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
651 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
652 clean_symbol_name (tname);
653 name = get_identifier (tname);
654 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
656 decl = build_decl (FUNCTION_DECL, name, type);
657 if (!parallelized_functions)
658 parallelized_functions = BITMAP_GGC_ALLOC ();
659 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
661 TREE_STATIC (decl) = 1;
662 TREE_USED (decl) = 1;
663 DECL_ARTIFICIAL (decl) = 1;
664 DECL_IGNORED_P (decl) = 0;
665 TREE_PUBLIC (decl) = 0;
666 DECL_UNINLINABLE (decl) = 1;
667 DECL_EXTERNAL (decl) = 0;
668 DECL_CONTEXT (decl) = NULL_TREE;
669 DECL_INITIAL (decl) = make_node (BLOCK);
671 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
672 DECL_ARTIFICIAL (t) = 1;
673 DECL_IGNORED_P (t) = 1;
674 DECL_RESULT (decl) = t;
676 t = build_decl (PARM_DECL, get_identifier (".paral_data_param"),
677 ptr_type_node);
678 DECL_ARTIFICIAL (t) = 1;
679 DECL_ARG_TYPE (t) = ptr_type_node;
680 DECL_CONTEXT (t) = decl;
681 TREE_USED (t) = 1;
682 DECL_ARGUMENTS (decl) = t;
684 allocate_struct_function (decl);
686 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
687 it. */
688 cfun = act_cfun;
690 return decl;
693 /* Bases all the induction variables in LOOP on a single induction variable
694 (unsigned with base 0 and step 1), whose final value is compared with
695 NIT. The induction variable is incremented in the loop latch. */
697 static void
698 canonicalize_loop_ivs (struct loop *loop, tree nit)
700 unsigned precision = TYPE_PRECISION (TREE_TYPE (nit));
701 tree phi, prev, res, type, var_before, val, atype, t, next;
702 block_stmt_iterator bsi;
703 bool ok;
704 affine_iv iv;
705 edge exit = single_dom_exit (loop);
707 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
709 res = PHI_RESULT (phi);
711 if (is_gimple_reg (res)
712 && TYPE_PRECISION (TREE_TYPE (res)) > precision)
713 precision = TYPE_PRECISION (TREE_TYPE (res));
716 type = lang_hooks.types.type_for_size (precision, 1);
718 bsi = bsi_last (loop->latch);
719 create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
720 loop, &bsi, true, &var_before, NULL);
722 bsi = bsi_after_labels (loop->header);
723 prev = NULL;
724 for (phi = phi_nodes (loop->header); phi; phi = next)
726 next = PHI_CHAIN (phi);
727 res = PHI_RESULT (phi);
729 if (!is_gimple_reg (res)
730 || res == var_before)
732 prev = phi;
733 continue;
736 ok = simple_iv (loop, phi, res, &iv, true);
737 gcc_assert (ok);
739 remove_phi_node (phi, prev, false);
741 atype = TREE_TYPE (res);
742 val = fold_build2 (PLUS_EXPR, atype,
743 unshare_expr (iv.base),
744 fold_build2 (MULT_EXPR, atype,
745 unshare_expr (iv.step),
746 fold_convert (atype, var_before)));
747 val = force_gimple_operand_bsi (&bsi, val, false, NULL_TREE, true,
748 BSI_SAME_STMT);
749 t = build_gimple_modify_stmt (res, val);
750 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
751 SSA_NAME_DEF_STMT (res) = t;
754 t = last_stmt (exit->src);
755 /* Make the loop exit if the control condition is not satisfied. */
756 if (exit->flags & EDGE_TRUE_VALUE)
758 edge te, fe;
760 extract_true_false_edges_from_block (exit->src, &te, &fe);
761 te->flags = EDGE_FALSE_VALUE;
762 fe->flags = EDGE_TRUE_VALUE;
764 COND_EXPR_COND (t) = build2 (LT_EXPR, boolean_type_node, var_before, nit);
767 /* Moves the exit condition of LOOP to the beginning of its header, and
768 duplicates the part of the last iteration that gets disabled to the
769 exit of the loop. NIT is the number of iterations of the loop
770 (used to initialize the variables in the duplicated part).
772 TODO: the common case is that latch of the loop is empty and immediatelly
773 follows the loop exit. In this case, it would be better not to copy the
774 body of the loop, but only move the entry of the loop directly before the
775 exit check and increase the number of iterations of the loop by one.
776 This may need some additional preconditioning in case NIT = ~0. */
778 static void
779 transform_to_exit_first_loop (struct loop *loop, tree nit)
781 basic_block *bbs, *nbbs, ex_bb, orig_header;
782 unsigned n;
783 bool ok;
784 edge exit = single_dom_exit (loop), hpred;
785 tree phi, nphi, cond, control, control_name, res, t, cond_stmt;
786 block_stmt_iterator bsi;
788 split_block_after_labels (loop->header);
789 orig_header = single_succ (loop->header);
790 hpred = single_succ_edge (loop->header);
792 cond_stmt = last_stmt (exit->src);
793 cond = COND_EXPR_COND (cond_stmt);
794 control = TREE_OPERAND (cond, 0);
795 gcc_assert (TREE_OPERAND (cond, 1) == nit);
797 /* Make sure that we have phi nodes on exit for all loop header phis
798 (create_parallel_loop requires that). */
799 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
801 res = PHI_RESULT (phi);
802 t = make_ssa_name (SSA_NAME_VAR (res), phi);
803 SET_PHI_RESULT (phi, t);
805 nphi = create_phi_node (res, orig_header);
806 SSA_NAME_DEF_STMT (res) = nphi;
807 add_phi_arg (nphi, t, hpred);
809 if (res == control)
811 TREE_OPERAND (cond, 0) = t;
812 update_stmt (cond_stmt);
813 control = t;
817 bbs = get_loop_body_in_dom_order (loop);
818 for (n = 0; bbs[n] != exit->src; n++)
819 continue;
820 nbbs = XNEWVEC (basic_block, n);
821 ok = tree_duplicate_sese_tail (single_succ_edge (loop->header), exit,
822 bbs + 1, n, nbbs);
823 gcc_assert (ok);
824 free (bbs);
825 ex_bb = nbbs[0];
826 free (nbbs);
828 /* The only gimple reg that should be copied out of the loop is the
829 control variable. */
830 control_name = NULL_TREE;
831 for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
833 res = PHI_RESULT (phi);
834 if (!is_gimple_reg (res))
835 continue;
837 gcc_assert (control_name == NULL_TREE
838 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
839 control_name = res;
841 gcc_assert (control_name != NULL_TREE);
842 phi = SSA_NAME_DEF_STMT (control_name);
843 remove_phi_node (phi, NULL_TREE, false);
845 /* Initialize the control variable to NIT. */
846 bsi = bsi_after_labels (ex_bb);
847 t = build_gimple_modify_stmt (control_name, nit);
848 bsi_insert_before (&bsi, t, BSI_NEW_STMT);
849 SSA_NAME_DEF_STMT (control_name) = t;
852 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
853 LOOP_FN and DATA are the arguments of OMP_PARALLEL.
854 NEW_DATA is the variable that should be initialized from the argument
855 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
856 basic block containing OMP_PARALLEL tree. */
858 static basic_block
859 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
860 tree new_data, unsigned n_threads)
862 block_stmt_iterator bsi;
863 basic_block bb, paral_bb, for_bb, ex_bb;
864 tree t, param, res, for_stmt;
865 tree cvar, cvar_init, initvar, cvar_next, cvar_base, cond, phi, type;
866 edge exit, nexit, guard, end, e;
868 /* Prepare the OMP_PARALLEL statement. */
869 bb = loop_preheader_edge (loop)->src;
870 paral_bb = single_pred (bb);
871 bsi = bsi_last (paral_bb);
873 t = build_omp_clause (OMP_CLAUSE_NUM_THREADS);
874 OMP_CLAUSE_NUM_THREADS_EXPR (t)
875 = build_int_cst (integer_type_node, n_threads);
876 t = build4 (OMP_PARALLEL, void_type_node, NULL_TREE, t,
877 loop_fn, data);
879 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
881 /* Initialize NEW_DATA. */
882 if (data)
884 bsi = bsi_after_labels (bb);
886 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL_TREE);
887 t = build_gimple_modify_stmt (param, build_fold_addr_expr (data));
888 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
889 SSA_NAME_DEF_STMT (param) = t;
891 t = build_gimple_modify_stmt (new_data,
892 fold_convert (TREE_TYPE (new_data), param));
893 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
894 SSA_NAME_DEF_STMT (new_data) = t;
897 /* Emit OMP_RETURN for OMP_PARALLEL. */
898 bb = split_loop_exit_edge (single_dom_exit (loop));
899 bsi = bsi_last (bb);
900 bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
902 /* Extract data for OMP_FOR. */
903 gcc_assert (loop->header == single_dom_exit (loop)->src);
904 cond = COND_EXPR_COND (last_stmt (loop->header));
906 cvar = TREE_OPERAND (cond, 0);
907 cvar_base = SSA_NAME_VAR (cvar);
908 phi = SSA_NAME_DEF_STMT (cvar);
909 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
910 initvar = make_ssa_name (cvar_base, NULL_TREE);
911 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
912 initvar);
913 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
915 bsi = bsi_last (loop->latch);
916 gcc_assert (bsi_stmt (bsi) == SSA_NAME_DEF_STMT (cvar_next));
917 bsi_remove (&bsi, true);
919 /* Prepare cfg. */
920 for_bb = split_edge (loop_preheader_edge (loop));
921 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
922 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
923 gcc_assert (exit == single_dom_exit (loop));
925 guard = make_edge (for_bb, ex_bb, 0);
926 single_succ_edge (loop->latch)->flags = 0;
927 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
928 for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
930 res = PHI_RESULT (phi);
931 gcc_assert (!is_gimple_reg (phi));
932 t = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
933 add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_preheader_edge (loop)),
934 guard);
935 add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_latch_edge (loop)),
936 end);
938 e = redirect_edge_and_branch (exit, nexit->dest);
939 PENDING_STMT (e) = NULL;
941 /* Emit OMP_FOR. */
942 TREE_OPERAND (cond, 0) = cvar_base;
943 type = TREE_TYPE (cvar);
944 t = build_omp_clause (OMP_CLAUSE_SCHEDULE);
945 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
947 for_stmt = make_node (OMP_FOR);
948 TREE_TYPE (for_stmt) = void_type_node;
949 OMP_FOR_CLAUSES (for_stmt) = t;
950 OMP_FOR_INIT (for_stmt) = build_gimple_modify_stmt (initvar, cvar_init);
951 OMP_FOR_COND (for_stmt) = cond;
952 OMP_FOR_INCR (for_stmt) = build_gimple_modify_stmt (
953 cvar_base,
954 build2 (PLUS_EXPR, type,
955 cvar_base,
956 build_int_cst (type, 1)));
957 OMP_FOR_BODY (for_stmt) = NULL_TREE;
958 OMP_FOR_PRE_BODY (for_stmt) = NULL_TREE;
960 bsi = bsi_last (for_bb);
961 bsi_insert_after (&bsi, for_stmt, BSI_NEW_STMT);
962 SSA_NAME_DEF_STMT (initvar) = for_stmt;
964 /* Emit OMP_CONTINUE. */
965 bsi = bsi_last (loop->latch);
966 t = build2 (OMP_CONTINUE, void_type_node, cvar_next, cvar);
967 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
968 SSA_NAME_DEF_STMT (cvar_next) = t;
970 /* Emit OMP_RETURN for OMP_FOR. */
971 bsi = bsi_last (ex_bb);
972 bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
974 return paral_bb;
977 /* Generates code to execute the iterations of LOOP in N_THREADS threads in
978 parallel. NITER describes number of iterations of LOOP. */
980 static void
981 gen_parallel_loop (struct loop *loop, unsigned n_threads,
982 struct tree_niter_desc *niter)
984 struct loop *nloop;
985 tree many_iterations_cond, type, nit;
986 tree stmts, arg_struct, new_arg_struct;
987 basic_block parallel_head;
988 unsigned prob;
990 /* From
992 ---------------------------------------------------------------------
993 loop
995 IV = phi (INIT, IV + STEP)
996 BODY1;
997 if (COND)
998 break;
999 BODY2;
1001 ---------------------------------------------------------------------
1003 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1004 we generate the following code:
1006 ---------------------------------------------------------------------
1008 if (MAY_BE_ZERO
1009 || NITER < MIN_PER_THREAD * N_THREADS)
1010 goto original;
1012 BODY1;
1013 store all local loop-invariant variables used in body of the loop to DATA.
1014 OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1015 load the variables from DATA.
1016 OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1017 BODY2;
1018 BODY1;
1019 OMP_CONTINUE;
1020 OMP_RETURN -- OMP_FOR
1021 OMP_RETURN -- OMP_PARALLEL
1022 goto end;
1024 original:
1025 loop
1027 IV = phi (INIT, IV + STEP)
1028 BODY1;
1029 if (COND)
1030 break;
1031 BODY2;
1034 end:
1038 /* Create two versions of the loop -- in the old one, we know that the
1039 number of iterations is large enough, and we will transform it into the
1040 loop that will be split to loop_fn, the new one will be used for the
1041 remaining iterations. */
1043 type = TREE_TYPE (niter->niter);
1044 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1045 NULL_TREE);
1046 if (stmts)
1047 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1049 many_iterations_cond =
1050 fold_build2 (GE_EXPR, boolean_type_node,
1051 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1052 many_iterations_cond
1053 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1054 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1055 many_iterations_cond);
1056 many_iterations_cond
1057 = force_gimple_operand (many_iterations_cond, &stmts,
1058 false, NULL_TREE);
1059 if (stmts)
1060 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1061 if (!is_gimple_condexpr (many_iterations_cond))
1063 many_iterations_cond
1064 = force_gimple_operand (many_iterations_cond, &stmts,
1065 true, NULL_TREE);
1066 if (stmts)
1067 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1070 initialize_original_copy_tables ();
1072 /* We assume that the loop usually iterates a lot. */
1073 prob = 4 * REG_BR_PROB_BASE / 5;
1074 nloop = loop_version (loop, many_iterations_cond, NULL,
1075 prob, prob, REG_BR_PROB_BASE - prob, true);
1076 update_ssa (TODO_update_ssa);
1077 free_original_copy_tables ();
1079 /* Base all the induction variables in LOOP on a single control one. */
1080 canonicalize_loop_ivs (loop, nit);
1082 /* Ensure that the exit condition is the first statement in the loop. */
1083 transform_to_exit_first_loop (loop, nit);
1085 /* Eliminate the references to local variables from the loop. */
1086 eliminate_local_variables (loop);
1088 /* In the old loop, move all variables non-local to the loop to a structure
1089 and back, and create separate decls for the variables used in loop. */
1090 separate_decls_in_loop (loop, &arg_struct, &new_arg_struct);
1092 /* Create the parallel constructs. */
1093 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1094 new_arg_struct, n_threads);
1096 scev_reset ();
1098 /* Cancel the loop (it is simpler to do it here rather than to teach the
1099 expander to do it). */
1100 cancel_loop_tree (loop);
1102 /* Expand the parallel constructs. We do it directly here instead of running
1103 a separate expand_omp pass, since it is more efficient, and less likely to
1104 cause troubles with further analyses not being able to deal with the
1105 OMP trees. */
1106 omp_expand_local (parallel_head);
1109 /* Detect parallel loops and generate parallel code using libgomp
1110 primitives. Returns true if some loop was parallelized, false
1111 otherwise. */
1113 bool
1114 parallelize_loops (void)
1116 unsigned n_threads = flag_tree_parallelize_loops;
1117 bool changed = false;
1118 struct loop *loop;
1119 struct tree_niter_desc niter_desc;
1120 loop_iterator li;
1122 /* Do not parallelize loops in the functions created by parallelization. */
1123 if (parallelized_function_p (cfun->decl))
1124 return false;
1126 FOR_EACH_LOOP (li, loop, 0)
1128 if (/* Do not bother with loops in cold areas. */
1129 !maybe_hot_bb_p (loop->header)
1130 /* Or loops that roll too little. */
1131 || expected_loop_iterations (loop) <= n_threads
1132 /* And of course, the loop must be parallelizable. */
1133 || !can_duplicate_loop_p (loop)
1134 || !loop_parallel_p (loop, &niter_desc))
1135 continue;
1137 changed = true;
1138 gen_parallel_loop (loop, n_threads, &niter_desc);
1139 verify_flow_info ();
1140 verify_dominators (CDI_DOMINATORS);
1141 verify_loop_structure ();
1142 verify_loop_closed_ssa ();
1145 return changed;
1148 #include "gt-tree-parloops.h"