* gimple-walk.h: New File. Relocate prototypes from gimple.h.
[official-gcc.git] / gcc / tree-predcom.c
blobe08e9b6c69730058c6ff99d37d6d1471afba938f
1 /* Predictive commoning.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
162 a[i] = 1;
163 a[i+2] = 2;
166 can be replaced with
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
177 a[n] = t0;
178 a[n+1] = t1;
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "tm.h"
191 #include "tree.h"
192 #include "tm_p.h"
193 #include "cfgloop.h"
194 #include "gimplify.h"
195 #include "gimple-iterator.h"
196 #include "gimple-ssa.h"
197 #include "tree-phinodes.h"
198 #include "ssa-iterators.h"
199 #include "tree-ssanames.h"
200 #include "tree-ssa-loop-ivopts.h"
201 #include "tree-ssa-loop-manip.h"
202 #include "tree-ssa-loop-niter.h"
203 #include "tree-ssa-loop.h"
204 #include "tree-into-ssa.h"
205 #include "tree-dfa.h"
206 #include "tree-ssa.h"
207 #include "ggc.h"
208 #include "tree-data-ref.h"
209 #include "tree-scalar-evolution.h"
210 #include "tree-chrec.h"
211 #include "params.h"
212 #include "gimple-pretty-print.h"
213 #include "tree-pass.h"
214 #include "tree-affine.h"
215 #include "tree-inline.h"
217 /* The maximum number of iterations between the considered memory
218 references. */
220 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  NULL for looparound phi nodes and for
     combination references (see dump_dref).  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  double_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant
     (loop-invariant); such chains are handled as load motion.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains.  */
  CT_COMBINATION
};
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain, ordered by increasing distance.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations;
     vars[i] holds the value with distance i from the root.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static struct pointer_map_t *name_expansions;
/* Dumps data reference REF to FILE.  */

extern void dump_dref (FILE *, dref);
void
dump_dref (FILE *file, dref ref)
{
  if (ref->ref)
    {
      /* A regular memory reference.  */
      fprintf (file, "    ");
      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
      fprintf (file, " (id %u%s)\n", ref->pos,
	       DR_IS_READ (ref->ref) ? "" : ", write");

      fprintf (file, "      offset ");
      dump_double_int (file, ref->offset, false);
      fprintf (file, "\n");

      fprintf (file, "      distance %u\n", ref->distance);
    }
  else
    {
      /* REF->REF is NULL: either a looparound phi node or a reference
	 created for a combination chain.  */
      if (gimple_code (ref->stmt) == GIMPLE_PHI)
	fprintf (file, "    looparound ref\n");
      else
	fprintf (file, "    combination ref\n");
      fprintf (file, "      in statement ");
      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
      fprintf (file, "\n");
      fprintf (file, "      distance %u\n", ref->distance);
    }
}
/* Dumps CHAIN to FILE.  */

extern void dump_chain (FILE *, chain_p);
void
dump_chain (FILE *file, chain_p chain)
{
  dref a;
  const char *chain_type;
  unsigned i;
  tree var;

  switch (chain->type)
    {
    case CT_INVARIANT:
      chain_type = "Load motion";
      break;

    case CT_LOAD:
      chain_type = "Loads-only";
      break;

    case CT_STORE_LOAD:
      chain_type = "Store-loads";
      break;

    case CT_COMBINATION:
      chain_type = "Combination";
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
	   chain->combined ? " (combined)" : "");
  if (chain->type != CT_INVARIANT)
    fprintf (file, "  max distance %u%s\n", chain->length,
	     chain->has_max_use_after ? "" : ", may reuse first");

  if (chain->type == CT_COMBINATION)
    {
      fprintf (file, "  equal to %p %s %p in type ",
	       (void *) chain->ch1, op_symbol_code (chain->op),
	       (void *) chain->ch2);
      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (chain->vars.exists ())
    {
      fprintf (file, "  vars");
      FOR_EACH_VEC_ELT (chain->vars, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  if (chain->inits.exists ())
    {
      fprintf (file, "  inits");
      FOR_EACH_VEC_ELT (chain->inits, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  fprintf (file, "  references:\n");
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    dump_dref (file, a);

  fprintf (file, "\n");
}
455 /* Dumps CHAINS to FILE. */
457 extern void dump_chains (FILE *, vec<chain_p> );
458 void
459 dump_chains (FILE *file, vec<chain_p> chains)
461 chain_p chain;
462 unsigned i;
464 FOR_EACH_VEC_ELT (chains, i, chain)
465 dump_chain (file, chain);
/* Dumps COMP to FILE.  */

extern void dump_component (FILE *, struct component *);
void
dump_component (FILE *file, struct component *comp)
{
  dref a;
  unsigned i;

  fprintf (file, "Component%s:\n",
	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    dump_dref (file, a);
  fprintf (file, "\n");
}
484 /* Dumps COMPS to FILE. */
486 extern void dump_components (FILE *, struct component *);
487 void
488 dump_components (FILE *file, struct component *comps)
490 struct component *comp;
492 for (comp = comps; comp; comp = comp->next)
493 dump_component (file, comp);
/* Frees a chain CHAIN.  Releases the reference structures and the
   vectors owned by the chain, then the chain itself.  A NULL CHAIN
   is accepted and ignored.  */

static void
release_chain (chain_p chain)
{
  dref ref;
  unsigned i;

  if (chain == NULL)
    return;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    free (ref);

  chain->refs.release ();
  chain->vars.release ();
  chain->inits.release ();

  free (chain);
}
517 /* Frees CHAINS. */
519 static void
520 release_chains (vec<chain_p> chains)
522 unsigned i;
523 chain_p chain;
525 FOR_EACH_VEC_ELT (chains, i, chain)
526 release_chain (chain);
527 chains.release ();
/* Frees a component COMP.  The dref structures inside COMP->REFS are
   not freed here; callers that own them free them separately.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
539 /* Frees list of components COMPS. */
541 static void
542 release_components (struct component *comps)
544 struct component *act, *next;
546 for (act = comps; act; act = next)
548 next = act->next;
549 release_component (act);
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening: afterwards, every node on the path from A to the root
   points directly at the root.  Returns the root.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root = a;

  /* Walk up until the representative (a self-parented node) is found.  */
  while (fathers[root] != root)
    root = fathers[root];

  /* Second pass: re-point each visited node directly at ROOT.  */
  while (a != root)
    {
      unsigned parent = fathers[a];
      fathers[a] = root;
      a = parent;
    }

  return root;
}
/* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
   components, A and B are components to merge.  Union by size: the root
   of the smaller component is attached below the root of the larger.  */

static void
merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
{
  unsigned ra = component_of (fathers, a);
  unsigned rb = component_of (fathers, b);

  /* Already in the same component -- nothing to do.  */
  if (ra == rb)
    return;

  if (sizes[ra] >= sizes[rb])
    {
      sizes[ra] += sizes[rb];
      fathers[rb] = ra;
    }
  else
    {
      sizes[rb] += sizes[ra];
      fathers[ra] = rb;
    }
}
/* Returns true if A is a reference that is suitable for predictive commoning
   in the innermost loop that contains it.  REF_STEP is set according to the
   step of the reference A.  */

static bool
suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
{
  tree ref = DR_REF (a), step = DR_STEP (a);

  /* Reject references with an unknown step, volatile references,
     references of non-register type (their value cannot be kept in an
     SSA name), and references that could trap.  */
  if (!step
      || TREE_THIS_VOLATILE (ref)
      || !is_gimple_reg_type (TREE_TYPE (ref))
      || tree_could_throw_p (ref))
    return false;

  /* Classify the step: known zero, known nonzero, or unknown.  */
  if (integer_zerop (step))
    *ref_step = RS_INVARIANT;
  else if (integer_nonzerop (step))
    *ref_step = RS_NONZERO;
  else
    *ref_step = RS_ANY;

  return true;
}
/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET, as an affine
   combination.  SSA names in the offset are expanded through the
   file-wide NAME_EXPANSIONS cache.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  aff_combination_const (&delta, type, tree_to_double_int (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  double_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = double_int_zero;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  DIFF is computed as offset(A) - offset(B) by
     scaling BASEB by -1 before adding.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, double_int_minus_one);
  aff_combination_add (&diff, &baseb);

  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
/* Returns the last basic block in LOOP for that we are sure that
   it is executed whenever the loop is entered.  Computed as the nearest
   common dominator of the latch and the sources of all exit edges.  */

static basic_block
last_always_executed_block (struct loop *loop)
{
  unsigned i;
  vec<edge> exits = get_loop_exit_edges (loop);
  edge ex;
  basic_block last = loop->latch;

  FOR_EACH_VEC_ELT (exits, i, ex)
    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
  exits.release ();

  return last;
}
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Uses a disjoint-forest (union-find) over the data reference indices;
   index N is a reserved component that collects all "bad" (unsuitable)
   references.  Returns the list of components, or NULL on failure.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  /* N + 1 entries: one per data reference plus the reserved "bad" slot.  */
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* Remember the index of the reference in its AUX field; the DFU
	 arrays are indexed by it.  */
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Merge every unsuitable reference into the "bad" component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge the components connected by dependences.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      double_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb)
	  && (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off)))
	continue;

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Materialize the component structures, skipping the "bad" one.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = double_int_zero;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Every reference must be executed exactly once per iteration.  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The references must be ordered by dominance (BP is the block of
	 the previous reference, starting at the loop header).  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = double_int_zero;

  /* Compute the offset of each reference relative to the first one;
     fail if any pair does not have a constant iteration distance.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

#ifdef ENABLE_CHECKING
      {
	enum ref_step_type a_step;
	ok = suitable_reference_p (a->ref, &a_step);
	gcc_assert (ok && a_step == comp->comp_step);
      }
#endif
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  /* COMP is a pointer to the link pointing at the current component,
     which allows unlinking in place without a "previous" variable.  */
  struct component **comp, *act;

  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  /* Unlink and free the unsuitable component together with its
	     references.  */
	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}
/* Compares two drefs A and B by their offset and position.  Callback for
   qsort.  Offset is the primary key (signed comparison); position in
   dominance order breaks ties.  */

static int
order_drefs (const void *a, const void *b)
{
  const dref *const da = (const dref *) a;
  const dref *const db = (const dref *) b;
  int offcmp = (*da)->offset.scmp ((*db)->offset);

  if (offcmp != 0)
    return offcmp;

  return (*da)->pos - (*db)->pos;
}
/* Returns root of the CHAIN, i.e. its first reference.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
/* Adds REF to the chain CHAIN.  REF is dropped (and freed) if its
   distance from the root is MAX_DISTANCE or more.  Updates the chain
   length and the has_max_use_after / all_always_accessed flags.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);
  double_int dist;

  gcc_assert (root->offset.sle (ref->offset));
  dist = ref->offset - root->offset;
  if (double_int::from_uhwi (MAX_DISTANCE).ule (dist))
    {
      /* Too far from the root to be worth tracking.  */
      free (ref);
      return;
    }
  gcc_assert (dist.fits_uhwi ());

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* A new maximum distance resets HAS_MAX_USE_AFTER; the check below
     then re-establishes it if this reference comes after the root.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
/* Returns the chain for invariant component COMP.  All references of the
   component are moved into the new CT_INVARIANT chain.  */

static chain_p
make_invariant_chain (struct component *comp)
{
  chain_p chain = XCNEW (struct chain);
  unsigned i;
  dref ref;

  chain->type = CT_INVARIANT;

  chain->all_always_accessed = true;

  FOR_EACH_VEC_ELT (comp->refs, i, ref)
    {
      chain->refs.safe_push (ref);
      chain->all_always_accessed &= ref->always_accessed;
    }

  return chain;
}
/* Make a new chain rooted at REF.  The chain type depends on whether the
   root is a load (CT_LOAD) or a store (CT_STORE_LOAD).  */

static chain_p
make_rooted_chain (dref ref)
{
  chain_p chain = XCNEW (struct chain);

  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;

  chain->refs.safe_push (ref);
  chain->all_always_accessed = ref->always_accessed;

  /* The root is at distance 0 by definition.  */
  ref->distance = 0;

  return chain;
}
/* Returns true if CHAIN is not trivial, i.e. it contains more than one
   reference (a single-reference chain offers nothing to reuse).  */

static bool
nontrivial_chain_p (chain_p chain)
{
  return chain != NULL && chain->refs.length () > 1;
}
/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
   is no such name.  For a load the value is the assignment's lhs, for a
   store it is the rhs; for a phi node it is the phi result.  */

static tree
name_for_ref (dref ref)
{
  tree name;

  if (is_gimple_assign (ref->stmt))
    {
      if (!ref->ref || DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);

  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
}
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  double_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  DIFF = offset(ROOT) - offset(REF) must be
     exactly DISTANCE times the step.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, double_int_minus_one);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (off != double_int::from_uhwi (distance))
    return false;

  return true;
}
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gimple
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gimple phi = NULL, init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gimple_stmt_iterator psi;

  /* Find the SSA name holding the value of REF (see name_for_ref).  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a header phi whose latch argument is NAME.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The initial value must come from a load of a memory reference.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* The initializer must read the value REF had one iteration before the
     loop was entered.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  The new
   reference has distance one more than REF and is inserted so that the
   chain stays sorted by distance.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gimple phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  /* Find the insertion point keeping the references ordered by
     distance.  */
  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}
/* For references in CHAIN that are copied around the LOOP (created previously
   by PRE, or by user), add the results of such copies to the chain.  This
   enables us to remove the copies by unrolling, and may need less registers
   (also, it may allow us to combine chains together).  */

static void
add_looparound_copies (struct loop *loop, chain_p chain)
{
  unsigned i;
  dref ref, root = get_chain_root (chain);
  gimple phi;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    {
      phi = find_looparound_phi (loop, ref, root);
      if (!phi)
	continue;

      /* Record the phi so later passes know it is covered by a chain.  */
      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
      insert_looparound_copy (chain, ref, phi);
    }
}
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  double_int last_ofs = double_int_zero;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain at every write, at the first reference, and
	 whenever the offset gap from the current root exceeds
	 MAX_DISTANCE.  */
      if (!chain || DR_IS_WRITE (a->ref)
	  || double_int::from_uhwi (MAX_DISTANCE).ule (a->offset - last_ofs))
	{
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Flush the last open chain.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1234 /* Find roots of the values and determine distances in components COMPS, and
1235 separates the references to CHAINS. LOOP is the current loop. */
1237 static void
1238 determine_roots (struct loop *loop,
1239 struct component *comps, vec<chain_p> *chains)
1241 struct component *comp;
1243 for (comp = comps; comp; comp = comp->next)
1244 determine_roots_comp (loop, comp, chains);
1247 /* Replace the reference in statement STMT with temporary variable
1248 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1249 the reference in the statement. IN_LHS is true if the reference
1250 is in the lhs of STMT, false if it is in rhs. */
1252 static void
1253 replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
1255 tree val;
1256 gimple new_stmt;
1257 gimple_stmt_iterator bsi, psi;
1259 if (gimple_code (stmt) == GIMPLE_PHI)
/* Looparound phis are only ever replaced, never used as initializers.  */
1261 gcc_assert (!in_lhs && !set);
1263 val = PHI_RESULT (stmt);
1264 bsi = gsi_after_labels (gimple_bb (stmt));
1265 psi = gsi_for_stmt (stmt);
1266 remove_phi_node (&psi, false);
1268 /* Turn the phi node into GIMPLE_ASSIGN. */
1269 new_stmt = gimple_build_assign (val, new_tree);
1270 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1271 return;
1274 /* Since the reference is of gimple_reg type, it should only
1275 appear as lhs or rhs of modify statement. */
1276 gcc_assert (is_gimple_assign (stmt));
1278 bsi = gsi_for_stmt (stmt);
1280 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1281 if (!set)
1283 gcc_assert (!in_lhs);
1284 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
/* gimple_assign_set_rhs_from_tree may replace the statement; reload it.  */
1285 stmt = gsi_stmt (bsi);
1286 update_stmt (stmt);
1287 return;
1290 if (in_lhs)
1292 /* We have statement
1294 OLD = VAL
1296 If OLD is a memory reference, then VAL is gimple_val, and we transform
1297 this to
1299 OLD = VAL
1300 NEW = VAL
1302 Otherwise, we are replacing a combination chain,
1303 VAL is the expression that performs the combination, and OLD is an
1304 SSA name. In this case, we transform the assignment to
1306 OLD = VAL
1307 NEW = OLD
1311 val = gimple_assign_lhs (stmt);
1312 if (TREE_CODE (val) != SSA_NAME)
1314 val = gimple_assign_rhs1 (stmt);
1315 gcc_assert (gimple_assign_single_p (stmt));
/* A clobber has no meaningful value; use the default definition of
   the temporary instead.  */
1316 if (TREE_CLOBBER_P (val))
1317 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1318 else
1319 gcc_assert (gimple_assign_copy_p (stmt));
1322 else
1324 /* VAL = OLD
1326 is transformed to
1328 VAL = OLD
1329 NEW = VAL */
1331 val = gimple_assign_lhs (stmt);
1334 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1335 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1338 /* Returns a memory reference to DR in the ITER-th iteration of
1339 the loop it was analyzed in. Append init stmts to STMTS. */
1341 static tree
1342 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
1344 tree off = DR_OFFSET (dr);
1345 tree coff = DR_INIT (dr);
/* ITER == 0 needs no adjustment; a constant step folds into the constant
   offset COFF, a symbolic step into the variable offset OFF.  */
1346 if (iter == 0)
1348 else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
1349 coff = size_binop (PLUS_EXPR, coff,
1350 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1351 else
1352 off = size_binop (PLUS_EXPR, off,
1353 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1354 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1355 addr = force_gimple_operand_1 (addr, stmts, is_gimple_mem_ref_addr,
1356 NULL_TREE);
1357 tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
1358 /* While data-ref analysis punts on bit offsets it still handles
1359 bitfield accesses at byte boundaries. Cope with that. Note that
1360 we cannot simply re-apply the outer COMPONENT_REF because the
1361 byte-granular portion of it is already applied via DR_INIT and
1362 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1363 start at offset zero. */
1364 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
1365 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
1367 tree field = TREE_OPERAND (DR_REF (dr), 1);
1368 return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
1369 build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
1370 addr, alias_ptr),
1371 DECL_SIZE (field), bitsize_zero_node);
1373 else
1374 return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
1377 /* Get the initialization expression for the INDEX-th temporary variable
1378 of CHAIN. */
1380 static tree
1381 get_init_expr (chain_p chain, unsigned index)
1383 if (chain->type == CT_COMBINATION)
1385 tree e1 = get_init_expr (chain->ch1, index);
1386 tree e2 = get_init_expr (chain->ch2, index);
1388 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1390 else
1391 return chain->inits[index];
1394 /* Returns a new temporary variable used for the I-th variable carrying
1395 value of REF. The variable's uid is marked in TMP_VARS. */
1397 static tree
1398 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1400 tree type = TREE_TYPE (ref);
1401 /* We never access the components of the temporary variable in predictive
1402 commoning. */
1403 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1404 bitmap_set_bit (tmp_vars, DECL_UID (var));
1405 return var;
1408 /* Creates the variables for CHAIN, as well as phi nodes for them and
1409 initialization on entry to LOOP. Uids of the newly created
1410 temporary variables are marked in TMP_VARS. */
1412 static void
1413 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1415 unsigned i;
1416 unsigned n = chain->length;
1417 dref root = get_chain_root (chain);
/* If no reference with maximal distance occurs after the root, the last
   variable slot can reuse the first one instead of a fresh temporary.  */
1418 bool reuse_first = !chain->has_max_use_after;
1419 tree ref, init, var, next;
1420 gimple phi;
1421 gimple_seq stmts;
1422 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1424 /* If N == 0, then all the references are within the single iteration. And
1425 since this is an nonempty chain, reuse_first cannot be true. */
1426 gcc_assert (n > 0 || !reuse_first);
1428 chain->vars.create (n + 1);
1430 if (chain->type == CT_COMBINATION)
1431 ref = gimple_assign_lhs (root->stmt);
1432 else
1433 ref = DR_REF (root->ref);
1435 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1437 var = predcom_tmp_var (ref, i, tmp_vars);
1438 chain->vars.quick_push (var);
1440 if (reuse_first)
1441 chain->vars.quick_push (chain->vars[0]);
1443 FOR_EACH_VEC_ELT (chain->vars, i, var)
1444 chain->vars[i] = make_ssa_name (var, NULL);
/* Create a phi node in the loop header for each carried variable, fed by
   its initializer on the preheader edge and the next variable on the
   latch edge.  */
1446 for (i = 0; i < n; i++)
1448 var = chain->vars[i];
1449 next = chain->vars[i + 1];
1450 init = get_init_expr (chain, i);
1452 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1453 if (stmts)
1454 gsi_insert_seq_on_edge_immediate (entry, stmts);
1456 phi = create_phi_node (var, loop->header);
1457 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1458 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1462 /* Create the variables and initialization statement for root of chain
1463 CHAIN. Uids of the newly created temporary variables are marked
1464 in TMP_VARS. */
1466 static void
1467 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1469 dref root = get_chain_root (chain);
/* For store-load and combination chains the root defines the value, so
   the reference appears on the lhs side.  */
1470 bool in_lhs = (chain->type == CT_STORE_LOAD
1471 || chain->type == CT_COMBINATION);
1473 initialize_root_vars (loop, chain, tmp_vars);
1474 replace_ref_with (root->stmt,
1475 chain->vars[chain->length],
1476 true, in_lhs);
1479 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1480 initialization on entry to LOOP if necessary. The ssa name for the variable
1481 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1482 around the loop is created. Uid of the newly created temporary variable
1483 is marked in TMP_VARS. INITS is the list containing the (single)
1484 initializer. */
1486 static void
1487 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1488 vec<tree> *vars, vec<tree> inits,
1489 bitmap tmp_vars)
1491 unsigned i;
1492 tree ref = DR_REF (root->ref), init, var, next;
1493 gimple_seq stmts;
1494 gimple phi;
1495 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1497 /* Find the initializer for the variable, and check that it cannot
1498 trap. */
1499 init = inits[0];
/* With writes in the loop we need two ssa names: the phi result and the
   value carried around through the latch.  */
1501 vars->create (written ? 2 : 1);
1502 var = predcom_tmp_var (ref, 0, tmp_vars);
1503 vars->quick_push (var);
1504 if (written)
1505 vars->quick_push ((*vars)[0]);
1507 FOR_EACH_VEC_ELT (*vars, i, var)
1508 (*vars)[i] = make_ssa_name (var, NULL);
1510 var = (*vars)[0];
1512 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1513 if (stmts)
1514 gsi_insert_seq_on_edge_immediate (entry, stmts);
1516 if (written)
1518 next = (*vars)[1];
1519 phi = create_phi_node (var, loop->header);
1520 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1521 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1523 else
/* Read-only case: a single initialization on the preheader edge.  */
1525 gimple init_stmt = gimple_build_assign (var, init);
1526 gsi_insert_on_edge_immediate (entry, init_stmt);
1531 /* Execute load motion for references in chain CHAIN. Uids of the newly
1532 created temporary variables are marked in TMP_VARS. */
1534 static void
1535 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1537 vec<tree> vars;
1538 dref a;
1539 unsigned n_writes = 0, ridx, i;
1540 tree var;
1542 gcc_assert (chain->type == CT_INVARIANT);
1543 gcc_assert (!chain->combined);
1544 FOR_EACH_VEC_ELT (chain->refs, i, a)
1545 if (DR_IS_WRITE (a->ref))
1546 n_writes++;
1548 /* If there are no reads in the loop, there is nothing to do. */
1549 if (n_writes == chain->refs.length ())
1550 return;
1552 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1553 &vars, chain->inits, tmp_vars);
/* RIDX selects which variable replaces each reference: while writes
   remain, reads must use the phi result (vars[0]); after the last write
   the value is final and reads switch to vars[1].  */
1557 ridx = 0;
1556 FOR_EACH_VEC_ELT (chain->refs, i, a)
1558 bool is_read = DR_IS_READ (a->ref);
1560 if (DR_IS_WRITE (a->ref))
1562 n_writes--;
1563 if (n_writes)
/* More writes follow; create a fresh ssa name for the updated value.  */
1565 var = vars[0];
1566 var = make_ssa_name (SSA_NAME_VAR (var), NULL);
1567 vars[0] = var;
1569 else
1570 ridx = 1;
1573 replace_ref_with (a->stmt, vars[ridx],
1574 !is_read, !is_read);
1577 vars.release ();
1580 /* Returns the single statement in that NAME is used, excepting
1581 the looparound phi nodes contained in one of the chains. If there is no
1582 such statement, or more statements, NULL is returned. */
1584 static gimple
1585 single_nonlooparound_use (tree name)
1587 use_operand_p use;
1588 imm_use_iterator it;
1589 gimple stmt, ret = NULL;
1591 FOR_EACH_IMM_USE_FAST (use, it, name)
1593 stmt = USE_STMT (use);
1595 if (gimple_code (stmt) == GIMPLE_PHI)
1597 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1598 could not be processed anyway, so just fail for them. */
1599 if (bitmap_bit_p (looparound_phis,
1600 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1601 continue;
1603 return NULL;
1605 else if (is_gimple_debug (stmt))
/* Debug statements do not count as real uses.  */
1606 continue;
1607 else if (ret != NULL)
/* Second real use found -- not a single use.  */
1608 return NULL;
1609 else
1610 ret = stmt;
1613 return ret;
1616 /* Remove statement STMT, as well as the chain of assignments in that it is
1617 used. */
1619 static void
1620 remove_stmt (gimple stmt)
1622 tree name;
1623 gimple next;
1624 gimple_stmt_iterator psi;
1626 if (gimple_code (stmt) == GIMPLE_PHI)
1628 name = PHI_RESULT (stmt);
/* Look up the follow-up copy before removing the phi, then drop any
   debug uses so we do not leave dangling debug info.  */
1629 next = single_nonlooparound_use (name);
1630 reset_debug_uses (stmt);
1631 psi = gsi_for_stmt (stmt);
1632 remove_phi_node (&psi, true);
1634 if (!next
1635 || !gimple_assign_ssa_name_copy_p (next)
1636 || gimple_assign_rhs1 (next) != name)
1637 return;
1639 stmt = next;
/* Walk and delete the chain of single-use ssa-name copies rooted at STMT.  */
1642 while (1)
1644 gimple_stmt_iterator bsi;
1646 bsi = gsi_for_stmt (stmt);
1648 name = gimple_assign_lhs (stmt);
1649 gcc_assert (TREE_CODE (name) == SSA_NAME);
1651 next = single_nonlooparound_use (name);
1652 reset_debug_uses (stmt);
1654 unlink_stmt_vdef (stmt);
1655 gsi_remove (&bsi, true);
1656 release_defs (stmt);
/* Stop when the next statement is not a plain copy of NAME.  */
1658 if (!next
1659 || !gimple_assign_ssa_name_copy_p (next)
1660 || gimple_assign_rhs1 (next) != name)
1661 return;
1663 stmt = next;
1667 /* Perform the predictive commoning optimization for a chain CHAIN.
1668 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1670 static void
1671 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1672 bitmap tmp_vars)
1674 unsigned i;
1675 dref a;
1676 tree var;
1678 if (chain->combined)
1680 /* For combined chains, just remove the statements that are used to
1681 compute the values of the expression (except for the root one). */
1682 for (i = 1; chain->refs.iterate (i, &a); i++)
1683 remove_stmt (a->stmt);
1685 else
1687 /* For non-combined chains, set up the variables that hold its value,
1688 and replace the uses of the original references by these
1689 variables. */
1690 initialize_root (loop, chain, tmp_vars);
1691 for (i = 1; chain->refs.iterate (i, &a); i++)
/* A reference at distance D reads the value stored LENGTH - D
   variables back in the chain.  */
1693 var = chain->vars[chain->length - a->distance];
1694 replace_ref_with (a->stmt, var, false, false);
1699 /* Determines the unroll factor necessary to remove as many temporary variable
1700 copies as possible. CHAINS is the list of chains that will be
1701 optimized. */
1703 static unsigned
1704 determine_unroll_factor (vec<chain_p> chains)
1706 chain_p chain;
1707 unsigned factor = 1, af, nfactor, i;
1708 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1710 FOR_EACH_VEC_ELT (chains, i, chain)
1712 if (chain->type == CT_INVARIANT || chain->combined)
1713 continue;
1715 /* The best unroll factor for this chain is equal to the number of
1716 temporary variables that we create for it. */
1717 af = chain->length;
1718 if (chain->has_max_use_after)
1719 af++;
/* Combine with the factors of earlier chains via the least common
   multiple, capped by PARAM_MAX_UNROLL_TIMES.  */
1721 nfactor = factor * af / gcd (factor, af);
1722 if (nfactor <= max)
1723 factor = nfactor;
1726 return factor;
1729 /* Perform the predictive commoning optimization for CHAINS.
1730 Uids of the newly created temporary variables are marked in TMP_VARS. */
1732 static void
1733 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1734 bitmap tmp_vars)
1736 chain_p chain;
1737 unsigned i;
1739 FOR_EACH_VEC_ELT (chains, i, chain)
1741 if (chain->type == CT_INVARIANT)
1742 execute_load_motion (loop, chain, tmp_vars)
1743 else
1744 execute_pred_commoning_chain (loop, chain, tmp_vars);
/* The transformations change the virtual operands; rebuild them.  */
1747 update_ssa (TODO_update_ssa_only_virtuals);
1750 /* For each reference in CHAINS, if its defining statement is
1751 phi node, record the ssa name that is defined by it. */
1753 static void
1754 replace_phis_by_defined_names (vec<chain_p> chains)
1756 chain_p chain;
1757 dref a;
1758 unsigned i, j;
1760 FOR_EACH_VEC_ELT (chains, i, chain)
1761 FOR_EACH_VEC_ELT (chain->refs, j, a)
1763 if (gimple_code (a->stmt) == GIMPLE_PHI)
/* Stash the phi result and clear stmt, so the phi can be relocated
   by cfg manipulation and later found again via SSA_NAME_DEF_STMT.  */
1765 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1766 a->stmt = NULL;
1771 /* For each reference in CHAINS, if name_defined_by_phi is not
1772 NULL, use it to set the stmt field. */
1774 static void
1775 replace_names_by_phis (vec<chain_p> chains)
1777 chain_p chain;
1778 dref a;
1779 unsigned i, j;
1781 FOR_EACH_VEC_ELT (chains, i, chain)
1782 FOR_EACH_VEC_ELT (chain->refs, j, a)
1783 if (a->stmt == NULL)
/* Inverse of replace_phis_by_defined_names: recover the (possibly
   reallocated) phi node from the ssa name it defines.  */
1785 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1786 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1787 a->name_defined_by_phi = NULL_TREE;
1791 /* Wrapper over execute_pred_commoning, to pass it as a callback
1792 to tree_transform_and_unroll_loop. */
1794 struct epcc_data
/* Chains to common and the bitmap receiving temporary-variable uids.  */
1796 vec<chain_p> chains;
1797 bitmap tmp_vars;
1800 static void
1801 execute_pred_commoning_cbck (struct loop *loop, void *data)
1803 struct epcc_data *const dta = (struct epcc_data *) data;
1805 /* Restore phi nodes that were replaced by ssa names before
1806 tree_transform_and_unroll_loop (see detailed description in
1807 tree_predictive_commoning_loop). */
1808 replace_names_by_phis (dta->chains);
1809 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1812 /* Base NAME and all the names in the chain of phi nodes that use it
1813 on variable VAR. The phi nodes are recognized by being in the copies of
1814 the header of the LOOP. */
1816 static void
1817 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1819 gimple stmt, phi;
1820 imm_use_iterator iter;
1822 replace_ssa_name_symbol (name, var);
/* Follow the du chain through phi nodes inside LOOP, rebasing each
   phi result on VAR until no further phi use is found.  */
1824 while (1)
1826 phi = NULL;
1827 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1829 if (gimple_code (stmt) == GIMPLE_PHI
1830 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1832 phi = stmt;
1833 BREAK_FROM_IMM_USE_STMT (iter);
1836 if (!phi)
1837 return;
1839 name = PHI_RESULT (phi);
1840 replace_ssa_name_symbol (name, var);
1844 /* Given an unrolled LOOP after predictive commoning, remove the
1845 register copies arising from phi nodes by changing the base
1846 variables of SSA names. TMP_VARS is the set of the temporary variables
1847 for those we want to perform this. */
1849 static void
1850 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1852 edge e;
1853 gimple phi, stmt;
1854 tree name, use, var;
1855 gimple_stmt_iterator psi;
1857 e = loop_latch_edge (loop);
1858 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1860 phi = gsi_stmt (psi);
1861 name = PHI_RESULT (phi);
1862 var = SSA_NAME_VAR (name);
/* Only process phis for temporaries we created in this pass.  */
1863 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
1864 continue;
1865 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1866 gcc_assert (TREE_CODE (use) == SSA_NAME);
1868 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1869 stmt = SSA_NAME_DEF_STMT (use);
1870 while (gimple_code (stmt) == GIMPLE_PHI
1871 /* In case we could not unroll the loop enough to eliminate
1872 all copies, we may reach the loop header before the defining
1873 statement (in that case, some register copies will be present
1874 in loop latch in the final code, corresponding to the newly
1875 created looparound phi nodes). */
1876 && gimple_bb (stmt) != loop->header)
1878 gcc_assert (single_pred_p (gimple_bb (stmt)));
1879 use = PHI_ARG_DEF (stmt, 0);
1880 stmt = SSA_NAME_DEF_STMT (use);
1883 base_names_in_chain_on (loop, use, var);
1887 /* Returns true if CHAIN is suitable to be combined. */
1889 static bool
1890 chain_can_be_combined_p (chain_p chain)
1892 return (!chain->combined
1893 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1896 /* Returns the modify statement that uses NAME. Skips over assignment
1897 statements, NAME is replaced with the actual name used in the returned
1898 statement. */
1900 static gimple
1901 find_use_stmt (tree *name)
1903 gimple stmt;
1904 tree rhs, lhs;
1906 /* Skip over assignments. */
1907 while (1)
1909 stmt = single_nonlooparound_use (*name);
1910 if (!stmt)
1911 return NULL;
1913 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1914 return NULL;
1916 lhs = gimple_assign_lhs (stmt);
1917 if (TREE_CODE (lhs) != SSA_NAME)
1918 return NULL;
1920 if (gimple_assign_copy_p (stmt))
/* A plain copy: continue the search from its lhs.  */
1922 rhs = gimple_assign_rhs1 (stmt);
1923 if (rhs != *name)
1924 return NULL;
1926 *name = lhs;
1928 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1929 == GIMPLE_BINARY_RHS)
1930 return stmt;
1931 else
1932 return NULL;
1936 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
1938 static bool
1939 may_reassociate_p (tree type, enum tree_code code)
1941 if (FLOAT_TYPE_P (type)
1942 && !flag_unsafe_math_optimizations)
1943 return false;
1945 return (commutative_tree_code (code)
1946 && associative_tree_code (code));
1949 /* If the operation used in STMT is associative and commutative, go through the
1950 tree of the same operations and returns its root. Distance to the root
1951 is stored in DISTANCE. */
1953 static gimple
1954 find_associative_operation_root (gimple stmt, unsigned *distance)
1956 tree lhs;
1957 gimple next;
1958 enum tree_code code = gimple_assign_rhs_code (stmt);
1959 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1960 unsigned dist = 0;
1962 if (!may_reassociate_p (type, code))
1963 return NULL;
/* Climb the chain of single uses as long as the same operation is
   applied, counting the steps in DIST.  */
1965 while (1)
1967 lhs = gimple_assign_lhs (stmt);
1968 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
1970 next = find_use_stmt (&lhs);
1971 if (!next
1972 || gimple_assign_rhs_code (next) != code)
1973 break;
1975 stmt = next;
1976 dist++;
/* DISTANCE may be NULL when the caller does not need the depth.  */
1979 if (distance)
1980 *distance = dist;
1981 return stmt;
1984 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
1985 is no such statement, returns NULL_TREE. In case the operation used on
1986 NAME1 and NAME2 is associative and commutative, returns the root of the
1987 tree formed by this operation instead of the statement that uses NAME1 or
1988 NAME2. */
1990 static gimple
1991 find_common_use_stmt (tree *name1, tree *name2)
1993 gimple stmt1, stmt2;
1995 stmt1 = find_use_stmt (name1);
1996 if (!stmt1)
1997 return NULL;
1999 stmt2 = find_use_stmt (name2);
2000 if (!stmt2)
2001 return NULL;
2003 if (stmt1 == stmt2)
2004 return stmt1;
/* Not used directly in the same statement; see whether both uses belong
   to the same reassociable expression tree.  */
2006 stmt1 = find_associative_operation_root (stmt1, NULL);
2007 if (!stmt1)
2008 return NULL;
2009 stmt2 = find_associative_operation_root (stmt2, NULL);
2010 if (!stmt2)
2011 return NULL;
2013 return (stmt1 == stmt2 ? stmt1 : NULL);
2016 /* Checks whether R1 and R2 are combined together using CODE, with the result
2017 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2018 if it is true. If CODE is ERROR_MARK, set these values instead. */
2020 static bool
2021 combinable_refs_p (dref r1, dref r2,
2022 enum tree_code *code, bool *swap, tree *rslt_type)
2024 enum tree_code acode;
2025 bool aswap;
2026 tree atype;
2027 tree name1, name2;
2028 gimple stmt;
2030 name1 = name_for_ref (r1);
2031 name2 = name_for_ref (r2);
2032 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2034 stmt = find_common_use_stmt (&name1, &name2);
2036 if (!stmt)
2037 return false;
2039 acode = gimple_assign_rhs_code (stmt);
/* For non-commutative operations the operand order matters; record
   whether NAME1 appears as the second operand.  */
2040 aswap = (!commutative_tree_code (acode)
2041 && gimple_assign_rhs1 (stmt) != name1);
2042 atype = TREE_TYPE (gimple_assign_lhs (stmt));
/* First pair seen: record the operation to compare later pairs against.  */
2044 if (*code == ERROR_MARK)
2046 *code = acode;
2047 *swap = aswap;
2048 *rslt_type = atype;
2049 return true;
/* Subsequent pairs must match the recorded operation exactly.  */
2052 return (*code == acode
2053 && *swap == aswap
2054 && *rslt_type == atype);
2057 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2058 an assignment of the remaining operand. */
2060 static void
2061 remove_name_from_operation (gimple stmt, tree op)
2063 tree other_op;
2064 gimple_stmt_iterator si;
2066 gcc_assert (is_gimple_assign (stmt));
2068 if (gimple_assign_rhs1 (stmt) == op)
2069 other_op = gimple_assign_rhs2 (stmt);
2070 else
2071 other_op = gimple_assign_rhs1 (stmt);
2073 si = gsi_for_stmt (stmt);
2074 gimple_assign_set_rhs_from_tree (&si, other_op);
2076 /* We should not have reallocated STMT. */
2077 gcc_assert (gsi_stmt (si) == stmt);
2079 update_stmt (stmt);
2082 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2083 are combined in a single statement, and returns this statement. */
2085 static gimple
2086 reassociate_to_the_same_stmt (tree name1, tree name2)
2088 gimple stmt1, stmt2, root1, root2, s1, s2;
2089 gimple new_stmt, tmp_stmt;
2090 tree new_name, tmp_name, var, r1, r2;
2091 unsigned dist1, dist2;
2092 enum tree_code code;
2093 tree type = TREE_TYPE (name1);
2094 gimple_stmt_iterator bsi;
2096 stmt1 = find_use_stmt (&name1);
2097 stmt2 = find_use_stmt (&name2);
2098 root1 = find_associative_operation_root (stmt1, &dist1);
2099 root2 = find_associative_operation_root (stmt2, &dist2);
2100 code = gimple_assign_rhs_code (stmt1);
2102 gcc_assert (root1 && root2 && root1 == root2
2103 && code == gimple_assign_rhs_code (stmt2));
2105 /* Find the root of the nearest expression in that both NAME1 and NAME2
2106 are used. */
2107 r1 = name1;
2108 s1 = stmt1;
2109 r2 = name2;
2110 s2 = stmt2;
/* First equalize the depths of the two paths to the common root ... */
2112 while (dist1 > dist2)
2114 s1 = find_use_stmt (&r1);
2115 r1 = gimple_assign_lhs (s1);
2116 dist1--;
2118 while (dist2 > dist1)
2120 s2 = find_use_stmt (&r2);
2121 r2 = gimple_assign_lhs (s2);
2122 dist2--;
/* ... then walk both paths in lockstep until they meet.  */
2125 while (s1 != s2)
2127 s1 = find_use_stmt (&r1);
2128 r1 = gimple_assign_lhs (s1);
2129 s2 = find_use_stmt (&r2);
2130 r2 = gimple_assign_lhs (s2);
2133 /* Remove NAME1 and NAME2 from the statements in that they are used
2134 currently. */
2135 remove_name_from_operation (stmt1, name1);
2136 remove_name_from_operation (stmt2, name2);
2138 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2139 combine it with the rhs of S1. */
2140 var = create_tmp_reg (type, "predreastmp");
2141 new_name = make_ssa_name (var, NULL);
2142 new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
2144 var = create_tmp_reg (type, "predreastmp");
2145 tmp_name = make_ssa_name (var, NULL);
2147 /* Rhs of S1 may now be either a binary expression with operation
2148 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2149 so that name1 or name2 was removed from it). */
2150 tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1),
2151 tmp_name,
2152 gimple_assign_rhs1 (s1),
2153 gimple_assign_rhs2 (s1))
2155 bsi = gsi_for_stmt (s1);
2156 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2157 s1 = gsi_stmt (bsi);
2158 update_stmt (s1);
2160 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2161 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2163 return new_stmt;
2166 /* Returns the statement that combines references R1 and R2. In case R1
2167 and R2 are not used in the same statement, but they are used with an
2168 associative and commutative operation in the same expression, reassociate
2169 the expression so that they are used in the same statement. */
2171 static gimple
2172 stmt_combining_refs (dref r1, dref r2)
2174 gimple stmt1, stmt2;
2175 tree name1 = name_for_ref (r1);
2176 tree name2 = name_for_ref (r2);
2178 stmt1 = find_use_stmt (&name1);
2179 stmt2 = find_use_stmt (&name2);
2180 if (stmt1 == stmt2)
2181 return stmt1;
2183 return reassociate_to_the_same_stmt (name1, name2);
2186 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2187 description of the new chain is returned, otherwise we return NULL. */
2189 static chain_p
2190 combine_chains (chain_p ch1, chain_p ch2)
2192 dref r1, r2, nw;
2193 enum tree_code op = ERROR_MARK;
2194 bool swap = false;
2195 chain_p new_chain;
2196 unsigned i;
2197 gimple root_stmt;
2198 tree rslt_type = NULL_TREE;
/* Chains can only be combined pairwise, with equal lengths and with
   matching reference counts and distances.  */
2200 if (ch1 == ch2)
2201 return NULL;
2202 if (ch1->length != ch2->length)
2203 return NULL;
2205 if (ch1->refs.length () != ch2->refs.length ())
2206 return NULL;
2208 for (i = 0; (ch1->refs.iterate (i, &r1)
2209 && ch2->refs.iterate (i, &r2)); i++)
2211 if (r1->distance != r2->distance)
2212 return NULL;
2214 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2215 return NULL;
/* Normalize the operand order so that CH1 always supplies the first
   operand of the combining operation.  */
2218 if (swap)
2220 chain_p tmp = ch1;
2221 ch1 = ch2;
2222 ch2 = tmp;
2225 new_chain = XCNEW (struct chain);
2226 new_chain->type = CT_COMBINATION;
2227 new_chain->op = op;
2228 new_chain->ch1 = ch1;
2229 new_chain->ch2 = ch2;
2230 new_chain->rslt_type = rslt_type;
2231 new_chain->length = ch1->length;
2233 for (i = 0; (ch1->refs.iterate (i, &r1)
2234 && ch2->refs.iterate (i, &r2)); i++)
2236 nw = XCNEW (struct dref_d);
2237 nw->stmt = stmt_combining_refs (r1, r2);
2238 nw->distance = r1->distance;
2240 new_chain->refs.safe_push (nw);
/* Recompute has_max_use_after for the combined chain: it holds if some
   maximal-distance reference does not dominate the root statement.  */
2243 new_chain->has_max_use_after = false;
2244 root_stmt = get_chain_root (new_chain)->stmt;
2245 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
2247 if (nw->distance == new_chain->length
2248 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2250 new_chain->has_max_use_after = true;
2251 break;
2255 ch1->combined = true;
2256 ch2->combined = true;
2257 return new_chain;
2260 /* Try to combine the CHAINS. */
2262 static void
2263 try_combine_chains (vec<chain_p> *chains)
2265 unsigned i, j;
2266 chain_p ch1, ch2, cch;
2267 vec<chain_p> worklist = vNULL;
2269 FOR_EACH_VEC_ELT (*chains, i, ch1)
2270 if (chain_can_be_combined_p (ch1))
2271 worklist.safe_push (ch1);
/* Worklist algorithm: a successful combination produces a new chain that
   is itself a candidate for further combining.  */
2273 while (!worklist.is_empty ())
2275 ch1 = worklist.pop ();
/* The chain may have been combined while it sat on the worklist.  */
2276 if (!chain_can_be_combined_p (ch1))
2277 continue;
2279 FOR_EACH_VEC_ELT (*chains, j, ch2)
2281 if (!chain_can_be_combined_p (ch2))
2282 continue;
2284 cch = combine_chains (ch1, ch2);
2285 if (cch)
2287 worklist.safe_push (cch);
2288 chains->safe_push (cch);
2289 break;
2294 worklist.release ();
2297 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2298 impossible because one of these initializers may trap, true otherwise. */
2300 static bool
2301 prepare_initializers_chain (struct loop *loop, chain_p chain)
2303 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2304 struct data_reference *dr = get_chain_root (chain)->ref;
2305 tree init;
2306 gimple_seq stmts;
2307 dref laref;
2308 edge entry = loop_preheader_edge (loop);
2310 /* Find the initializers for the variables, and check that they cannot
2311 trap. */
2312 chain->inits.create (n);
2313 for (i = 0; i < n; i++)
2314 chain->inits.quick_push (NULL_TREE);
2316 /* If we have replaced some looparound phi nodes, use their initializers
2317 instead of creating our own. */
2318 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2320 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2321 continue;
2323 gcc_assert (laref->distance > 0);
2324 chain->inits[n - laref->distance]
2325 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
2328 for (i = 0; i < n; i++)
2330 if (chain->inits[i] != NULL_TREE)
2331 continue;
/* Initializer for slot I is the reference's value I - N iterations
   before the loop starts.  */
2333 init = ref_at_iteration (dr, (int) i - n, &stmts);
/* Give up when the chain has conditionally-executed references and the
   initializer could trap if executed unconditionally.  */
2334 if (!chain->all_always_accessed && tree_could_trap_p (init))
2335 return false;
2337 if (stmts)
2338 gsi_insert_seq_on_edge_immediate (entry, stmts);
2340 chain->inits[i] = init;
2343 return true;
2346 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2347 be used because the initializers might trap. */
2349 static void
2350 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2352 chain_p chain;
2353 unsigned i;
/* I is only advanced on success; a removed element is replaced by the
   last one (unordered_remove), which is then examined in its place.  */
2355 for (i = 0; i < chains.length (); )
2357 chain = chains[i];
2358 if (prepare_initializers_chain (loop, chain))
2359 i++;
2360 else
2362 release_chain (chain);
2363 chains.unordered_remove (i);
/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n", loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  stack_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      /* Dependence analysis failed; nothing to do, release what was
	 created so far.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  /* Group the references into components (connected parts of the data
     dependence graph).  The dependences themselves are no longer needed
     afterwards.  */
  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  /* looparound_phis is a file-scope bitmap used by the chain-building
     machinery; it is released on the cleanup path below.  */
  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

  /* Common cleanup path, also reached when no suitable chains exist.  */
end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
2499 /* Runs predictive commoning. */
2501 unsigned
2502 tree_predictive_commoning (void)
2504 bool unrolled = false;
2505 struct loop *loop;
2506 loop_iterator li;
2507 unsigned ret = 0;
2509 initialize_original_copy_tables ();
2510 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
2511 if (optimize_loop_for_speed_p (loop))
2513 unrolled |= tree_predictive_commoning_loop (loop);
2516 if (unrolled)
2518 scev_reset ();
2519 ret = TODO_cleanup_cfg;
2521 free_original_copy_tables ();
2523 return ret;
2526 /* Predictive commoning Pass. */
2528 static unsigned
2529 run_tree_predictive_commoning (void)
2531 if (!current_loops)
2532 return 0;
2534 return tree_predictive_commoning ();
/* Gate function: the pass runs only when predictive commoning was
   requested via the -fpredictive-commoning flag.  */

static bool
gate_tree_predictive_commoning (void)
{
  return flag_predictive_commoning != 0;
}
namespace {

/* Pass descriptor for the predictive commoning pass.  The initializer
   is positional; the trailing comments name each pass_data field.  */
const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* The predictive commoning pass object; dispatches to the static
   gate and execute helpers above.  */
class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_predictive_commoning (); }
  unsigned int execute () { return run_tree_predictive_commoning (); }

}; // class pass_predcom

} // anon namespace
/* Factory function used by the pass manager to instantiate the
   predictive commoning pass in context CTXT.  Caller owns the
   returned pass object.  */

gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}