/* Source: gcc/tree-predcom.c from official-gcc.git,
   blob 7751c5f9368223d628a0a6b05c7beb1d359e7e90.
   (Web-viewer page title: "Check int_size_in_bytes in ix86_return_in_memory".)  */
/* Predictive commoning.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This file implements the predictive commoning optimization.  Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
162 a[i] = 1;
163 a[i+2] = 2;
166 can be replaced with
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
177 a[n] = t0;
178 a[n+1] = t1;
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "tm_p.h"
#include "cfgloop.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-chrec.h"
#include "params.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "wide-int-print.h"
/* The maximum number of iterations between the considered memory
   references.  Smaller when few hard registers are available, to limit
   register pressure from the carried values.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  NULL for looparound-phi or combination
     pseudo-references.  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains.  */
  CT_COMBINATION
};
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static hash_map<tree, name_expansion *> *name_expansions;
/* Dumps data reference REF to FILE.  Distinguishes real memory references
   (REF->ref non-NULL) from looparound-phi / combination pseudo-references.
   NOTE(review): spacing inside the literal strings below may have been
   collapsed by extraction from the upstream source — verify against
   upstream before relying on exact dump formatting.  */

extern void dump_dref (FILE *, dref);
void
dump_dref (FILE *file, dref ref)
{
  if (ref->ref)
    {
      fprintf (file, " ");
      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
      fprintf (file, " (id %u%s)\n", ref->pos,
	       DR_IS_READ (ref->ref) ? "" : ", write");

      fprintf (file, " offset ");
      print_decs (ref->offset, file);
      fprintf (file, "\n");

      fprintf (file, " distance %u\n", ref->distance);
    }
  else
    {
      if (gimple_code (ref->stmt) == GIMPLE_PHI)
	fprintf (file, " looparound ref\n");
      else
	fprintf (file, " combination ref\n");
      fprintf (file, " in statement ");
      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
      fprintf (file, "\n");
      fprintf (file, " distance %u\n", ref->distance);
    }
}
/* Dumps CHAIN to FILE: its type, maximum distance, combination operands
   (for CT_COMBINATION), variables, initializers and member references.  */

extern void dump_chain (FILE *, chain_p);
void
dump_chain (FILE *file, chain_p chain)
{
  dref a;
  const char *chain_type;
  unsigned i;
  tree var;

  switch (chain->type)
    {
    case CT_INVARIANT:
      chain_type = "Load motion";
      break;

    case CT_LOAD:
      chain_type = "Loads-only";
      break;

    case CT_STORE_LOAD:
      chain_type = "Store-loads";
      break;

    case CT_COMBINATION:
      chain_type = "Combination";
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
	   chain->combined ? " (combined)" : "");
  if (chain->type != CT_INVARIANT)
    fprintf (file, " max distance %u%s\n", chain->length,
	     chain->has_max_use_after ? "" : ", may reuse first");

  if (chain->type == CT_COMBINATION)
    {
      fprintf (file, " equal to %p %s %p in type ",
	       (void *) chain->ch1, op_symbol_code (chain->op),
	       (void *) chain->ch2);
      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (chain->vars.exists ())
    {
      fprintf (file, " vars");
      FOR_EACH_VEC_ELT (chain->vars, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  if (chain->inits.exists ())
    {
      fprintf (file, " inits");
      FOR_EACH_VEC_ELT (chain->inits, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  fprintf (file, " references:\n");
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    dump_dref (file, a);

  fprintf (file, "\n");
}
/* Dumps all CHAINS to FILE, one after another.  */

extern void dump_chains (FILE *, vec<chain_p> );
void
dump_chains (FILE *file, vec<chain_p> chains)
{
  chain_p chain;
  unsigned i;

  FOR_EACH_VEC_ELT (chains, i, chain)
    dump_chain (file, chain);
}
/* Dumps COMP to FILE: a header line (marking invariant components) followed
   by each member reference.  */

extern void dump_component (FILE *, struct component *);
void
dump_component (FILE *file, struct component *comp)
{
  dref a;
  unsigned i;

  fprintf (file, "Component%s:\n",
	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    dump_dref (file, a);
  fprintf (file, "\n");
}
/* Dumps the whole linked list of components COMPS to FILE.  */

extern void dump_components (FILE *, struct component *);
void
dump_components (FILE *file, struct component *comps)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    dump_component (file, comp);
}
/* Frees a chain CHAIN: the drefs it owns, its vectors, and the chain
   itself.  Safe to call with CHAIN == NULL.  */

static void
release_chain (chain_p chain)
{
  dref ref;
  unsigned i;

  if (chain == NULL)
    return;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    free (ref);

  chain->refs.release ();
  chain->vars.release ();
  chain->inits.release ();

  free (chain);
}
/* Frees every chain in CHAINS and the vector itself.  */

static void
release_chains (vec<chain_p> chains)
{
  unsigned i;
  chain_p chain;

  FOR_EACH_VEC_ELT (chains, i, chain)
    release_chain (chain);
  chains.release ();
}
/* Frees a component COMP.  Note: does NOT free the drefs stored in
   COMP->refs; callers that own them free those separately.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
/* Frees the whole linked list of components COMPS.  */

static void
release_components (struct component *comps)
{
  struct component *act, *next;

  for (act = comps; act; act = next)
    {
      next = act->next;
      release_component (act);
    }
}
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  This is the "find" half of a union-find (disjoint set)
   structure with path compression.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  /* Walk up the parent links until the representative (its own father)
     is reached.  */
  unsigned root = a;
  while (fathers[root] != root)
    root = fathers[root];

  /* Path compression: re-point every node on the walked path directly
     at the representative.  */
  while (a != root)
    {
      unsigned parent = fathers[a];
      fathers[a] = root;
      a = parent;
    }

  return root;
}
587 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
588 components, A and B are components to merge. */
590 static void
591 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
593 unsigned ca = component_of (fathers, a);
594 unsigned cb = component_of (fathers, b);
596 if (ca == cb)
597 return;
599 if (sizes[ca] < sizes[cb])
601 sizes[cb] += sizes[ca];
602 fathers[ca] = cb;
604 else
606 sizes[ca] += sizes[cb];
607 fathers[cb] = ca;
/* Returns true if A is a reference that is suitable for predictive commoning
   in the innermost loop that contains it.  REF_STEP is set according to the
   step of the reference A.  Rejects references with no analyzed step,
   volatile accesses, non-register-type accesses, and accesses that could
   throw.  */

static bool
suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
{
  tree ref = DR_REF (a), step = DR_STEP (a);

  if (!step
      || TREE_THIS_VOLATILE (ref)
      || !is_gimple_reg_type (TREE_TYPE (ref))
      || tree_could_throw_p (ref))
    return false;

  /* Classify the step: provably zero, provably nonzero, or unknown.  */
  if (integer_zerop (step))
    *ref_step = RS_INVARIANT;
  else if (integer_nonzerop (step))
    *ref_step = RS_NONZERO;
  else
    *ref_step = RS_ANY;

  return true;
}
/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET, as an affine
   combination with SSA names expanded through NAME_EXPANSIONS.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
/* Returns the last basic block in LOOP for that we are sure that
   it is executed whenever the loop is entered.  Computed as the nearest
   common dominator of the latch and the source of every exit edge.  */

static basic_block
last_always_executed_block (struct loop *loop)
{
  unsigned i;
  vec<edge> exits = get_loop_exit_edges (loop);
  edge ex;
  basic_block last = loop->latch;

  FOR_EACH_VEC_ELT (exits, i, ex)
    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
  exits.release ();

  return last;
}
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Uses a union-find structure (comp_father/comp_size) with one extra slot
   (index N) reserved as the "bad" component collecting references that
   cannot be handled.  Returns a linked list of components, or NULL on
   failure (clobbering call/asm reference, or a call with data refs).  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Move unsuitable references into the bad component up front.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in it's component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Materialize the union-find classes as component structures.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop: each reference must execute exactly once per iteration, the
   references must be ordered by dominance, all offsets relative to the
   first reference must be computable, and if the component contains a
   write the step must be known zero or known nonzero.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* References were collected in dominance order, so each block must
	 be dominated by the previous one.  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

#ifdef ENABLE_CHECKING
      {
	enum ref_step_type a_step;
	ok = suitable_reference_p (a->ref, &a_step);
	gcc_assert (ok && a_step == comp->comp_step);
      }
#endif
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  Uses a
   pointer-to-pointer walk so removal does not need a "previous" node.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  struct component **comp, *act;

  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  /* Unlink ACT, free its drefs (release_component does not),
	     then free ACT itself.  */
	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}
/* Compares two drefs A and B by their offset and position.  Callback for
   qsort; position breaks ties so the ordering is stable with respect to
   dominance order.  */

static int
order_drefs (const void *a, const void *b)
{
  const dref *const da = (const dref *) a;
  const dref *const db = (const dref *) b;
  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);

  if (offcmp != 0)
    return offcmp;

  return (*da)->pos - (*db)->pos;
}
/* Returns root of the CHAIN (its first reference).  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
/* Adds REF to the chain CHAIN.  Frees REF instead if its distance from the
   chain root is at least MAX_DISTANCE.  Updates CHAIN's length,
   has_max_use_after and all_always_accessed bookkeeping.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      /* Too far from the root to be profitable; drop the reference.  */
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* A reference at maximal distance that comes after the root forces us
     to keep the extra variable RN (see comment 5 at the top of file).  */
  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
/* Returns the chain for invariant component COMP (all references have
   loop-invariant addresses; subject to load motion).  */

static chain_p
make_invariant_chain (struct component *comp)
{
  chain_p chain = XCNEW (struct chain);
  unsigned i;
  dref ref;

  chain->type = CT_INVARIANT;

  chain->all_always_accessed = true;

  FOR_EACH_VEC_ELT (comp->refs, i, ref)
    {
      chain->refs.safe_push (ref);
      chain->all_always_accessed &= ref->always_accessed;
    }

  return chain;
}
/* Make a new chain rooted at REF.  The chain type depends on whether the
   root is a load (CT_LOAD) or a store (CT_STORE_LOAD).  */

static chain_p
make_rooted_chain (dref ref)
{
  chain_p chain = XCNEW (struct chain);

  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;

  chain->refs.safe_push (ref);
  chain->all_always_accessed = ref->always_accessed;

  ref->distance = 0;

  return chain;
}
/* Returns true if CHAIN is not trivial, i.e. it contains more than one
   reference and thus offers reuse.  */

static bool
nontrivial_chain_p (chain_p chain)
{
  return chain != NULL && chain->refs.length () > 1;
}
/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
   is no such name.  For a load this is the assignment lhs, for a store the
   stored rhs, for a phi node its result.  */

static tree
name_for_ref (dref ref)
{
  tree name;

  if (is_gimple_assign (ref->stmt))
    {
      if (!ref->ref || DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);

  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
}
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  REF must be loop invariant
   (its step zero) since it is evaluated before the loop.  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (off != distance)
    return false;

  return true;
}
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* The SSA name holding REF's value: lhs of a load, rhs of a store,
     or the phi result.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a header phi whose latch argument is NAME.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The preheader value must be a load from memory...  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* ... of the location REF accesses DISTANCE+1 iterations earlier.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  The new
   pseudo-reference has distance REF->distance + 1 and is inserted so the
   refs vector stays sorted by distance.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  /* Find the insertion point keeping refs ordered by distance.  */
  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}
/* For references in CHAIN that are copied around the LOOP (created previously
   by PRE, or by user), add the results of such copies to the chain.  This
   enables us to remove the copies by unrolling, and may need less registers
   (also, it may allow us to combine chains together).  */

static void
add_looparound_copies (struct loop *loop, chain_p chain)
{
  unsigned i;
  dref ref, root = get_chain_root (chain);
  gphi *phi;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    {
      phi = find_looparound_phi (loop, ref, root);
      if (!phi)
	continue;

      /* Record the phi so later passes know it is covered by a chain.  */
      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
      insert_looparound_copy (chain, ref, phi);
    }
}
1225 /* Find roots of the values and determine distances in the component COMP.
1226 The references are redistributed into CHAINS. LOOP is the current
1227 loop. */
1229 static void
1230 determine_roots_comp (struct loop *loop,
1231 struct component *comp,
1232 vec<chain_p> *chains)
1234 unsigned i;
1235 dref a;
1236 chain_p chain = NULL;
1237 widest_int last_ofs = 0;
1239 /* Invariants are handled specially. */
1240 if (comp->comp_step == RS_INVARIANT)
1242 chain = make_invariant_chain (comp);
1243 chains->safe_push (chain);
1244 return;
/* Sort the references so that chains can be built by a single sweep in
   offset order. */
1247 comp->refs.qsort (order_drefs);
1249 FOR_EACH_VEC_ELT (comp->refs, i, a)
/* Start a new chain rooted at A when there is no current chain, when A is
   a write (only the root of a chain may be a write), or when the offset
   gap from the current root reaches MAX_DISTANCE. */
1251 if (!chain || DR_IS_WRITE (a->ref)
1252 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
/* Keep the finished chain only if it contains more than just its root. */
1254 if (nontrivial_chain_p (chain))
1256 add_looparound_copies (loop, chain);
1257 chains->safe_push (chain);
1259 else
1260 release_chain (chain);
1261 chain = make_rooted_chain (a);
1262 last_ofs = a->offset;
1263 continue;
1266 add_ref_to_chain (chain, a);
/* Flush the last open chain the same way as above. */
1269 if (nontrivial_chain_p (chain))
1271 add_looparound_copies (loop, chain);
1272 chains->safe_push (chain);
1274 else
1275 release_chain (chain);
1278 /* Find roots of the values and determine distances in components COMPS, and
1279 separates the references to CHAINS. LOOP is the current loop. */
1281 static void
1282 determine_roots (struct loop *loop,
1283 struct component *comps, vec<chain_p> *chains)
1285 struct component *comp;
/* Process each component of the singly-linked list independently. */
1287 for (comp = comps; comp; comp = comp->next)
1288 determine_roots_comp (loop, comp, chains);
1291 /* Replace the reference in statement STMT with temporary variable
1292 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1293 the reference in the statement. IN_LHS is true if the reference
1294 is in the lhs of STMT, false if it is in rhs. */
1296 static void
1297 replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
1299 tree val;
1300 gassign *new_stmt;
1301 gimple_stmt_iterator bsi, psi;
/* A looparound phi is replaced by a plain copy from NEW_TREE placed right
   after the labels of its block. */
1303 if (gimple_code (stmt) == GIMPLE_PHI)
1305 gcc_assert (!in_lhs && !set);
1307 val = PHI_RESULT (stmt);
1308 bsi = gsi_after_labels (gimple_bb (stmt));
1309 psi = gsi_for_stmt (stmt);
1310 remove_phi_node (&psi, false);
1312 /* Turn the phi node into GIMPLE_ASSIGN. */
1313 new_stmt = gimple_build_assign (val, new_tree);
1314 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1315 return;
1318 /* Since the reference is of gimple_reg type, it should only
1319 appear as lhs or rhs of modify statement. */
1320 gcc_assert (is_gimple_assign (stmt));
1322 bsi = gsi_for_stmt (stmt);
1324 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1325 if (!set)
1327 gcc_assert (!in_lhs);
1328 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
/* gimple_assign_set_rhs_from_tree may have replaced the statement;
   refetch it from the iterator before updating. */
1329 stmt = gsi_stmt (bsi);
1330 update_stmt (stmt);
1331 return;
1334 if (in_lhs)
1336 /* We have statement
1338 OLD = VAL
1340 If OLD is a memory reference, then VAL is gimple_val, and we transform
1341 this to
1343 OLD = VAL
1344 NEW = VAL
1346 Otherwise, we are replacing a combination chain,
1347 VAL is the expression that performs the combination, and OLD is an
1348 SSA name. In this case, we transform the assignment to
1350 OLD = VAL
1351 NEW = OLD
1355 val = gimple_assign_lhs (stmt);
1356 if (TREE_CODE (val) != SSA_NAME)
1358 val = gimple_assign_rhs1 (stmt);
1359 gcc_assert (gimple_assign_single_p (stmt));
/* NOTE(review): a clobber on the rhs means the stored value is undefined;
   fall back to the default definition of NEW_TREE's underlying variable. */
1360 if (TREE_CLOBBER_P (val))
1361 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1362 else
1363 gcc_assert (gimple_assign_copy_p (stmt));
1366 else
1368 /* VAL = OLD
1370 is transformed to
1372 VAL = OLD
1373 NEW = VAL */
1375 val = gimple_assign_lhs (stmt);
/* Emit NEW = VAL right after STMT; VAL is unshared because it may be a
   tree that is already used in STMT. */
1378 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1379 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1382 /* Returns a memory reference to DR in the ITER-th iteration of
1383 the loop it was analyzed in. Append init stmts to STMTS.
   ITER may be negative (prepare_initializers_chain passes i - n) to refer
   to virtual iterations before the loop entry. */
1385 static tree
1386 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
1388 tree off = DR_OFFSET (dr);
1389 tree coff = DR_INIT (dr);
/* Fold ITER * STEP into the constant part when the step is constant,
   otherwise into the variable offset. */
1390 if (iter == 0)
1392 else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
1393 coff = size_binop (PLUS_EXPR, coff,
1394 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1395 else
1396 off = size_binop (PLUS_EXPR, off,
1397 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1398 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1399 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1400 is_gimple_mem_ref_addr, NULL_TREE);
1401 tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
1402 /* While data-ref analysis punts on bit offsets it still handles
1403 bitfield accesses at byte boundaries. Cope with that. Note that
1404 we cannot simply re-apply the outer COMPONENT_REF because the
1405 byte-granular portion of it is already applied via DR_INIT and
1406 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1407 start at offset zero. */
1408 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
1409 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
1411 tree field = TREE_OPERAND (DR_REF (dr), 1);
1412 return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
1413 build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
1414 addr, alias_ptr),
1415 DECL_SIZE (field), bitsize_zero_node);
1417 else
1418 return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
1421 /* Get the initialization expression for the INDEX-th temporary variable
1422 of CHAIN. */
1424 static tree
1425 get_init_expr (chain_p chain, unsigned index)
/* A combination chain has no initializers of its own: combine the
   corresponding initializers of its two sub-chains recursively. */
1427 if (chain->type == CT_COMBINATION)
1429 tree e1 = get_init_expr (chain->ch1, index);
1430 tree e2 = get_init_expr (chain->ch2, index);
1432 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1434 else
1435 return chain->inits[index];
1438 /* Returns a new temporary variable used for the I-th variable carrying
1439 value of REF. The variable's uid is marked in TMP_VARS. */
1441 static tree
1442 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1444 tree type = TREE_TYPE (ref);
1445 /* We never access the components of the temporary variable in predictive
1446 commoning. */
1447 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
/* Remember the variable's uid so eliminate_temp_copies can later
   recognize the temporaries created here. */
1448 bitmap_set_bit (tmp_vars, DECL_UID (var));
1449 return var;
1452 /* Creates the variables for CHAIN, as well as phi nodes for them and
1453 initialization on entry to LOOP. Uids of the newly created
1454 temporary variables are marked in TMP_VARS. */
1456 static void
1457 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1459 unsigned i;
1460 unsigned n = chain->length;
1461 dref root = get_chain_root (chain);
/* When the farthest value has no use after the root, the first and the
   last variable may share the same underlying temporary. */
1462 bool reuse_first = !chain->has_max_use_after;
1463 tree ref, init, var, next;
1464 gphi *phi;
1465 gimple_seq stmts;
1466 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1468 /* If N == 0, then all the references are within the single iteration. And
1469 since this is an nonempty chain, reuse_first cannot be true. */
1470 gcc_assert (n > 0 || !reuse_first);
1472 chain->vars.create (n + 1);
1474 if (chain->type == CT_COMBINATION)
1475 ref = gimple_assign_lhs (root->stmt);
1476 else
1477 ref = DR_REF (root->ref);
/* Create N + 1 variables (or N when the first one is reused as the
   last). */
1479 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1481 var = predcom_tmp_var (ref, i, tmp_vars);
1482 chain->vars.quick_push (var);
1484 if (reuse_first)
1485 chain->vars.quick_push (chain->vars[0]);
/* Replace the raw declarations with SSA names based on them. */
1487 FOR_EACH_VEC_ELT (chain->vars, i, var)
1488 chain->vars[i] = make_ssa_name (var);
/* For each variable, build the loop-header phi
   var = PHI <init (entry), next (latch)>, materializing the initializer
   on the preheader edge if it needs statements. */
1490 for (i = 0; i < n; i++)
1492 var = chain->vars[i];
1493 next = chain->vars[i + 1];
1494 init = get_init_expr (chain, i);
1496 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1497 if (stmts)
1498 gsi_insert_seq_on_edge_immediate (entry, stmts);
1500 phi = create_phi_node (var, loop->header);
1501 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1502 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1506 /* Create the variables and initialization statement for root of chain
1507 CHAIN. Uids of the newly created temporary variables are marked
1508 in TMP_VARS. */
1510 static void
1511 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1513 dref root = get_chain_root (chain);
/* For store-load and combination chains the root reference sits on the
   lhs of its statement. */
1514 bool in_lhs = (chain->type == CT_STORE_LOAD
1515 || chain->type == CT_COMBINATION);
1517 initialize_root_vars (loop, chain, tmp_vars);
/* The root defines the farthest value of the chain, held in
   vars[chain->length]. */
1518 replace_ref_with (root->stmt,
1519 chain->vars[chain->length],
1520 true, in_lhs);
1523 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1524 initialization on entry to LOOP if necessary. The ssa name for the variable
1525 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1526 around the loop is created. Uid of the newly created temporary variable
1527 is marked in TMP_VARS. INITS is the list containing the (single)
1528 initializer. */
1530 static void
1531 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1532 vec<tree> *vars, vec<tree> inits,
1533 bitmap tmp_vars)
1535 unsigned i;
1536 tree ref = DR_REF (root->ref), init, var, next;
1537 gimple_seq stmts;
1538 gphi *phi;
1539 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1541 /* Find the initializer for the variable, and check that it cannot
1542 trap. */
1543 init = inits[0];
/* One SSA name is enough for a read-only invariant; a written one also
   needs a second name for the value carried around the loop. */
1545 vars->create (written ? 2 : 1);
1546 var = predcom_tmp_var (ref, 0, tmp_vars);
1547 vars->quick_push (var);
1548 if (written)
1549 vars->quick_push ((*vars)[0]);
1551 FOR_EACH_VEC_ELT (*vars, i, var)
1552 (*vars)[i] = make_ssa_name (var);
1554 var = (*vars)[0];
/* The initializer must be a gimple value when it feeds a phi argument,
   hence simple_p == WRITTEN. */
1556 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1557 if (stmts)
1558 gsi_insert_seq_on_edge_immediate (entry, stmts);
1560 if (written)
/* var = PHI <init (entry), next (latch)> copies the value around the
   loop. */
1562 next = (*vars)[1];
1563 phi = create_phi_node (var, loop->header);
1564 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1565 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1567 else
/* Read-only case: a single initialization on the preheader edge. */
1569 gassign *init_stmt = gimple_build_assign (var, init);
1570 gsi_insert_on_edge_immediate (entry, init_stmt);
1575 /* Execute load motion for references in chain CHAIN. Uids of the newly
1576 created temporary variables are marked in TMP_VARS. */
1578 static void
1579 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1581 auto_vec<tree> vars;
1582 dref a;
1583 unsigned n_writes = 0, ridx, i;
1584 tree var;
1586 gcc_assert (chain->type == CT_INVARIANT);
1587 gcc_assert (!chain->combined);
1588 FOR_EACH_VEC_ELT (chain->refs, i, a)
1589 if (DR_IS_WRITE (a->ref))
1590 n_writes++;
1592 /* If there are no reads in the loop, there is nothing to do. */
1593 if (n_writes == chain->refs.length ())
1594 return;
1596 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1597 &vars, chain->inits, tmp_vars);
/* RIDX selects which variable the reads use: vars[0] while writes may
   still occur, vars[1] (the phi result copy) once the last write has
   been processed. */
1599 ridx = 0;
1600 FOR_EACH_VEC_ELT (chain->refs, i, a)
1602 bool is_read = DR_IS_READ (a->ref);
1604 if (DR_IS_WRITE (a->ref))
1606 n_writes--;
1607 if (n_writes)
/* More writes follow: create a fresh SSA name of the same variable
   for the value stored by this write. */
1609 var = vars[0];
1610 var = make_ssa_name (SSA_NAME_VAR (var));
1611 vars[0] = var;
1613 else
1614 ridx = 1;
/* Writes set the variable from the stored value; reads are replaced by
   a use of the variable. */
1617 replace_ref_with (a->stmt, vars[ridx],
1618 !is_read, !is_read);
1622 /* Returns the single statement in that NAME is used, excepting
1623 the looparound phi nodes contained in one of the chains. If there is no
1624 such statement, or more statements, NULL is returned. */
1626 static gimple
1627 single_nonlooparound_use (tree name)
1629 use_operand_p use;
1630 imm_use_iterator it;
1631 gimple stmt, ret = NULL;
1633 FOR_EACH_IMM_USE_FAST (use, it, name)
1635 stmt = USE_STMT (use);
1637 if (gimple_code (stmt) == GIMPLE_PHI)
1639 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1640 could not be processed anyway, so just fail for them. */
1641 if (bitmap_bit_p (looparound_phis,
1642 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1643 continue;
1645 return NULL;
/* Debug statements never count as real uses. */
1647 else if (is_gimple_debug (stmt))
1648 continue;
/* A second real use means the use is not unique. */
1649 else if (ret != NULL)
1650 return NULL;
1651 else
1652 ret = stmt;
1655 return ret;
1658 /* Remove statement STMT, as well as the chain of assignments in that it is
1659 used. */
1661 static void
1662 remove_stmt (gimple stmt)
1664 tree name;
1665 gimple next;
1666 gimple_stmt_iterator psi;
1668 if (gimple_code (stmt) == GIMPLE_PHI)
1670 name = PHI_RESULT (stmt);
1671 next = single_nonlooparound_use (name);
1672 reset_debug_uses (stmt);
1673 psi = gsi_for_stmt (stmt);
1674 remove_phi_node (&psi, true);
/* Only continue down the chain through single-use SSA-name copies of
   NAME; anything else is left intact. */
1676 if (!next
1677 || !gimple_assign_ssa_name_copy_p (next)
1678 || gimple_assign_rhs1 (next) != name)
1679 return;
1681 stmt = next;
/* Walk and delete the chain of copy assignments rooted at STMT. */
1684 while (1)
1686 gimple_stmt_iterator bsi;
1688 bsi = gsi_for_stmt (stmt);
1690 name = gimple_assign_lhs (stmt);
1691 gcc_assert (TREE_CODE (name) == SSA_NAME);
/* Find the continuation before removing STMT, since removal releases
   the definition of NAME. */
1693 next = single_nonlooparound_use (name);
1694 reset_debug_uses (stmt);
1696 unlink_stmt_vdef (stmt);
1697 gsi_remove (&bsi, true);
1698 release_defs (stmt);
1700 if (!next
1701 || !gimple_assign_ssa_name_copy_p (next)
1702 || gimple_assign_rhs1 (next) != name)
1703 return;
1705 stmt = next;
1709 /* Perform the predictive commoning optimization for a chain CHAIN.
1710 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1712 static void
1713 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1714 bitmap tmp_vars)
1716 unsigned i;
1717 dref a;
1718 tree var;
1720 if (chain->combined)
1722 /* For combined chains, just remove the statements that are used to
1723 compute the values of the expression (except for the root one).
1724 We delay this until after all chains are processed. */
1726 else
1728 /* For non-combined chains, set up the variables that hold its value,
1729 and replace the uses of the original references by these
1730 variables. */
1731 initialize_root (loop, chain, tmp_vars);
/* Skip the root (index 0); each non-root reference of distance d reads
   the value stored in vars[length - d]. */
1732 for (i = 1; chain->refs.iterate (i, &a); i++)
1734 var = chain->vars[chain->length - a->distance];
1735 replace_ref_with (a->stmt, var, false, false);
1740 /* Determines the unroll factor necessary to remove as many temporary variable
1741 copies as possible. CHAINS is the list of chains that will be
1742 optimized. */
1744 static unsigned
1745 determine_unroll_factor (vec<chain_p> chains)
1747 chain_p chain;
1748 unsigned factor = 1, af, nfactor, i;
1749 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1751 FOR_EACH_VEC_ELT (chains, i, chain)
/* Invariant chains do not constrain the unroll factor. */
1753 if (chain->type == CT_INVARIANT)
1754 continue;
1756 if (chain->combined)
1758 /* For combined chains, we can't handle unrolling if we replace
1759 looparound PHIs. */
1760 dref a;
1761 unsigned j;
1762 for (j = 1; chain->refs.iterate (j, &a); j++)
1763 if (gimple_code (a->stmt) == GIMPLE_PHI)
1764 return 1;
1765 continue;
1768 /* The best unroll factor for this chain is equal to the number of
1769 temporary variables that we create for it. */
1770 af = chain->length;
1771 if (chain->has_max_use_after)
1772 af++;
/* Combine with the factors of the previous chains via the least common
   multiple, capped by PARAM_MAX_UNROLL_TIMES. */
1774 nfactor = factor * af / gcd (factor, af);
1775 if (nfactor <= max)
1776 factor = nfactor;
1779 return factor;
1782 /* Perform the predictive commoning optimization for CHAINS.
1783 Uids of the newly created temporary variables are marked in TMP_VARS. */
1785 static void
1786 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1787 bitmap tmp_vars)
1789 chain_p chain;
1790 unsigned i;
/* First pass: transform every chain. */
1792 FOR_EACH_VEC_ELT (chains, i, chain)
1794 if (chain->type == CT_INVARIANT)
1795 execute_load_motion (loop, chain, tmp_vars);
1796 else
1797 execute_pred_commoning_chain (loop, chain, tmp_vars);
/* Second pass: only now remove the dead computations of combined chains,
   so that the statements are still available while other chains are
   being processed. */
1800 FOR_EACH_VEC_ELT (chains, i, chain)
1802 if (chain->type == CT_INVARIANT)
1804 else if (chain->combined)
1806 /* For combined chains, just remove the statements that are used to
1807 compute the values of the expression (except for the root one). */
1808 dref a;
1809 unsigned j;
1810 for (j = 1; chain->refs.iterate (j, &a); j++)
1811 remove_stmt (a->stmt);
1815 update_ssa (TODO_update_ssa_only_virtuals);
1818 /* For each reference in CHAINS, if its defining statement is
1819 phi node, record the ssa name that is defined by it. */
1821 static void
1822 replace_phis_by_defined_names (vec<chain_p> chains)
1824 chain_p chain;
1825 dref a;
1826 unsigned i, j;
1828 FOR_EACH_VEC_ELT (chains, i, chain)
1829 FOR_EACH_VEC_ELT (chain->refs, j, a)
1831 if (gimple_code (a->stmt) == GIMPLE_PHI)
/* Store the SSA name instead of the phi, so the reference survives
   loop copying; replace_names_by_phis restores the statement. */
1833 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1834 a->stmt = NULL;
1839 /* For each reference in CHAINS, if name_defined_by_phi is not
1840 NULL, use it to set the stmt field. */
1842 static void
1843 replace_names_by_phis (vec<chain_p> chains)
1845 chain_p chain;
1846 dref a;
1847 unsigned i, j;
1849 FOR_EACH_VEC_ELT (chains, i, chain)
1850 FOR_EACH_VEC_ELT (chain->refs, j, a)
/* Inverse of replace_phis_by_defined_names: a NULL stmt marks a
   reference whose phi was stashed as an SSA name. */
1851 if (a->stmt == NULL)
1853 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1854 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1855 a->name_defined_by_phi = NULL_TREE;
1859 /* Wrapper over execute_pred_commoning, to pass it as a callback
1860 to tree_transform_and_unroll_loop. */
/* Data passed through the void* callback argument. */
1862 struct epcc_data
1864 vec<chain_p> chains;
1865 bitmap tmp_vars;
1868 static void
1869 execute_pred_commoning_cbck (struct loop *loop, void *data)
1871 struct epcc_data *const dta = (struct epcc_data *) data;
1873 /* Restore phi nodes that were replaced by ssa names before
1874 tree_transform_and_unroll_loop (see detailed description in
1875 tree_predictive_commoning_loop). */
1876 replace_names_by_phis (dta->chains);
1877 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1880 /* Base NAME and all the names in the chain of phi nodes that use it
1881 on variable VAR. The phi nodes are recognized by being in the copies of
1882 the header of the LOOP. */
1884 static void
1885 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1887 gimple stmt, phi;
1888 imm_use_iterator iter;
1890 replace_ssa_name_symbol (name, var);
/* Follow the du-chain through phi nodes inside LOOP, rebasing each phi
   result on VAR, until no such phi uses the current name. */
1892 while (1)
1894 phi = NULL;
1895 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1897 if (gimple_code (stmt) == GIMPLE_PHI
1898 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1900 phi = stmt;
1901 BREAK_FROM_IMM_USE_STMT (iter);
1904 if (!phi)
1905 return;
1907 name = PHI_RESULT (phi);
1908 replace_ssa_name_symbol (name, var);
1912 /* Given an unrolled LOOP after predictive commoning, remove the
1913 register copies arising from phi nodes by changing the base
1914 variables of SSA names. TMP_VARS is the set of the temporary variables
1915 for those we want to perform this. */
1917 static void
1918 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1920 edge e;
1921 gphi *phi;
1922 gimple stmt;
1923 tree name, use, var;
1924 gphi_iterator psi;
1926 e = loop_latch_edge (loop);
1927 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1929 phi = psi.phi ();
1930 name = PHI_RESULT (phi);
1931 var = SSA_NAME_VAR (name);
/* Only handle phis whose result is based on one of the predictive
   commoning temporaries recorded in TMP_VARS. */
1932 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
1933 continue;
1934 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1935 gcc_assert (TREE_CODE (use) == SSA_NAME);
1937 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1938 stmt = SSA_NAME_DEF_STMT (use);
1939 while (gimple_code (stmt) == GIMPLE_PHI
1940 /* In case we could not unroll the loop enough to eliminate
1941 all copies, we may reach the loop header before the defining
1942 statement (in that case, some register copies will be present
1943 in loop latch in the final code, corresponding to the newly
1944 created looparound phi nodes). */
1945 && gimple_bb (stmt) != loop->header)
/* Phis in the unrolled body copies are degenerate (single
   predecessor), so walking argument 0 follows the ud-chain. */
1947 gcc_assert (single_pred_p (gimple_bb (stmt)));
1948 use = PHI_ARG_DEF (stmt, 0);
1949 stmt = SSA_NAME_DEF_STMT (use);
1952 base_names_in_chain_on (loop, use, var);
1956 /* Returns true if CHAIN is suitable to be combined.
   Only load and combination chains that have not yet been combined
   qualify. */
1958 static bool
1959 chain_can_be_combined_p (chain_p chain)
1961 return (!chain->combined
1962 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1965 /* Returns the modify statement that uses NAME. Skips over assignment
1966 statements, NAME is replaced with the actual name used in the returned
1967 statement. */
1969 static gimple
1970 find_use_stmt (tree *name)
1972 gimple stmt;
1973 tree rhs, lhs;
1975 /* Skip over assignments. */
1976 while (1)
/* The use must be unique (ignoring looparound phis), otherwise the
   reference cannot be combined. */
1978 stmt = single_nonlooparound_use (*name);
1979 if (!stmt)
1980 return NULL;
1982 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1983 return NULL;
1985 lhs = gimple_assign_lhs (stmt);
1986 if (TREE_CODE (lhs) != SSA_NAME)
1987 return NULL;
1989 if (gimple_assign_copy_p (stmt))
/* Plain copy: continue the search from the copy's result. */
1991 rhs = gimple_assign_rhs1 (stmt);
1992 if (rhs != *name)
1993 return NULL;
1995 *name = lhs;
1997 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1998 == GIMPLE_BINARY_RHS)
1999 return stmt;
2000 else
2001 return NULL;
2005 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2007 static bool
2008 may_reassociate_p (tree type, enum tree_code code)
2010 if (FLOAT_TYPE_P (type)
2011 && !flag_unsafe_math_optimizations)
2012 return false;
2014 return (commutative_tree_code (code)
2015 && associative_tree_code (code));
2018 /* If the operation used in STMT is associative and commutative, go through the
2019 tree of the same operations and returns its root. Distance to the root
2020 is stored in DISTANCE. */
2022 static gimple
2023 find_associative_operation_root (gimple stmt, unsigned *distance)
2025 tree lhs;
2026 gimple next;
2027 enum tree_code code = gimple_assign_rhs_code (stmt);
2028 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2029 unsigned dist = 0;
2031 if (!may_reassociate_p (type, code))
2032 return NULL;
/* Climb through the single-use chain of statements with the same
   operation code; the last one is the root of the expression tree. */
2034 while (1)
2036 lhs = gimple_assign_lhs (stmt);
2037 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2039 next = find_use_stmt (&lhs);
2040 if (!next
2041 || gimple_assign_rhs_code (next) != code)
2042 break;
2044 stmt = next;
2045 dist++;
/* DISTANCE may be NULL when the caller only needs the root. */
2048 if (distance)
2049 *distance = dist;
2050 return stmt;
2053 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2054 is no such statement, returns NULL_TREE. In case the operation used on
2055 NAME1 and NAME2 is associative and commutative, returns the root of the
2056 tree formed by this operation instead of the statement that uses NAME1 or
2057 NAME2. */
2059 static gimple
2060 find_common_use_stmt (tree *name1, tree *name2)
2062 gimple stmt1, stmt2;
2064 stmt1 = find_use_stmt (name1);
2065 if (!stmt1)
2066 return NULL;
2068 stmt2 = find_use_stmt (name2);
2069 if (!stmt2)
2070 return NULL;
/* Direct hit: both names feed the same binary statement. */
2072 if (stmt1 == stmt2)
2073 return stmt1;
/* Otherwise they may still belong to one reassociable expression tree;
   compare the roots of the two trees. */
2075 stmt1 = find_associative_operation_root (stmt1, NULL);
2076 if (!stmt1)
2077 return NULL;
2078 stmt2 = find_associative_operation_root (stmt2, NULL);
2079 if (!stmt2)
2080 return NULL;
2082 return (stmt1 == stmt2 ? stmt1 : NULL);
2085 /* Checks whether R1 and R2 are combined together using CODE, with the result
2086 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2087 if it is true. If CODE is ERROR_MARK, set these values instead. */
2089 static bool
2090 combinable_refs_p (dref r1, dref r2,
2091 enum tree_code *code, bool *swap, tree *rslt_type)
2093 enum tree_code acode;
2094 bool aswap;
2095 tree atype;
2096 tree name1, name2;
2097 gimple stmt;
2099 name1 = name_for_ref (r1);
2100 name2 = name_for_ref (r2);
2101 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2103 stmt = find_common_use_stmt (&name1, &name2);
2105 if (!stmt
2106 /* A simple post-dominance check - make sure the combination
2107 is executed under the same condition as the references. */
2108 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2109 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2110 return false;
2112 acode = gimple_assign_rhs_code (stmt);
/* For commutative operations the operand order is irrelevant, so no
   swap is ever needed. */
2113 aswap = (!commutative_tree_code (acode)
2114 && gimple_assign_rhs1 (stmt) != name1);
2115 atype = TREE_TYPE (gimple_assign_lhs (stmt));
/* First pair of references: record the combination parameters. */
2117 if (*code == ERROR_MARK)
2119 *code = acode;
2120 *swap = aswap;
2121 *rslt_type = atype;
2122 return true;
/* Subsequent pairs must match the recorded parameters exactly. */
2125 return (*code == acode
2126 && *swap == aswap
2127 && *rslt_type == atype);
2130 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2131 an assignment of the remaining operand. */
2133 static void
2134 remove_name_from_operation (gimple stmt, tree op)
2136 tree other_op;
2137 gimple_stmt_iterator si;
2139 gcc_assert (is_gimple_assign (stmt));
/* Pick whichever binary operand is not OP. */
2141 if (gimple_assign_rhs1 (stmt) == op)
2142 other_op = gimple_assign_rhs2 (stmt);
2143 else
2144 other_op = gimple_assign_rhs1 (stmt);
2146 si = gsi_for_stmt (stmt);
2147 gimple_assign_set_rhs_from_tree (&si, other_op);
2149 /* We should not have reallocated STMT. */
2150 gcc_assert (gsi_stmt (si) == stmt);
2152 update_stmt (stmt);
2155 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2156 are combined in a single statement, and returns this statement. */
2158 static gimple
2159 reassociate_to_the_same_stmt (tree name1, tree name2)
2161 gimple stmt1, stmt2, root1, root2, s1, s2;
2162 gassign *new_stmt, *tmp_stmt;
2163 tree new_name, tmp_name, var, r1, r2;
2164 unsigned dist1, dist2;
2165 enum tree_code code;
2166 tree type = TREE_TYPE (name1);
2167 gimple_stmt_iterator bsi;
2169 stmt1 = find_use_stmt (&name1);
2170 stmt2 = find_use_stmt (&name2);
2171 root1 = find_associative_operation_root (stmt1, &dist1);
2172 root2 = find_associative_operation_root (stmt2, &dist2);
2173 code = gimple_assign_rhs_code (stmt1);
/* The caller (stmt_combining_refs) only reaches here when both names
   belong to the same reassociable expression tree. */
2175 gcc_assert (root1 && root2 && root1 == root2
2176 && code == gimple_assign_rhs_code (stmt2));
2178 /* Find the root of the nearest expression in that both NAME1 and NAME2
2179 are used. */
2180 r1 = name1;
2181 s1 = stmt1;
2182 r2 = name2;
2183 s2 = stmt2;
/* First equalize the distances to the common root... */
2185 while (dist1 > dist2)
2187 s1 = find_use_stmt (&r1);
2188 r1 = gimple_assign_lhs (s1);
2189 dist1--;
2191 while (dist2 > dist1)
2193 s2 = find_use_stmt (&r2);
2194 r2 = gimple_assign_lhs (s2);
2195 dist2--;
/* ...then climb both paths in lockstep until they meet. */
2198 while (s1 != s2)
2200 s1 = find_use_stmt (&r1);
2201 r1 = gimple_assign_lhs (s1);
2202 s2 = find_use_stmt (&r2);
2203 r2 = gimple_assign_lhs (s2);
2206 /* Remove NAME1 and NAME2 from the statements in that they are used
2207 currently. */
2208 remove_name_from_operation (stmt1, name1);
2209 remove_name_from_operation (stmt2, name2);
2211 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2212 combine it with the rhs of S1. */
2213 var = create_tmp_reg (type, "predreastmp");
2214 new_name = make_ssa_name (var);
2215 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2217 var = create_tmp_reg (type, "predreastmp");
2218 tmp_name = make_ssa_name (var);
2220 /* Rhs of S1 may now be either a binary expression with operation
2221 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2222 so that name1 or name2 was removed from it). */
2223 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2224 gimple_assign_rhs1 (s1),
2225 gimple_assign_rhs2 (s1));
/* S1 becomes new_name CODE tmp_name; its old rhs lives on in TMP_STMT. */
2227 bsi = gsi_for_stmt (s1);
2228 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2229 s1 = gsi_stmt (bsi);
2230 update_stmt (s1);
2232 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2233 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2235 return new_stmt;
2238 /* Returns the statement that combines references R1 and R2. In case R1
2239 and R2 are not used in the same statement, but they are used with an
2240 associative and commutative operation in the same expression, reassociate
2241 the expression so that they are used in the same statement. */
2243 static gimple
2244 stmt_combining_refs (dref r1, dref r2)
2246 gimple stmt1, stmt2;
2247 tree name1 = name_for_ref (r1);
2248 tree name2 = name_for_ref (r2);
2250 stmt1 = find_use_stmt (&name1);
2251 stmt2 = find_use_stmt (&name2);
2252 if (stmt1 == stmt2)
2253 return stmt1;
/* Combinability has already been established by combinable_refs_p, so
   reassociation is guaranteed to succeed here. */
2255 return reassociate_to_the_same_stmt (name1, name2);
2258 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2259 description of the new chain is returned, otherwise we return NULL. */
2261 static chain_p
2262 combine_chains (chain_p ch1, chain_p ch2)
2264 dref r1, r2, nw;
2265 enum tree_code op = ERROR_MARK;
2266 bool swap = false;
2267 chain_p new_chain;
2268 unsigned i;
2269 gimple root_stmt;
2270 tree rslt_type = NULL_TREE;
/* The chains must be distinct and structurally parallel: same length and
   the same number of references. */
2272 if (ch1 == ch2)
2273 return NULL;
2274 if (ch1->length != ch2->length)
2275 return NULL;
2277 if (ch1->refs.length () != ch2->refs.length ())
2278 return NULL;
/* Every pair of corresponding references must have equal distance and
   be combined by the same operation (OP starts as ERROR_MARK so the
   first pair determines OP/SWAP/RSLT_TYPE). */
2280 for (i = 0; (ch1->refs.iterate (i, &r1)
2281 && ch2->refs.iterate (i, &r2)); i++)
2283 if (r1->distance != r2->distance)
2284 return NULL;
2286 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2287 return NULL;
/* Normalize the operand order so the combined operation is CH1 OP CH2. */
2290 if (swap)
2291 std::swap (ch1, ch2);
2293 new_chain = XCNEW (struct chain);
2294 new_chain->type = CT_COMBINATION;
2295 new_chain->op = op;
2296 new_chain->ch1 = ch1;
2297 new_chain->ch2 = ch2;
2298 new_chain->rslt_type = rslt_type;
2299 new_chain->length = ch1->length;
2301 for (i = 0; (ch1->refs.iterate (i, &r1)
2302 && ch2->refs.iterate (i, &r2)); i++)
2304 nw = XCNEW (struct dref_d);
2305 nw->stmt = stmt_combining_refs (r1, r2);
2306 nw->distance = r1->distance;
2308 new_chain->refs.safe_push (nw);
/* Recompute has_max_use_after for the combined chain: a maximal-distance
   reference not dominated by the root counts as a use after it. */
2311 new_chain->has_max_use_after = false;
2312 root_stmt = get_chain_root (new_chain)->stmt;
2313 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
2315 if (nw->distance == new_chain->length
2316 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2318 new_chain->has_max_use_after = true;
2319 break;
2323 ch1->combined = true;
2324 ch2->combined = true;
2325 return new_chain;
2328 /* Try to combine the CHAINS. */
2330 static void
2331 try_combine_chains (vec<chain_p> *chains)
2333 unsigned i, j;
2334 chain_p ch1, ch2, cch;
2335 auto_vec<chain_p> worklist;
/* Seed the worklist with all currently combinable chains. */
2337 FOR_EACH_VEC_ELT (*chains, i, ch1)
2338 if (chain_can_be_combined_p (ch1))
2339 worklist.safe_push (ch1);
2341 while (!worklist.is_empty ())
2343 ch1 = worklist.pop ();
/* The chain may have been combined while it sat on the worklist. */
2344 if (!chain_can_be_combined_p (ch1))
2345 continue;
2347 FOR_EACH_VEC_ELT (*chains, j, ch2)
2349 if (!chain_can_be_combined_p (ch2))
2350 continue;
2352 cch = combine_chains (ch1, ch2);
2353 if (cch)
/* The new combination chain may itself combine further, so it goes
   back on the worklist as well as into CHAINS. */
2355 worklist.safe_push (cch);
2356 chains->safe_push (cch);
2357 break;
2363 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2364 impossible because one of these initializers may trap, true otherwise. */
2366 static bool
2367 prepare_initializers_chain (struct loop *loop, chain_p chain)
/* An invariant chain needs a single initializer; other chains need one
   per carried distance. */
2369 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2370 struct data_reference *dr = get_chain_root (chain)->ref;
2371 tree init;
2372 dref laref;
2373 edge entry = loop_preheader_edge (loop);
2375 /* Find the initializers for the variables, and check that they cannot
2376 trap. */
2377 chain->inits.create (n);
2378 for (i = 0; i < n; i++)
2379 chain->inits.quick_push (NULL_TREE);
2381 /* If we have replaced some looparound phi nodes, use their initializers
2382 instead of creating our own. */
2383 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2385 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2386 continue;
2388 gcc_assert (laref->distance > 0);
/* The phi's preheader argument already holds the value of iteration
   -distance; slot it into the matching position. */
2389 chain->inits[n - laref->distance]
2390 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
/* Build the remaining initializers by reading the root reference at the
   virtual iterations -n .. -1 before loop entry. */
2393 for (i = 0; i < n; i++)
2395 gimple_seq stmts = NULL;
2397 if (chain->inits[i] != NULL_TREE)
2398 continue;
2400 init = ref_at_iteration (dr, (int) i - n, &stmts);
/* Unless the reference is accessed on every iteration, the preheader
   load might trap where the original loop would not; give up. */
2401 if (!chain->all_always_accessed && tree_could_trap_p (init))
2403 gimple_seq_discard (stmts);
2404 return false;
2407 if (stmts)
2408 gsi_insert_seq_on_edge_immediate (entry, stmts);
2410 chain->inits[i] = init;
2413 return true;
2416 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2417 be used because the initializers might trap. */
2419 static void
2420 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2422 chain_p chain;
2423 unsigned i;
/* No increment in the for header: I only advances when the chain is
   kept, since unordered_remove replaces slot I with the last element. */
2425 for (i = 0; i < chains.length (); )
2427 chain = chains[i];
2428 if (prepare_initializers_chain (loop, chain))
2429 i++;
2430 else
2432 release_chain (chain);
2433 chains.unordered_remove (i);
2438 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2439 unrolled. */
2441 static bool
2442 tree_predictive_commoning_loop (struct loop *loop)
2444 vec<data_reference_p> datarefs;
2445 vec<ddr_p> dependences;
2446 struct component *components;
2447 vec<chain_p> chains = vNULL;
2448 unsigned unroll_factor;
2449 struct tree_niter_desc desc;
2450 bool unroll = false;
2451 edge exit;
2452 bitmap tmp_vars;
2454 if (dump_file && (dump_flags & TDF_DETAILS))
2455 fprintf (dump_file, "Processing loop %d\n", loop->num);
2457 /* Find the data references and split them into components according to their
2458 dependence relations. */
2459 auto_vec<loop_p, 3> loop_nest;
2460 dependences.create (10);
2461 datarefs.create (10);
2462 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
2463 &dependences))
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file, "Cannot analyze data dependencies\n");
2467 free_data_refs (datarefs);
2468 free_dependence_relations (dependences);
2469 return false;
2472 if (dump_file && (dump_flags & TDF_DETAILS))
2473 dump_data_dependence_relations (dump_file, dependences);
2475 components = split_data_refs_to_components (loop, datarefs, dependences);
2476 loop_nest.release ();
2477 free_dependence_relations (dependences);
2478 if (!components)
2480 free_data_refs (datarefs);
2481 free_affine_expand_cache (&name_expansions);
2482 return false;
2485 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "Initial state:\n\n");
2488 dump_components (dump_file, components);
2491 /* Find the suitable components and split them into chains. */
2492 components = filter_suitable_components (loop, components);
2494 tmp_vars = BITMAP_ALLOC (NULL);
2495 looparound_phis = BITMAP_ALLOC (NULL);
2496 determine_roots (loop, components, &chains);
2497 release_components (components);
2499 if (!chains.exists ())
2501 if (dump_file && (dump_flags & TDF_DETAILS))
2502 fprintf (dump_file,
2503 "Predictive commoning failed: no suitable chains\n");
2504 goto end;
2506 prepare_initializers (loop, chains);
2508 /* Try to combine the chains that are always worked with together. */
2509 try_combine_chains (&chains);
2511 if (dump_file && (dump_flags & TDF_DETAILS))
2513 fprintf (dump_file, "Before commoning:\n\n");
2514 dump_chains (dump_file, chains);
2517 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2518 that its number of iterations is divisible by the factor. */
2519 unroll_factor = determine_unroll_factor (chains);
2520 scev_reset ();
2521 unroll = (unroll_factor > 1
2522 && can_unroll_loop_p (loop, unroll_factor, &desc));
2523 exit = single_dom_exit (loop);
2525 /* Execute the predictive commoning transformations, and possibly unroll the
2526 loop. */
2527 if (unroll)
2529 struct epcc_data dta;
2531 if (dump_file && (dump_flags & TDF_DETAILS))
2532 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2534 dta.chains = chains;
2535 dta.tmp_vars = tmp_vars;
2537 update_ssa (TODO_update_ssa_only_virtuals);
2539 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2540 execute_pred_commoning_cbck is called may cause phi nodes to be
2541 reallocated, which is a problem since CHAINS may point to these
2542 statements. To fix this, we store the ssa names defined by the
2543 phi nodes here instead of the phi nodes themselves, and restore
2544 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2545 replace_phis_by_defined_names (chains);
2547 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2548 execute_pred_commoning_cbck, &dta);
2549 eliminate_temp_copies (loop, tmp_vars);
2551 else
2553 if (dump_file && (dump_flags & TDF_DETAILS))
2554 fprintf (dump_file,
2555 "Executing predictive commoning without unrolling.\n");
2556 execute_pred_commoning (loop, chains, tmp_vars);
2559 end: ;
2560 release_chains (chains);
2561 free_data_refs (datarefs);
2562 BITMAP_FREE (tmp_vars);
2563 BITMAP_FREE (looparound_phis);
2565 free_affine_expand_cache (&name_expansions);
2567 return unroll;
2570 /* Runs predictive commoning. */
2572 unsigned
2573 tree_predictive_commoning (void)
2575 bool unrolled = false;
2576 struct loop *loop;
2577 unsigned ret = 0;
2579 initialize_original_copy_tables ();
2580 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2581 if (optimize_loop_for_speed_p (loop))
2583 unrolled |= tree_predictive_commoning_loop (loop);
2586 if (unrolled)
2588 scev_reset ();
2589 ret = TODO_cleanup_cfg;
2591 free_original_copy_tables ();
2593 return ret;
2596 /* Predictive commoning Pass. */
2598 static unsigned
2599 run_tree_predictive_commoning (struct function *fun)
2601 if (number_of_loops (fun) <= 1)
2602 return 0;
2604 return tree_predictive_commoning ();
2607 namespace {
2609 const pass_data pass_data_predcom =
2611 GIMPLE_PASS, /* type */
2612 "pcom", /* name */
2613 OPTGROUP_LOOP, /* optinfo_flags */
2614 TV_PREDCOM, /* tv_id */
2615 PROP_cfg, /* properties_required */
2616 0, /* properties_provided */
2617 0, /* properties_destroyed */
2618 0, /* todo_flags_start */
2619 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
2622 class pass_predcom : public gimple_opt_pass
2624 public:
2625 pass_predcom (gcc::context *ctxt)
2626 : gimple_opt_pass (pass_data_predcom, ctxt)
2629 /* opt_pass methods: */
2630 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
2631 virtual unsigned int execute (function *fun)
2633 return run_tree_predictive_commoning (fun);
2636 }; // class pass_predcom
2638 } // anon namespace
2640 gimple_opt_pass *
2641 make_pass_predcom (gcc::context *ctxt)
2643 return new pass_predcom (ctxt);