libcpp: move line typedef and column-numbering comment to top of file
[official-gcc.git] / gcc / tree-predcom.c
blobd078b96cdf088e19c8afff885fc2a84871ffb209
1 /* Predictive commoning.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
163 for (i = 0; i < n; i++)
165 a[i] = 1;
166 a[i+2] = 2;
169 It can be replaced with:
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
180 a[n] = t0;
181 a[n+1] = t1;
183 If the loop runs more than 1 iterations, it can be further simplified into:
185 for (i = 0; i < n; i++)
187 a[i] = 1;
189 a[n] = 2;
190 a[n+1] = 2;
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
195 TODO: For now, we don't support store-store chains in multi-exit loops. We
196 force to not unroll in case of store-store chain even if other chains might
197 ask for unroll.
199 Predictive commoning can be generalized for arbitrary computations (not
200 just memory loads), and also nontrivial transfer functions (e.g., replacing
201 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
203 #include "config.h"
204 #include "system.h"
205 #include "coretypes.h"
206 #include "backend.h"
207 #include "rtl.h"
208 #include "tree.h"
209 #include "gimple.h"
210 #include "predict.h"
211 #include "tree-pass.h"
212 #include "ssa.h"
213 #include "gimple-pretty-print.h"
214 #include "alias.h"
215 #include "fold-const.h"
216 #include "cfgloop.h"
217 #include "tree-eh.h"
218 #include "gimplify.h"
219 #include "gimple-iterator.h"
220 #include "gimplify-me.h"
221 #include "tree-ssa-loop-ivopts.h"
222 #include "tree-ssa-loop-manip.h"
223 #include "tree-ssa-loop-niter.h"
224 #include "tree-ssa-loop.h"
225 #include "tree-into-ssa.h"
226 #include "tree-dfa.h"
227 #include "tree-ssa.h"
228 #include "tree-data-ref.h"
229 #include "tree-scalar-evolution.h"
230 #include "params.h"
231 #include "tree-affine.h"
232 #include "builtins.h"
234 /* The maximum number of iterations between the considered memory
235 references. */
237 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
239 /* Data references (or phi nodes that carry data reference values across
240 loop iterations). */
242 typedef struct dref_d
244 /* The reference itself. */
245 struct data_reference *ref;
247 /* The statement in that the reference appears. */
248 gimple *stmt;
250 /* In case that STMT is a phi node, this field is set to the SSA name
251 defined by it in replace_phis_by_defined_names (in order to avoid
252 pointing to phi node that got reallocated in the meantime). */
253 tree name_defined_by_phi;
255 /* Distance of the reference from the root of the chain (in number of
256 iterations of the loop). */
257 unsigned distance;
259 /* Number of iterations offset from the first reference in the component. */
260 widest_int offset;
262 /* Number of the reference in a component, in dominance ordering. */
263 unsigned pos;
265 /* True if the memory reference is always accessed when the loop is
266 entered. */
267 unsigned always_accessed : 1;
268 } *dref;
271 /* Type of the chain of the references. */
273 enum chain_type
275 /* The addresses of the references in the chain are constant. */
276 CT_INVARIANT,
278 /* There are only loads in the chain. */
279 CT_LOAD,
281 /* Root of the chain is store, the rest are loads. */
282 CT_STORE_LOAD,
284 /* There are only stores in the chain. */
285 CT_STORE_STORE,
287 /* A combination of two chains. */
288 CT_COMBINATION
291 /* Chains of data references. */
293 typedef struct chain
295 /* Type of the chain. */
296 enum chain_type type;
298 /* For combination chains, the operator and the two chains that are
299 combined, and the type of the result. */
300 enum tree_code op;
301 tree rslt_type;
302 struct chain *ch1, *ch2;
304 /* The references in the chain. */
305 vec<dref> refs;
307 /* The maximum distance of the reference in the chain from the root. */
308 unsigned length;
310 /* The variables used to copy the value throughout iterations. */
311 vec<tree> vars;
313 /* Initializers for the variables. */
314 vec<tree> inits;
316 /* Finalizers for the eliminated stores. */
317 vec<tree> finis;
319 /* gimple stmts intializing the initial variables of the chain. */
320 gimple_seq init_seq;
322 /* gimple stmts finalizing the eliminated stores of the chain. */
323 gimple_seq fini_seq;
325 /* True if there is a use of a variable with the maximal distance
326 that comes after the root in the loop. */
327 unsigned has_max_use_after : 1;
329 /* True if all the memory references in the chain are always accessed. */
330 unsigned all_always_accessed : 1;
332 /* True if this chain was combined together with some other chain. */
333 unsigned combined : 1;
335 /* True if this is store elimination chain and eliminated stores store
336 loop invariant value into memory. */
337 unsigned inv_store_elimination : 1;
338 } *chain_p;
341 /* Describes the knowledge about the step of the memory references in
342 the component. */
344 enum ref_step_type
346 /* The step is zero. */
347 RS_INVARIANT,
349 /* The step is nonzero. */
350 RS_NONZERO,
352 /* The step may or may not be nonzero. */
353 RS_ANY
356 /* Components of the data dependence graph. */
358 struct component
360 /* The references in the component. */
361 vec<dref> refs;
363 /* What we know about the step of the references in the component. */
364 enum ref_step_type comp_step;
366 /* True if all references in component are stores and we try to do
367 intra/inter loop iteration dead store elimination. */
368 bool eliminate_store_p;
370 /* Next component in the list. */
371 struct component *next;
374 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
376 static bitmap looparound_phis;
378 /* Cache used by tree_to_aff_combination_expand. */
380 static hash_map<tree, name_expansion *> *name_expansions;
382 /* Dumps data reference REF to FILE. */
384 extern void dump_dref (FILE *, dref);
385 void
386 dump_dref (FILE *file, dref ref)
388 if (ref->ref)
390 fprintf (file, " ");
391 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
392 fprintf (file, " (id %u%s)\n", ref->pos,
393 DR_IS_READ (ref->ref) ? "" : ", write");
395 fprintf (file, " offset ");
396 print_decs (ref->offset, file);
397 fprintf (file, "\n");
399 fprintf (file, " distance %u\n", ref->distance);
401 else
403 if (gimple_code (ref->stmt) == GIMPLE_PHI)
404 fprintf (file, " looparound ref\n");
405 else
406 fprintf (file, " combination ref\n");
407 fprintf (file, " in statement ");
408 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
409 fprintf (file, "\n");
410 fprintf (file, " distance %u\n", ref->distance);
415 /* Dumps CHAIN to FILE. */
417 extern void dump_chain (FILE *, chain_p);
418 void
419 dump_chain (FILE *file, chain_p chain)
421 dref a;
422 const char *chain_type;
423 unsigned i;
424 tree var;
426 switch (chain->type)
428 case CT_INVARIANT:
429 chain_type = "Load motion";
430 break;
432 case CT_LOAD:
433 chain_type = "Loads-only";
434 break;
436 case CT_STORE_LOAD:
437 chain_type = "Store-loads";
438 break;
440 case CT_STORE_STORE:
441 chain_type = "Store-stores";
442 break;
444 case CT_COMBINATION:
445 chain_type = "Combination";
446 break;
448 default:
449 gcc_unreachable ();
452 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
453 chain->combined ? " (combined)" : "");
454 if (chain->type != CT_INVARIANT)
455 fprintf (file, " max distance %u%s\n", chain->length,
456 chain->has_max_use_after ? "" : ", may reuse first");
458 if (chain->type == CT_COMBINATION)
460 fprintf (file, " equal to %p %s %p in type ",
461 (void *) chain->ch1, op_symbol_code (chain->op),
462 (void *) chain->ch2);
463 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
464 fprintf (file, "\n");
467 if (chain->vars.exists ())
469 fprintf (file, " vars");
470 FOR_EACH_VEC_ELT (chain->vars, i, var)
472 fprintf (file, " ");
473 print_generic_expr (file, var, TDF_SLIM);
475 fprintf (file, "\n");
478 if (chain->inits.exists ())
480 fprintf (file, " inits");
481 FOR_EACH_VEC_ELT (chain->inits, i, var)
483 fprintf (file, " ");
484 print_generic_expr (file, var, TDF_SLIM);
486 fprintf (file, "\n");
489 fprintf (file, " references:\n");
490 FOR_EACH_VEC_ELT (chain->refs, i, a)
491 dump_dref (file, a);
493 fprintf (file, "\n");
496 /* Dumps CHAINS to FILE. */
498 extern void dump_chains (FILE *, vec<chain_p> );
499 void
500 dump_chains (FILE *file, vec<chain_p> chains)
502 chain_p chain;
503 unsigned i;
505 FOR_EACH_VEC_ELT (chains, i, chain)
506 dump_chain (file, chain);
509 /* Dumps COMP to FILE. */
511 extern void dump_component (FILE *, struct component *);
512 void
513 dump_component (FILE *file, struct component *comp)
515 dref a;
516 unsigned i;
518 fprintf (file, "Component%s:\n",
519 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
520 FOR_EACH_VEC_ELT (comp->refs, i, a)
521 dump_dref (file, a);
522 fprintf (file, "\n");
525 /* Dumps COMPS to FILE. */
527 extern void dump_components (FILE *, struct component *);
528 void
529 dump_components (FILE *file, struct component *comps)
531 struct component *comp;
533 for (comp = comps; comp; comp = comp->next)
534 dump_component (file, comp);
537 /* Frees a chain CHAIN. */
539 static void
540 release_chain (chain_p chain)
542 dref ref;
543 unsigned i;
545 if (chain == NULL)
546 return;
548 FOR_EACH_VEC_ELT (chain->refs, i, ref)
549 free (ref);
551 chain->refs.release ();
552 chain->vars.release ();
553 chain->inits.release ();
554 if (chain->init_seq)
555 gimple_seq_discard (chain->init_seq);
557 chain->finis.release ();
558 if (chain->fini_seq)
559 gimple_seq_discard (chain->fini_seq);
561 free (chain);
564 /* Frees CHAINS. */
566 static void
567 release_chains (vec<chain_p> chains)
569 unsigned i;
570 chain_p chain;
572 FOR_EACH_VEC_ELT (chains, i, chain)
573 release_chain (chain);
574 chains.release ();
577 /* Frees a component COMP. */
579 static void
580 release_component (struct component *comp)
582 comp->refs.release ();
583 free (comp);
586 /* Frees list of components COMPS. */
588 static void
589 release_components (struct component *comps)
591 struct component *act, *next;
593 for (act = comps; act; act = next)
595 next = act->next;
596 release_component (act);
600 /* Finds a root of tree given by FATHERS containing A, and performs path
601 shortening. */
603 static unsigned
604 component_of (unsigned fathers[], unsigned a)
606 unsigned root, n;
608 for (root = a; root != fathers[root]; root = fathers[root])
609 continue;
611 for (; a != root; a = n)
613 n = fathers[a];
614 fathers[a] = root;
617 return root;
620 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
621 components, A and B are components to merge. */
623 static void
624 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
626 unsigned ca = component_of (fathers, a);
627 unsigned cb = component_of (fathers, b);
629 if (ca == cb)
630 return;
632 if (sizes[ca] < sizes[cb])
634 sizes[cb] += sizes[ca];
635 fathers[ca] = cb;
637 else
639 sizes[ca] += sizes[cb];
640 fathers[cb] = ca;
644 /* Returns true if A is a reference that is suitable for predictive commoning
645 in the innermost loop that contains it. REF_STEP is set according to the
646 step of the reference A. */
648 static bool
649 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
651 tree ref = DR_REF (a), step = DR_STEP (a);
653 if (!step
654 || TREE_THIS_VOLATILE (ref)
655 || !is_gimple_reg_type (TREE_TYPE (ref))
656 || tree_could_throw_p (ref))
657 return false;
659 if (integer_zerop (step))
660 *ref_step = RS_INVARIANT;
661 else if (integer_nonzerop (step))
662 *ref_step = RS_NONZERO;
663 else
664 *ref_step = RS_ANY;
666 return true;
669 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
671 static void
672 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
674 tree type = TREE_TYPE (DR_OFFSET (dr));
675 aff_tree delta;
677 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
678 &name_expansions);
679 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
680 aff_combination_add (offset, &delta);
683 /* Determines number of iterations of the innermost enclosing loop before B
684 refers to exactly the same location as A and stores it to OFF. If A and
685 B do not have the same step, they never meet, or anything else fails,
686 returns false, otherwise returns true. Both A and B are assumed to
687 satisfy suitable_reference_p. */
689 static bool
690 determine_offset (struct data_reference *a, struct data_reference *b,
691 widest_int *off)
693 aff_tree diff, baseb, step;
694 tree typea, typeb;
696 /* Check that both the references access the location in the same type. */
697 typea = TREE_TYPE (DR_REF (a));
698 typeb = TREE_TYPE (DR_REF (b));
699 if (!useless_type_conversion_p (typeb, typea))
700 return false;
702 /* Check whether the base address and the step of both references is the
703 same. */
704 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
705 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
706 return false;
708 if (integer_zerop (DR_STEP (a)))
710 /* If the references have loop invariant address, check that they access
711 exactly the same location. */
712 *off = 0;
713 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
714 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
717 /* Compare the offsets of the addresses, and check whether the difference
718 is a multiple of step. */
719 aff_combination_dr_offset (a, &diff);
720 aff_combination_dr_offset (b, &baseb);
721 aff_combination_scale (&baseb, -1);
722 aff_combination_add (&diff, &baseb);
724 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
725 &step, &name_expansions);
726 return aff_combination_constant_multiple_p (&diff, &step, off);
729 /* Returns the last basic block in LOOP for that we are sure that
730 it is executed whenever the loop is entered. */
732 static basic_block
733 last_always_executed_block (struct loop *loop)
735 unsigned i;
736 vec<edge> exits = get_loop_exit_edges (loop);
737 edge ex;
738 basic_block last = loop->latch;
740 FOR_EACH_VEC_ELT (exits, i, ex)
741 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
742 exits.release ();
744 return last;
747 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
749 static struct component *
750 split_data_refs_to_components (struct loop *loop,
751 vec<data_reference_p> datarefs,
752 vec<ddr_p> depends)
754 unsigned i, n = datarefs.length ();
755 unsigned ca, ia, ib, bad;
756 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
757 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
758 struct component **comps;
759 struct data_reference *dr, *dra, *drb;
760 struct data_dependence_relation *ddr;
761 struct component *comp_list = NULL, *comp;
762 dref dataref;
763 /* Don't do store elimination if loop has multiple exit edges. */
764 bool eliminate_store_p = single_exit (loop) != NULL;
765 basic_block last_always_executed = last_always_executed_block (loop);
767 FOR_EACH_VEC_ELT (datarefs, i, dr)
769 if (!DR_REF (dr))
771 /* A fake reference for call or asm_expr that may clobber memory;
772 just fail. */
773 goto end;
775 /* predcom pass isn't prepared to handle calls with data references. */
776 if (is_gimple_call (DR_STMT (dr)))
777 goto end;
778 dr->aux = (void *) (size_t) i;
779 comp_father[i] = i;
780 comp_size[i] = 1;
783 /* A component reserved for the "bad" data references. */
784 comp_father[n] = n;
785 comp_size[n] = 1;
787 FOR_EACH_VEC_ELT (datarefs, i, dr)
789 enum ref_step_type dummy;
791 if (!suitable_reference_p (dr, &dummy))
793 ia = (unsigned) (size_t) dr->aux;
794 merge_comps (comp_father, comp_size, n, ia);
798 FOR_EACH_VEC_ELT (depends, i, ddr)
800 widest_int dummy_off;
802 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
803 continue;
805 dra = DDR_A (ddr);
806 drb = DDR_B (ddr);
808 /* Don't do store elimination if there is any unknown dependence for
809 any store data reference. */
810 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
811 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
812 || DDR_NUM_DIST_VECTS (ddr) == 0))
813 eliminate_store_p = false;
815 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
816 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
817 if (ia == ib)
818 continue;
820 bad = component_of (comp_father, n);
822 /* If both A and B are reads, we may ignore unsuitable dependences. */
823 if (DR_IS_READ (dra) && DR_IS_READ (drb))
825 if (ia == bad || ib == bad
826 || !determine_offset (dra, drb, &dummy_off))
827 continue;
829 /* If A is read and B write or vice versa and there is unsuitable
830 dependence, instead of merging both components into a component
831 that will certainly not pass suitable_component_p, just put the
832 read into bad component, perhaps at least the write together with
833 all the other data refs in it's component will be optimizable. */
834 else if (DR_IS_READ (dra) && ib != bad)
836 if (ia == bad)
837 continue;
838 else if (!determine_offset (dra, drb, &dummy_off))
840 merge_comps (comp_father, comp_size, bad, ia);
841 continue;
844 else if (DR_IS_READ (drb) && ia != bad)
846 if (ib == bad)
847 continue;
848 else if (!determine_offset (dra, drb, &dummy_off))
850 merge_comps (comp_father, comp_size, bad, ib);
851 continue;
854 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
855 && ia != bad && ib != bad
856 && !determine_offset (dra, drb, &dummy_off))
858 merge_comps (comp_father, comp_size, bad, ia);
859 merge_comps (comp_father, comp_size, bad, ib);
860 continue;
863 merge_comps (comp_father, comp_size, ia, ib);
866 if (eliminate_store_p)
868 tree niters = number_of_latch_executions (loop);
870 /* Don't do store elimination if niters info is unknown because stores
871 in the last iteration can't be eliminated and we need to recover it
872 after loop. */
873 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
876 comps = XCNEWVEC (struct component *, n);
877 bad = component_of (comp_father, n);
878 FOR_EACH_VEC_ELT (datarefs, i, dr)
880 ia = (unsigned) (size_t) dr->aux;
881 ca = component_of (comp_father, ia);
882 if (ca == bad)
883 continue;
885 comp = comps[ca];
886 if (!comp)
888 comp = XCNEW (struct component);
889 comp->refs.create (comp_size[ca]);
890 comp->eliminate_store_p = eliminate_store_p;
891 comps[ca] = comp;
894 dataref = XCNEW (struct dref_d);
895 dataref->ref = dr;
896 dataref->stmt = DR_STMT (dr);
897 dataref->offset = 0;
898 dataref->distance = 0;
900 dataref->always_accessed
901 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
902 gimple_bb (dataref->stmt));
903 dataref->pos = comp->refs.length ();
904 comp->refs.quick_push (dataref);
905 if (DR_IS_READ (dr))
906 comp->eliminate_store_p = false;
909 for (i = 0; i < n; i++)
911 comp = comps[i];
912 if (comp)
914 comp->next = comp_list;
915 comp_list = comp;
918 free (comps);
920 end:
921 free (comp_father);
922 free (comp_size);
923 return comp_list;
926 /* Returns true if the component COMP satisfies the conditions
927 described in 2) at the beginning of this file. LOOP is the current
928 loop. */
930 static bool
931 suitable_component_p (struct loop *loop, struct component *comp)
933 unsigned i;
934 dref a, first;
935 basic_block ba, bp = loop->header;
936 bool ok, has_write = false;
938 FOR_EACH_VEC_ELT (comp->refs, i, a)
940 ba = gimple_bb (a->stmt);
942 if (!just_once_each_iteration_p (loop, ba))
943 return false;
945 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
946 bp = ba;
948 if (DR_IS_WRITE (a->ref))
949 has_write = true;
952 first = comp->refs[0];
953 ok = suitable_reference_p (first->ref, &comp->comp_step);
954 gcc_assert (ok);
955 first->offset = 0;
957 for (i = 1; comp->refs.iterate (i, &a); i++)
959 if (!determine_offset (first->ref, a->ref, &a->offset))
960 return false;
962 enum ref_step_type a_step;
963 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
964 && a_step == comp->comp_step);
967 /* If there is a write inside the component, we must know whether the
968 step is nonzero or not -- we would not otherwise be able to recognize
969 whether the value accessed by reads comes from the OFFSET-th iteration
970 or the previous one. */
971 if (has_write && comp->comp_step == RS_ANY)
972 return false;
974 return true;
977 /* Check the conditions on references inside each of components COMPS,
978 and remove the unsuitable components from the list. The new list
979 of components is returned. The conditions are described in 2) at
980 the beginning of this file. LOOP is the current loop. */
982 static struct component *
983 filter_suitable_components (struct loop *loop, struct component *comps)
985 struct component **comp, *act;
987 for (comp = &comps; *comp; )
989 act = *comp;
990 if (suitable_component_p (loop, act))
991 comp = &act->next;
992 else
994 dref ref;
995 unsigned i;
997 *comp = act->next;
998 FOR_EACH_VEC_ELT (act->refs, i, ref)
999 free (ref);
1000 release_component (act);
1004 return comps;
1007 /* Compares two drefs A and B by their offset and position. Callback for
1008 qsort. */
1010 static int
1011 order_drefs (const void *a, const void *b)
1013 const dref *const da = (const dref *) a;
1014 const dref *const db = (const dref *) b;
1015 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1017 if (offcmp != 0)
1018 return offcmp;
1020 return (*da)->pos - (*db)->pos;
1023 /* Returns root of the CHAIN. */
1025 static inline dref
1026 get_chain_root (chain_p chain)
1028 return chain->refs[0];
1031 /* Given CHAIN, returns the last ref at DISTANCE, or NULL if it doesn't
1032 exist. */
1034 static inline dref
1035 get_chain_last_ref_at (chain_p chain, unsigned distance)
1037 unsigned i;
1039 for (i = chain->refs.length (); i > 0; i--)
1040 if (distance == chain->refs[i - 1]->distance)
1041 break;
1043 return (i > 0) ? chain->refs[i - 1] : NULL;
1046 /* Adds REF to the chain CHAIN. */
1048 static void
1049 add_ref_to_chain (chain_p chain, dref ref)
1051 dref root = get_chain_root (chain);
1053 gcc_assert (wi::les_p (root->offset, ref->offset));
1054 widest_int dist = ref->offset - root->offset;
1055 if (wi::leu_p (MAX_DISTANCE, dist))
1057 free (ref);
1058 return;
1060 gcc_assert (wi::fits_uhwi_p (dist));
1062 chain->refs.safe_push (ref);
1064 ref->distance = dist.to_uhwi ();
1066 if (ref->distance >= chain->length)
1068 chain->length = ref->distance;
1069 chain->has_max_use_after = false;
1072 /* Don't set the flag for store-store chain since there is no use. */
1073 if (chain->type != CT_STORE_STORE
1074 && ref->distance == chain->length
1075 && ref->pos > root->pos)
1076 chain->has_max_use_after = true;
1078 chain->all_always_accessed &= ref->always_accessed;
1081 /* Returns the chain for invariant component COMP. */
1083 static chain_p
1084 make_invariant_chain (struct component *comp)
1086 chain_p chain = XCNEW (struct chain);
1087 unsigned i;
1088 dref ref;
1090 chain->type = CT_INVARIANT;
1092 chain->all_always_accessed = true;
1094 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1096 chain->refs.safe_push (ref);
1097 chain->all_always_accessed &= ref->always_accessed;
1100 chain->inits = vNULL;
1101 chain->finis = vNULL;
1103 return chain;
1106 /* Make a new chain of type TYPE rooted at REF. */
1108 static chain_p
1109 make_rooted_chain (dref ref, enum chain_type type)
1111 chain_p chain = XCNEW (struct chain);
1113 chain->type = type;
1114 chain->refs.safe_push (ref);
1115 chain->all_always_accessed = ref->always_accessed;
1116 ref->distance = 0;
1118 chain->inits = vNULL;
1119 chain->finis = vNULL;
1121 return chain;
1124 /* Returns true if CHAIN is not trivial. */
1126 static bool
1127 nontrivial_chain_p (chain_p chain)
1129 return chain != NULL && chain->refs.length () > 1;
1132 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1133 is no such name. */
1135 static tree
1136 name_for_ref (dref ref)
1138 tree name;
1140 if (is_gimple_assign (ref->stmt))
1142 if (!ref->ref || DR_IS_READ (ref->ref))
1143 name = gimple_assign_lhs (ref->stmt);
1144 else
1145 name = gimple_assign_rhs1 (ref->stmt);
1147 else
1148 name = PHI_RESULT (ref->stmt);
1150 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1153 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1154 iterations of the innermost enclosing loop). */
1156 static bool
1157 valid_initializer_p (struct data_reference *ref,
1158 unsigned distance, struct data_reference *root)
1160 aff_tree diff, base, step;
1161 widest_int off;
1163 /* Both REF and ROOT must be accessing the same object. */
1164 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1165 return false;
1167 /* The initializer is defined outside of loop, hence its address must be
1168 invariant inside the loop. */
1169 gcc_assert (integer_zerop (DR_STEP (ref)));
1171 /* If the address of the reference is invariant, initializer must access
1172 exactly the same location. */
1173 if (integer_zerop (DR_STEP (root)))
1174 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1175 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1177 /* Verify that this index of REF is equal to the root's index at
1178 -DISTANCE-th iteration. */
1179 aff_combination_dr_offset (root, &diff);
1180 aff_combination_dr_offset (ref, &base);
1181 aff_combination_scale (&base, -1);
1182 aff_combination_add (&diff, &base);
1184 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1185 &step, &name_expansions);
1186 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1187 return false;
1189 if (off != distance)
1190 return false;
1192 return true;
1195 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1196 initial value is correct (equal to initial value of REF shifted by one
1197 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1198 is the root of the current chain. */
1200 static gphi *
1201 find_looparound_phi (struct loop *loop, dref ref, dref root)
1203 tree name, init, init_ref;
1204 gphi *phi = NULL;
1205 gimple *init_stmt;
1206 edge latch = loop_latch_edge (loop);
1207 struct data_reference init_dr;
1208 gphi_iterator psi;
1210 if (is_gimple_assign (ref->stmt))
1212 if (DR_IS_READ (ref->ref))
1213 name = gimple_assign_lhs (ref->stmt);
1214 else
1215 name = gimple_assign_rhs1 (ref->stmt);
1217 else
1218 name = PHI_RESULT (ref->stmt);
1219 if (!name)
1220 return NULL;
1222 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1224 phi = psi.phi ();
1225 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1226 break;
1229 if (gsi_end_p (psi))
1230 return NULL;
1232 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1233 if (TREE_CODE (init) != SSA_NAME)
1234 return NULL;
1235 init_stmt = SSA_NAME_DEF_STMT (init);
1236 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1237 return NULL;
1238 gcc_assert (gimple_assign_lhs (init_stmt) == init);
1240 init_ref = gimple_assign_rhs1 (init_stmt);
1241 if (!REFERENCE_CLASS_P (init_ref)
1242 && !DECL_P (init_ref))
1243 return NULL;
1245 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1246 loop enclosing PHI). */
1247 memset (&init_dr, 0, sizeof (struct data_reference));
1248 DR_REF (&init_dr) = init_ref;
1249 DR_STMT (&init_dr) = phi;
1250 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop))
1251 return NULL;
1253 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1254 return NULL;
1256 return phi;
1259 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1261 static void
1262 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1264 dref nw = XCNEW (struct dref_d), aref;
1265 unsigned i;
1267 nw->stmt = phi;
1268 nw->distance = ref->distance + 1;
1269 nw->always_accessed = 1;
1271 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1272 if (aref->distance >= nw->distance)
1273 break;
1274 chain->refs.safe_insert (i, nw);
1276 if (nw->distance > chain->length)
1278 chain->length = nw->distance;
1279 chain->has_max_use_after = false;
1283 /* For references in CHAIN that are copied around the LOOP (created previously
1284 by PRE, or by user), add the results of such copies to the chain. This
1285 enables us to remove the copies by unrolling, and may need less registers
1286 (also, it may allow us to combine chains together). */
1288 static void
1289 add_looparound_copies (struct loop *loop, chain_p chain)
1291 unsigned i;
1292 dref ref, root = get_chain_root (chain);
1293 gphi *phi;
1295 if (chain->type == CT_STORE_STORE)
1296 return;
1298 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1300 phi = find_looparound_phi (loop, ref, root);
1301 if (!phi)
1302 continue;
1304 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1305 insert_looparound_copy (chain, ref, phi);
1309 /* Find roots of the values and determine distances in the component COMP.
1310 The references are redistributed into CHAINS. LOOP is the current
1311 loop. */
1313 static void
1314 determine_roots_comp (struct loop *loop,
1315 struct component *comp,
1316 vec<chain_p> *chains)
1318 unsigned i;
1319 dref a;
1320 chain_p chain = NULL;
1321 widest_int last_ofs = 0;
1322 enum chain_type type;
1324 /* Invariants are handled specially. */
1325 if (comp->comp_step == RS_INVARIANT)
1327 chain = make_invariant_chain (comp);
1328 chains->safe_push (chain);
1329 return;
1332 /* Trivial component. */
1333 if (comp->refs.length () <= 1)
1335 if (comp->refs.length () == 1)
1337 free (comp->refs[0]);
1338 comp->refs.truncate (0);
1340 return;
1343 comp->refs.qsort (order_drefs);
1344 FOR_EACH_VEC_ELT (comp->refs, i, a)
1346 if (!chain
1347 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1348 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1350 if (nontrivial_chain_p (chain))
1352 add_looparound_copies (loop, chain);
1353 chains->safe_push (chain);
1355 else
1356 release_chain (chain);
1358 if (DR_IS_READ (a->ref))
1359 type = CT_LOAD;
1360 else
1361 type = comp->eliminate_store_p ? CT_STORE_STORE : CT_STORE_LOAD;
1363 chain = make_rooted_chain (a, type);
1364 last_ofs = a->offset;
1365 continue;
1368 add_ref_to_chain (chain, a);
1371 if (nontrivial_chain_p (chain))
1373 add_looparound_copies (loop, chain);
1374 chains->safe_push (chain);
1376 else
1377 release_chain (chain);
1380 /* Find roots of the values and determine distances in components COMPS, and
1381 separates the references to CHAINS. LOOP is the current loop. */
1383 static void
1384 determine_roots (struct loop *loop,
1385 struct component *comps, vec<chain_p> *chains)
1387 struct component *comp;
1389 for (comp = comps; comp; comp = comp->next)
1390 determine_roots_comp (loop, comp, chains);
1393 /* Replace the reference in statement STMT with temporary variable
1394 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1395 the reference in the statement. IN_LHS is true if the reference
1396 is in the lhs of STMT, false if it is in rhs. */
1398 static void
1399 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1401 tree val;
1402 gassign *new_stmt;
1403 gimple_stmt_iterator bsi, psi;
1405 if (gimple_code (stmt) == GIMPLE_PHI)
1407 gcc_assert (!in_lhs && !set);
1409 val = PHI_RESULT (stmt);
1410 bsi = gsi_after_labels (gimple_bb (stmt));
1411 psi = gsi_for_stmt (stmt);
1412 remove_phi_node (&psi, false);
1414 /* Turn the phi node into GIMPLE_ASSIGN. */
1415 new_stmt = gimple_build_assign (val, new_tree);
1416 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1417 return;
1420 /* Since the reference is of gimple_reg type, it should only
1421 appear as lhs or rhs of modify statement. */
1422 gcc_assert (is_gimple_assign (stmt));
1424 bsi = gsi_for_stmt (stmt);
1426 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1427 if (!set)
1429 gcc_assert (!in_lhs);
1430 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1431 stmt = gsi_stmt (bsi);
1432 update_stmt (stmt);
1433 return;
1436 if (in_lhs)
1438 /* We have statement
1440 OLD = VAL
1442 If OLD is a memory reference, then VAL is gimple_val, and we transform
1443 this to
1445 OLD = VAL
1446 NEW = VAL
1448 Otherwise, we are replacing a combination chain,
1449 VAL is the expression that performs the combination, and OLD is an
1450 SSA name. In this case, we transform the assignment to
1452 OLD = VAL
1453 NEW = OLD
1457 val = gimple_assign_lhs (stmt);
1458 if (TREE_CODE (val) != SSA_NAME)
1460 val = gimple_assign_rhs1 (stmt);
1461 gcc_assert (gimple_assign_single_p (stmt));
1462 if (TREE_CLOBBER_P (val))
1463 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1464 else
1465 gcc_assert (gimple_assign_copy_p (stmt));
1468 else
1470 /* VAL = OLD
1472 is transformed to
1474 VAL = OLD
1475 NEW = VAL */
1477 val = gimple_assign_lhs (stmt);
1480 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1481 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1484 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1485 of the loop it was analyzed in. Append init stmts to STMTS. */
1487 static tree
1488 ref_at_iteration (data_reference_p dr, int iter,
1489 gimple_seq *stmts, tree niters = NULL_TREE)
1491 tree off = DR_OFFSET (dr);
1492 tree coff = DR_INIT (dr);
1493 tree ref = DR_REF (dr);
1494 enum tree_code ref_code = ERROR_MARK;
1495 tree ref_type = NULL_TREE;
1496 tree ref_op1 = NULL_TREE;
1497 tree ref_op2 = NULL_TREE;
1498 tree new_offset;
1500 if (iter != 0)
1502 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1503 if (TREE_CODE (new_offset) == INTEGER_CST)
1504 coff = size_binop (PLUS_EXPR, coff, new_offset);
1505 else
1506 off = size_binop (PLUS_EXPR, off, new_offset);
1509 if (niters != NULL_TREE)
1511 niters = fold_convert (ssizetype, niters);
1512 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1513 if (TREE_CODE (niters) == INTEGER_CST)
1514 coff = size_binop (PLUS_EXPR, coff, new_offset);
1515 else
1516 off = size_binop (PLUS_EXPR, off, new_offset);
1519 /* While data-ref analysis punts on bit offsets it still handles
1520 bitfield accesses at byte boundaries. Cope with that. Note that
1521 if the bitfield object also starts at a byte-boundary we can simply
1522 replicate the COMPONENT_REF, but we have to subtract the component's
1523 byte-offset from the MEM_REF address first.
1524 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1525 start at offset zero. */
1526 if (TREE_CODE (ref) == COMPONENT_REF
1527 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1529 unsigned HOST_WIDE_INT boff;
1530 tree field = TREE_OPERAND (ref, 1);
1531 tree offset = component_ref_field_offset (ref);
1532 ref_type = TREE_TYPE (ref);
1533 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1534 /* This can occur in Ada. See the comment in get_bit_range. */
1535 if (boff % BITS_PER_UNIT != 0
1536 || !tree_fits_uhwi_p (offset))
1538 ref_code = BIT_FIELD_REF;
1539 ref_op1 = DECL_SIZE (field);
1540 ref_op2 = bitsize_zero_node;
1542 else
1544 boff >>= LOG2_BITS_PER_UNIT;
1545 boff += tree_to_uhwi (offset);
1546 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1547 ref_code = COMPONENT_REF;
1548 ref_op1 = field;
1549 ref_op2 = TREE_OPERAND (ref, 2);
1550 ref = TREE_OPERAND (ref, 0);
1553 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1554 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1555 is_gimple_mem_ref_addr, NULL_TREE);
1556 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1557 tree type = build_aligned_type (TREE_TYPE (ref),
1558 get_object_alignment (ref));
1559 ref = build2 (MEM_REF, type, addr, alias_ptr);
1560 if (ref_type)
1561 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1562 return ref;
1565 /* Get the initialization expression for the INDEX-th temporary variable
1566 of CHAIN. */
1568 static tree
1569 get_init_expr (chain_p chain, unsigned index)
1571 if (chain->type == CT_COMBINATION)
1573 tree e1 = get_init_expr (chain->ch1, index);
1574 tree e2 = get_init_expr (chain->ch2, index);
1576 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1578 else
1579 return chain->inits[index];
1582 /* Returns a new temporary variable used for the I-th variable carrying
1583 value of REF. The variable's uid is marked in TMP_VARS. */
1585 static tree
1586 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1588 tree type = TREE_TYPE (ref);
1589 /* We never access the components of the temporary variable in predictive
1590 commoning. */
1591 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1592 bitmap_set_bit (tmp_vars, DECL_UID (var));
1593 return var;
1596 /* Creates the variables for CHAIN, as well as phi nodes for them and
1597 initialization on entry to LOOP. Uids of the newly created
1598 temporary variables are marked in TMP_VARS. */
1600 static void
1601 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1603 unsigned i;
1604 unsigned n = chain->length;
1605 dref root = get_chain_root (chain);
1606 bool reuse_first = !chain->has_max_use_after;
1607 tree ref, init, var, next;
1608 gphi *phi;
1609 gimple_seq stmts;
1610 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1612 /* If N == 0, then all the references are within the single iteration. And
1613 since this is an nonempty chain, reuse_first cannot be true. */
1614 gcc_assert (n > 0 || !reuse_first);
1616 chain->vars.create (n + 1);
1618 if (chain->type == CT_COMBINATION)
1619 ref = gimple_assign_lhs (root->stmt);
1620 else
1621 ref = DR_REF (root->ref);
1623 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1625 var = predcom_tmp_var (ref, i, tmp_vars);
1626 chain->vars.quick_push (var);
1628 if (reuse_first)
1629 chain->vars.quick_push (chain->vars[0]);
1631 FOR_EACH_VEC_ELT (chain->vars, i, var)
1632 chain->vars[i] = make_ssa_name (var);
1634 for (i = 0; i < n; i++)
1636 var = chain->vars[i];
1637 next = chain->vars[i + 1];
1638 init = get_init_expr (chain, i);
1640 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1641 if (stmts)
1642 gsi_insert_seq_on_edge_immediate (entry, stmts);
1644 phi = create_phi_node (var, loop->header);
1645 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1646 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1650 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1651 all stores to be eliminated store loop invariant values into memory.
1652 In this case, we can use these invariant values directly after LOOP. */
1654 static bool
1655 is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
1657 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1658 return false;
1660 gcc_assert (!chain->has_max_use_after);
1662 /* If loop iterates for unknown times or fewer times than chain->lenght,
1663 we still need to setup root variable and propagate it with PHI node. */
1664 tree niters = number_of_latch_executions (loop);
1665 if (TREE_CODE (niters) != INTEGER_CST
1666 || wi::leu_p (wi::to_wide (niters), chain->length))
1667 return false;
1669 /* Check stores in chain for elimination if they only store loop invariant
1670 values. */
1671 for (unsigned i = 0; i < chain->length; i++)
1673 dref a = get_chain_last_ref_at (chain, i);
1674 if (a == NULL)
1675 continue;
1677 gimple *def_stmt, *stmt = a->stmt;
1678 if (!gimple_assign_single_p (stmt))
1679 return false;
1681 tree val = gimple_assign_rhs1 (stmt);
1682 if (TREE_CLOBBER_P (val))
1683 return false;
1685 if (CONSTANT_CLASS_P (val))
1686 continue;
1688 if (TREE_CODE (val) != SSA_NAME)
1689 return false;
1691 def_stmt = SSA_NAME_DEF_STMT (val);
1692 if (gimple_nop_p (def_stmt))
1693 continue;
1695 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1696 return false;
1698 return true;
1701 /* Creates root variables for store elimination CHAIN in which stores for
1702 elimination only store loop invariant values. In this case, we neither
1703 need to load root variables before loop nor propagate it with PHI nodes. */
1705 static void
1706 initialize_root_vars_store_elim_1 (chain_p chain)
1708 tree var;
1709 unsigned i, n = chain->length;
1711 chain->vars.create (n);
1712 chain->vars.safe_grow_cleared (n);
1714 /* Initialize root value for eliminated stores at each distance. */
1715 for (i = 0; i < n; i++)
1717 dref a = get_chain_last_ref_at (chain, i);
1718 if (a == NULL)
1719 continue;
1721 var = gimple_assign_rhs1 (a->stmt);
1722 chain->vars[a->distance] = var;
1725 /* We don't propagate values with PHI nodes, so manually propagate value
1726 to bubble positions. */
1727 var = chain->vars[0];
1728 for (i = 1; i < n; i++)
1730 if (chain->vars[i] != NULL_TREE)
1732 var = chain->vars[i];
1733 continue;
1735 chain->vars[i] = var;
1738 /* Revert the vector. */
1739 for (i = 0; i < n / 2; i++)
1740 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1743 /* Creates root variables for store elimination CHAIN in which stores for
1744 elimination store loop variant values. In this case, we may need to
1745 load root variables before LOOP and propagate it with PHI nodes. Uids
1746 of the newly created root variables are marked in TMP_VARS. */
1748 static void
1749 initialize_root_vars_store_elim_2 (struct loop *loop,
1750 chain_p chain, bitmap tmp_vars)
1752 unsigned i, n = chain->length;
1753 tree ref, init, var, next, val, phi_result;
1754 gimple *stmt;
1755 gimple_seq stmts;
1757 chain->vars.create (n);
1759 ref = DR_REF (get_chain_root (chain)->ref);
1760 for (i = 0; i < n; i++)
1762 var = predcom_tmp_var (ref, i, tmp_vars);
1763 chain->vars.quick_push (var);
1766 FOR_EACH_VEC_ELT (chain->vars, i, var)
1767 chain->vars[i] = make_ssa_name (var);
1769 /* Root values are either rhs operand of stores to be eliminated, or
1770 loaded from memory before loop. */
1771 auto_vec<tree> vtemps;
1772 vtemps.safe_grow_cleared (n);
1773 for (i = 0; i < n; i++)
1775 init = get_init_expr (chain, i);
1776 if (init == NULL_TREE)
1778 /* Root value is rhs operand of the store to be eliminated if
1779 it isn't loaded from memory before loop. */
1780 dref a = get_chain_last_ref_at (chain, i);
1781 val = gimple_assign_rhs1 (a->stmt);
1782 if (TREE_CLOBBER_P (val))
1783 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1785 vtemps[n - i - 1] = val;
1787 else
1789 edge latch = loop_latch_edge (loop);
1790 edge entry = loop_preheader_edge (loop);
1792 /* Root value is loaded from memory before loop, we also need
1793 to add PHI nodes to propagate the value across iterations. */
1794 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1795 if (stmts)
1796 gsi_insert_seq_on_edge_immediate (entry, stmts);
1798 next = chain->vars[n - i];
1799 phi_result = copy_ssa_name (next);
1800 gphi *phi = create_phi_node (phi_result, loop->header);
1801 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1802 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1803 vtemps[n - i - 1] = phi_result;
1807 /* Find the insertion position. */
1808 dref last = get_chain_root (chain);
1809 for (i = 0; i < chain->refs.length (); i++)
1811 if (chain->refs[i]->pos > last->pos)
1812 last = chain->refs[i];
1815 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1817 /* Insert statements copying root value to root variable. */
1818 for (i = 0; i < n; i++)
1820 var = chain->vars[i];
1821 val = vtemps[i];
1822 stmt = gimple_build_assign (var, val);
1823 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1827 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1828 (CHAIN->length - 1) iterations. */
1830 static void
1831 finalize_eliminated_stores (struct loop *loop, chain_p chain)
1833 unsigned i, n = chain->length;
1835 for (i = 0; i < n; i++)
1837 tree var = chain->vars[i];
1838 tree fini = chain->finis[n - i - 1];
1839 gimple *stmt = gimple_build_assign (fini, var);
1841 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1844 if (chain->fini_seq)
1846 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1847 chain->fini_seq = NULL;
1851 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1852 initialization on entry to LOOP if necessary. The ssa name for the variable
1853 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1854 around the loop is created. Uid of the newly created temporary variable
1855 is marked in TMP_VARS. INITS is the list containing the (single)
1856 initializer. */
1858 static void
1859 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1860 vec<tree> *vars, vec<tree> inits,
1861 bitmap tmp_vars)
1863 unsigned i;
1864 tree ref = DR_REF (root->ref), init, var, next;
1865 gimple_seq stmts;
1866 gphi *phi;
1867 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1869 /* Find the initializer for the variable, and check that it cannot
1870 trap. */
1871 init = inits[0];
1873 vars->create (written ? 2 : 1);
1874 var = predcom_tmp_var (ref, 0, tmp_vars);
1875 vars->quick_push (var);
1876 if (written)
1877 vars->quick_push ((*vars)[0]);
1879 FOR_EACH_VEC_ELT (*vars, i, var)
1880 (*vars)[i] = make_ssa_name (var);
1882 var = (*vars)[0];
1884 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1885 if (stmts)
1886 gsi_insert_seq_on_edge_immediate (entry, stmts);
1888 if (written)
1890 next = (*vars)[1];
1891 phi = create_phi_node (var, loop->header);
1892 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1893 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1895 else
1897 gassign *init_stmt = gimple_build_assign (var, init);
1898 gsi_insert_on_edge_immediate (entry, init_stmt);
1903 /* Execute load motion for references in chain CHAIN. Uids of the newly
1904 created temporary variables are marked in TMP_VARS. */
1906 static void
1907 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1909 auto_vec<tree> vars;
1910 dref a;
1911 unsigned n_writes = 0, ridx, i;
1912 tree var;
1914 gcc_assert (chain->type == CT_INVARIANT);
1915 gcc_assert (!chain->combined);
1916 FOR_EACH_VEC_ELT (chain->refs, i, a)
1917 if (DR_IS_WRITE (a->ref))
1918 n_writes++;
1920 /* If there are no reads in the loop, there is nothing to do. */
1921 if (n_writes == chain->refs.length ())
1922 return;
1924 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1925 &vars, chain->inits, tmp_vars);
1927 ridx = 0;
1928 FOR_EACH_VEC_ELT (chain->refs, i, a)
1930 bool is_read = DR_IS_READ (a->ref);
1932 if (DR_IS_WRITE (a->ref))
1934 n_writes--;
1935 if (n_writes)
1937 var = vars[0];
1938 var = make_ssa_name (SSA_NAME_VAR (var));
1939 vars[0] = var;
1941 else
1942 ridx = 1;
1945 replace_ref_with (a->stmt, vars[ridx],
1946 !is_read, !is_read);
1950 /* Returns the single statement in that NAME is used, excepting
1951 the looparound phi nodes contained in one of the chains. If there is no
1952 such statement, or more statements, NULL is returned. */
1954 static gimple *
1955 single_nonlooparound_use (tree name)
1957 use_operand_p use;
1958 imm_use_iterator it;
1959 gimple *stmt, *ret = NULL;
1961 FOR_EACH_IMM_USE_FAST (use, it, name)
1963 stmt = USE_STMT (use);
1965 if (gimple_code (stmt) == GIMPLE_PHI)
1967 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1968 could not be processed anyway, so just fail for them. */
1969 if (bitmap_bit_p (looparound_phis,
1970 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1971 continue;
1973 return NULL;
1975 else if (is_gimple_debug (stmt))
1976 continue;
1977 else if (ret != NULL)
1978 return NULL;
1979 else
1980 ret = stmt;
1983 return ret;
1986 /* Remove statement STMT, as well as the chain of assignments in that it is
1987 used. */
1989 static void
1990 remove_stmt (gimple *stmt)
1992 tree name;
1993 gimple *next;
1994 gimple_stmt_iterator psi;
1996 if (gimple_code (stmt) == GIMPLE_PHI)
1998 name = PHI_RESULT (stmt);
1999 next = single_nonlooparound_use (name);
2000 reset_debug_uses (stmt);
2001 psi = gsi_for_stmt (stmt);
2002 remove_phi_node (&psi, true);
2004 if (!next
2005 || !gimple_assign_ssa_name_copy_p (next)
2006 || gimple_assign_rhs1 (next) != name)
2007 return;
2009 stmt = next;
2012 while (1)
2014 gimple_stmt_iterator bsi;
2016 bsi = gsi_for_stmt (stmt);
2018 name = gimple_assign_lhs (stmt);
2019 if (TREE_CODE (name) == SSA_NAME)
2021 next = single_nonlooparound_use (name);
2022 reset_debug_uses (stmt);
2024 else
2026 /* This is a store to be eliminated. */
2027 gcc_assert (gimple_vdef (stmt) != NULL);
2028 next = NULL;
2031 unlink_stmt_vdef (stmt);
2032 gsi_remove (&bsi, true);
2033 release_defs (stmt);
2035 if (!next
2036 || !gimple_assign_ssa_name_copy_p (next)
2037 || gimple_assign_rhs1 (next) != name)
2038 return;
2040 stmt = next;
2044 /* Perform the predictive commoning optimization for a chain CHAIN.
2045 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2047 static void
2048 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
2049 bitmap tmp_vars)
2051 unsigned i, n;
2052 dref a;
2053 tree var;
2054 bool in_lhs;
2056 if (chain->combined)
2058 /* For combined chains, just remove the statements that are used to
2059 compute the values of the expression (except for the root one).
2060 We delay this until after all chains are processed. */
2062 else if (chain->type == CT_STORE_STORE)
2064 if (chain->length > 0)
2066 if (chain->inv_store_elimination)
2068 /* If dead stores in this chain only store loop invariant
2069 values, we can simply record the invariant value and use
2070 it directly after loop. */
2071 initialize_root_vars_store_elim_1 (chain);
2073 else
2075 /* If dead stores in this chain store loop variant values,
2076 we need to set up the variables by loading from memory
2077 before loop and propagating it with PHI nodes. */
2078 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2081 /* For inter-iteration store elimination chain, stores at each
2082 distance in loop's last (chain->length - 1) iterations can't
2083 be eliminated, because there is no following killing store.
2084 We need to generate these stores after loop. */
2085 finalize_eliminated_stores (loop, chain);
2088 /* Eliminate the stores killed by following store. */
2089 n = chain->refs.length ();
2090 for (i = 0; i < n - 1; i++)
2091 remove_stmt (chain->refs[i]->stmt);
2093 else
2095 /* For non-combined chains, set up the variables that hold its value. */
2096 initialize_root_vars (loop, chain, tmp_vars);
2097 a = get_chain_root (chain);
2098 in_lhs = (chain->type == CT_STORE_LOAD
2099 || chain->type == CT_COMBINATION);
2100 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2102 /* Replace the uses of the original references by these variables. */
2103 for (i = 1; chain->refs.iterate (i, &a); i++)
2105 var = chain->vars[chain->length - a->distance];
2106 replace_ref_with (a->stmt, var, false, false);
2111 /* Determines the unroll factor necessary to remove as many temporary variable
2112 copies as possible. CHAINS is the list of chains that will be
2113 optimized. */
2115 static unsigned
2116 determine_unroll_factor (vec<chain_p> chains)
2118 chain_p chain;
2119 unsigned factor = 1, af, nfactor, i;
2120 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
2122 FOR_EACH_VEC_ELT (chains, i, chain)
2124 if (chain->type == CT_INVARIANT)
2125 continue;
2126 /* For now we can't handle unrolling when eliminating stores. */
2127 else if (chain->type == CT_STORE_STORE)
2128 return 1;
2130 if (chain->combined)
2132 /* For combined chains, we can't handle unrolling if we replace
2133 looparound PHIs. */
2134 dref a;
2135 unsigned j;
2136 for (j = 1; chain->refs.iterate (j, &a); j++)
2137 if (gimple_code (a->stmt) == GIMPLE_PHI)
2138 return 1;
2139 continue;
2142 /* The best unroll factor for this chain is equal to the number of
2143 temporary variables that we create for it. */
2144 af = chain->length;
2145 if (chain->has_max_use_after)
2146 af++;
2148 nfactor = factor * af / gcd (factor, af);
2149 if (nfactor <= max)
2150 factor = nfactor;
2153 return factor;
2156 /* Perform the predictive commoning optimization for CHAINS.
2157 Uids of the newly created temporary variables are marked in TMP_VARS. */
2159 static void
2160 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
2161 bitmap tmp_vars)
2163 chain_p chain;
2164 unsigned i;
2166 FOR_EACH_VEC_ELT (chains, i, chain)
2168 if (chain->type == CT_INVARIANT)
2169 execute_load_motion (loop, chain, tmp_vars);
2170 else
2171 execute_pred_commoning_chain (loop, chain, tmp_vars);
2174 FOR_EACH_VEC_ELT (chains, i, chain)
2176 if (chain->type == CT_INVARIANT)
2178 else if (chain->combined)
2180 /* For combined chains, just remove the statements that are used to
2181 compute the values of the expression (except for the root one). */
2182 dref a;
2183 unsigned j;
2184 for (j = 1; chain->refs.iterate (j, &a); j++)
2185 remove_stmt (a->stmt);
2189 update_ssa (TODO_update_ssa_only_virtuals);
2192 /* For each reference in CHAINS, if its defining statement is
2193 phi node, record the ssa name that is defined by it. */
2195 static void
2196 replace_phis_by_defined_names (vec<chain_p> chains)
2198 chain_p chain;
2199 dref a;
2200 unsigned i, j;
2202 FOR_EACH_VEC_ELT (chains, i, chain)
2203 FOR_EACH_VEC_ELT (chain->refs, j, a)
2205 if (gimple_code (a->stmt) == GIMPLE_PHI)
2207 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2208 a->stmt = NULL;
2213 /* For each reference in CHAINS, if name_defined_by_phi is not
2214 NULL, use it to set the stmt field. */
2216 static void
2217 replace_names_by_phis (vec<chain_p> chains)
2219 chain_p chain;
2220 dref a;
2221 unsigned i, j;
2223 FOR_EACH_VEC_ELT (chains, i, chain)
2224 FOR_EACH_VEC_ELT (chain->refs, j, a)
2225 if (a->stmt == NULL)
2227 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2228 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2229 a->name_defined_by_phi = NULL_TREE;
2233 /* Wrapper over execute_pred_commoning, to pass it as a callback
2234 to tree_transform_and_unroll_loop. */
2236 struct epcc_data
2238 vec<chain_p> chains;
2239 bitmap tmp_vars;
2242 static void
2243 execute_pred_commoning_cbck (struct loop *loop, void *data)
2245 struct epcc_data *const dta = (struct epcc_data *) data;
2247 /* Restore phi nodes that were replaced by ssa names before
2248 tree_transform_and_unroll_loop (see detailed description in
2249 tree_predictive_commoning_loop). */
2250 replace_names_by_phis (dta->chains);
2251 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2254 /* Base NAME and all the names in the chain of phi nodes that use it
2255 on variable VAR. The phi nodes are recognized by being in the copies of
2256 the header of the LOOP. */
2258 static void
2259 base_names_in_chain_on (struct loop *loop, tree name, tree var)
2261 gimple *stmt, *phi;
2262 imm_use_iterator iter;
2264 replace_ssa_name_symbol (name, var);
2266 while (1)
2268 phi = NULL;
2269 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2271 if (gimple_code (stmt) == GIMPLE_PHI
2272 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2274 phi = stmt;
2275 BREAK_FROM_IMM_USE_STMT (iter);
2278 if (!phi)
2279 return;
2281 name = PHI_RESULT (phi);
2282 replace_ssa_name_symbol (name, var);
2286 /* Given an unrolled LOOP after predictive commoning, remove the
2287 register copies arising from phi nodes by changing the base
2288 variables of SSA names. TMP_VARS is the set of the temporary variables
2289 for those we want to perform this. */
2291 static void
2292 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
2294 edge e;
2295 gphi *phi;
2296 gimple *stmt;
2297 tree name, use, var;
2298 gphi_iterator psi;
2300 e = loop_latch_edge (loop);
2301 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2303 phi = psi.phi ();
2304 name = PHI_RESULT (phi);
2305 var = SSA_NAME_VAR (name);
2306 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2307 continue;
2308 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2309 gcc_assert (TREE_CODE (use) == SSA_NAME);
2311 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2312 stmt = SSA_NAME_DEF_STMT (use);
2313 while (gimple_code (stmt) == GIMPLE_PHI
2314 /* In case we could not unroll the loop enough to eliminate
2315 all copies, we may reach the loop header before the defining
2316 statement (in that case, some register copies will be present
2317 in loop latch in the final code, corresponding to the newly
2318 created looparound phi nodes). */
2319 && gimple_bb (stmt) != loop->header)
2321 gcc_assert (single_pred_p (gimple_bb (stmt)));
2322 use = PHI_ARG_DEF (stmt, 0);
2323 stmt = SSA_NAME_DEF_STMT (use);
2326 base_names_in_chain_on (loop, use, var);
2330 /* Returns true if CHAIN is suitable to be combined. */
2332 static bool
2333 chain_can_be_combined_p (chain_p chain)
2335 return (!chain->combined
2336 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2339 /* Returns the modify statement that uses NAME. Skips over assignment
2340 statements, NAME is replaced with the actual name used in the returned
2341 statement. */
2343 static gimple *
2344 find_use_stmt (tree *name)
2346 gimple *stmt;
2347 tree rhs, lhs;
2349 /* Skip over assignments. */
2350 while (1)
2352 stmt = single_nonlooparound_use (*name);
2353 if (!stmt)
2354 return NULL;
2356 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2357 return NULL;
2359 lhs = gimple_assign_lhs (stmt);
2360 if (TREE_CODE (lhs) != SSA_NAME)
2361 return NULL;
2363 if (gimple_assign_copy_p (stmt))
2365 rhs = gimple_assign_rhs1 (stmt);
2366 if (rhs != *name)
2367 return NULL;
2369 *name = lhs;
2371 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2372 == GIMPLE_BINARY_RHS)
2373 return stmt;
2374 else
2375 return NULL;
2379 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2381 static bool
2382 may_reassociate_p (tree type, enum tree_code code)
2384 if (FLOAT_TYPE_P (type)
2385 && !flag_unsafe_math_optimizations)
2386 return false;
2388 return (commutative_tree_code (code)
2389 && associative_tree_code (code));
2392 /* If the operation used in STMT is associative and commutative, go through the
2393 tree of the same operations and returns its root. Distance to the root
2394 is stored in DISTANCE. */
2396 static gimple *
2397 find_associative_operation_root (gimple *stmt, unsigned *distance)
2399 tree lhs;
2400 gimple *next;
2401 enum tree_code code = gimple_assign_rhs_code (stmt);
2402 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2403 unsigned dist = 0;
2405 if (!may_reassociate_p (type, code))
2406 return NULL;
2408 while (1)
2410 lhs = gimple_assign_lhs (stmt);
2411 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2413 next = find_use_stmt (&lhs);
2414 if (!next
2415 || gimple_assign_rhs_code (next) != code)
2416 break;
2418 stmt = next;
2419 dist++;
2422 if (distance)
2423 *distance = dist;
2424 return stmt;
2427 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2428 is no such statement, returns NULL_TREE. In case the operation used on
2429 NAME1 and NAME2 is associative and commutative, returns the root of the
2430 tree formed by this operation instead of the statement that uses NAME1 or
2431 NAME2. */
2433 static gimple *
2434 find_common_use_stmt (tree *name1, tree *name2)
2436 gimple *stmt1, *stmt2;
2438 stmt1 = find_use_stmt (name1);
2439 if (!stmt1)
2440 return NULL;
2442 stmt2 = find_use_stmt (name2);
2443 if (!stmt2)
2444 return NULL;
2446 if (stmt1 == stmt2)
2447 return stmt1;
2449 stmt1 = find_associative_operation_root (stmt1, NULL);
2450 if (!stmt1)
2451 return NULL;
2452 stmt2 = find_associative_operation_root (stmt2, NULL);
2453 if (!stmt2)
2454 return NULL;
2456 return (stmt1 == stmt2 ? stmt1 : NULL);
2459 /* Checks whether R1 and R2 are combined together using CODE, with the result
2460 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2461 if it is true. If CODE is ERROR_MARK, set these values instead. */
2463 static bool
2464 combinable_refs_p (dref r1, dref r2,
2465 enum tree_code *code, bool *swap, tree *rslt_type)
2467 enum tree_code acode;
2468 bool aswap;
2469 tree atype;
2470 tree name1, name2;
2471 gimple *stmt;
2473 name1 = name_for_ref (r1);
2474 name2 = name_for_ref (r2);
2475 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2477 stmt = find_common_use_stmt (&name1, &name2);
2479 if (!stmt
2480 /* A simple post-dominance check - make sure the combination
2481 is executed under the same condition as the references. */
2482 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2483 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2484 return false;
2486 acode = gimple_assign_rhs_code (stmt);
2487 aswap = (!commutative_tree_code (acode)
2488 && gimple_assign_rhs1 (stmt) != name1);
2489 atype = TREE_TYPE (gimple_assign_lhs (stmt));
2491 if (*code == ERROR_MARK)
2493 *code = acode;
2494 *swap = aswap;
2495 *rslt_type = atype;
2496 return true;
2499 return (*code == acode
2500 && *swap == aswap
2501 && *rslt_type == atype);
2504 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2505 an assignment of the remaining operand. */
2507 static void
2508 remove_name_from_operation (gimple *stmt, tree op)
2510 tree other_op;
2511 gimple_stmt_iterator si;
2513 gcc_assert (is_gimple_assign (stmt));
2515 if (gimple_assign_rhs1 (stmt) == op)
2516 other_op = gimple_assign_rhs2 (stmt);
2517 else
2518 other_op = gimple_assign_rhs1 (stmt);
2520 si = gsi_for_stmt (stmt);
2521 gimple_assign_set_rhs_from_tree (&si, other_op);
2523 /* We should not have reallocated STMT. */
2524 gcc_assert (gsi_stmt (si) == stmt);
2526 update_stmt (stmt);
2529 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2530 are combined in a single statement, and returns this statement. Note the
2531 statement is inserted before INSERT_BEFORE if it's not NULL. */
2533 static gimple *
2534 reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
2536 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2537 gassign *new_stmt, *tmp_stmt;
2538 tree new_name, tmp_name, var, r1, r2;
2539 unsigned dist1, dist2;
2540 enum tree_code code;
2541 tree type = TREE_TYPE (name1);
2542 gimple_stmt_iterator bsi;
2544 stmt1 = find_use_stmt (&name1);
2545 stmt2 = find_use_stmt (&name2);
2546 root1 = find_associative_operation_root (stmt1, &dist1);
2547 root2 = find_associative_operation_root (stmt2, &dist2);
2548 code = gimple_assign_rhs_code (stmt1);
2550 gcc_assert (root1 && root2 && root1 == root2
2551 && code == gimple_assign_rhs_code (stmt2));
2553 /* Find the root of the nearest expression in that both NAME1 and NAME2
2554 are used. */
2555 r1 = name1;
2556 s1 = stmt1;
2557 r2 = name2;
2558 s2 = stmt2;
2560 while (dist1 > dist2)
2562 s1 = find_use_stmt (&r1);
2563 r1 = gimple_assign_lhs (s1);
2564 dist1--;
2566 while (dist2 > dist1)
2568 s2 = find_use_stmt (&r2);
2569 r2 = gimple_assign_lhs (s2);
2570 dist2--;
2573 while (s1 != s2)
2575 s1 = find_use_stmt (&r1);
2576 r1 = gimple_assign_lhs (s1);
2577 s2 = find_use_stmt (&r2);
2578 r2 = gimple_assign_lhs (s2);
2581 /* Remove NAME1 and NAME2 from the statements in that they are used
2582 currently. */
2583 remove_name_from_operation (stmt1, name1);
2584 remove_name_from_operation (stmt2, name2);
2586 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2587 combine it with the rhs of S1. */
2588 var = create_tmp_reg (type, "predreastmp");
2589 new_name = make_ssa_name (var);
2590 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2591 if (insert_before && stmt_dominates_stmt_p (insert_before, s1))
2592 bsi = gsi_for_stmt (insert_before);
2593 else
2594 bsi = gsi_for_stmt (s1);
2596 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2598 var = create_tmp_reg (type, "predreastmp");
2599 tmp_name = make_ssa_name (var);
2601 /* Rhs of S1 may now be either a binary expression with operation
2602 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2603 so that name1 or name2 was removed from it). */
2604 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2605 gimple_assign_rhs1 (s1),
2606 gimple_assign_rhs2 (s1));
2608 bsi = gsi_for_stmt (s1);
2609 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2610 s1 = gsi_stmt (bsi);
2611 update_stmt (s1);
2613 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2615 return new_stmt;
2618 /* Returns the statement that combines references R1 and R2. In case R1
2619 and R2 are not used in the same statement, but they are used with an
2620 associative and commutative operation in the same expression, reassociate
2621 the expression so that they are used in the same statement. The combined
2622 statement is inserted before INSERT_BEFORE if it's not NULL. */
2624 static gimple *
2625 stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
2627 gimple *stmt1, *stmt2;
2628 tree name1 = name_for_ref (r1);
2629 tree name2 = name_for_ref (r2);
2631 stmt1 = find_use_stmt (&name1);
2632 stmt2 = find_use_stmt (&name2);
2633 if (stmt1 == stmt2)
2634 return stmt1;
2636 return reassociate_to_the_same_stmt (name1, name2, insert_before);
2639 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2640 description of the new chain is returned, otherwise we return NULL. */
2642 static chain_p
2643 combine_chains (chain_p ch1, chain_p ch2)
2645 dref r1, r2, nw;
2646 enum tree_code op = ERROR_MARK;
2647 bool swap = false;
2648 chain_p new_chain;
2649 int i, j, num;
2650 gimple *root_stmt;
2651 tree rslt_type = NULL_TREE;
2653 if (ch1 == ch2)
2654 return NULL;
2655 if (ch1->length != ch2->length)
2656 return NULL;
2658 if (ch1->refs.length () != ch2->refs.length ())
2659 return NULL;
2661 for (i = 0; (ch1->refs.iterate (i, &r1)
2662 && ch2->refs.iterate (i, &r2)); i++)
2664 if (r1->distance != r2->distance)
2665 return NULL;
2667 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2668 return NULL;
2671 ch1->combined = true;
2672 ch2->combined = true;
2674 if (swap)
2675 std::swap (ch1, ch2);
2677 new_chain = XCNEW (struct chain);
2678 new_chain->type = CT_COMBINATION;
2679 new_chain->op = op;
2680 new_chain->ch1 = ch1;
2681 new_chain->ch2 = ch2;
2682 new_chain->rslt_type = rslt_type;
2683 new_chain->length = ch1->length;
2685 gimple *insert = NULL;
2686 num = ch1->refs.length ();
2687 i = (new_chain->length == 0) ? num - 1 : 0;
2688 j = (new_chain->length == 0) ? -1 : 1;
2689 /* For ZERO length chain, process refs in reverse order so that dominant
2690 position is ready when it comes to the root ref.
2691 For non-ZERO length chain, process refs in order. See PR79663. */
2692 for (; num > 0; num--, i += j)
2694 r1 = ch1->refs[i];
2695 r2 = ch2->refs[i];
2696 nw = XCNEW (struct dref_d);
2697 nw->distance = r1->distance;
2699 /* For ZERO length chain, insert combined stmt of root ref at dominant
2700 position. */
2701 nw->stmt = stmt_combining_refs (r1, r2, i == 0 ? insert : NULL);
2702 /* For ZERO length chain, record dominant position where combined stmt
2703 of root ref should be inserted. In this case, though all root refs
2704 dominate following ones, it's possible that combined stmt doesn't.
2705 See PR70754. */
2706 if (new_chain->length == 0
2707 && (insert == NULL || stmt_dominates_stmt_p (nw->stmt, insert)))
2708 insert = nw->stmt;
2710 new_chain->refs.safe_push (nw);
2712 if (new_chain->length == 0)
2714 /* Restore the order for ZERO length chain's refs. */
2715 num = new_chain->refs.length () >> 1;
2716 for (i = 0, j = new_chain->refs.length () - 1; i < num; i++, j--)
2717 std::swap (new_chain->refs[i], new_chain->refs[j]);
2719 /* For ZERO length chain, has_max_use_after must be true since root
2720 combined stmt must dominates others. */
2721 new_chain->has_max_use_after = true;
2722 return new_chain;
2725 new_chain->has_max_use_after = false;
2726 root_stmt = get_chain_root (new_chain)->stmt;
2727 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
2729 if (nw->distance == new_chain->length
2730 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2732 new_chain->has_max_use_after = true;
2733 break;
2737 return new_chain;
2740 /* Try to combine the CHAINS. */
2742 static void
2743 try_combine_chains (vec<chain_p> *chains)
2745 unsigned i, j;
2746 chain_p ch1, ch2, cch;
2747 auto_vec<chain_p> worklist;
2749 FOR_EACH_VEC_ELT (*chains, i, ch1)
2750 if (chain_can_be_combined_p (ch1))
2751 worklist.safe_push (ch1);
2753 while (!worklist.is_empty ())
2755 ch1 = worklist.pop ();
2756 if (!chain_can_be_combined_p (ch1))
2757 continue;
2759 FOR_EACH_VEC_ELT (*chains, j, ch2)
2761 if (!chain_can_be_combined_p (ch2))
2762 continue;
2764 cch = combine_chains (ch1, ch2);
2765 if (cch)
2767 worklist.safe_push (cch);
2768 chains->safe_push (cch);
2769 break;
2775 /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2776 if this is impossible because one of these initializers may trap, true
2777 otherwise. */
2779 static bool
2780 prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
2782 unsigned i, n = chain->length;
2784 /* For now we can't eliminate stores if some of them are conditional
2785 executed. */
2786 if (!chain->all_always_accessed)
2787 return false;
2789 /* Nothing to intialize for intra-iteration store elimination. */
2790 if (n == 0 && chain->type == CT_STORE_STORE)
2791 return true;
2793 /* For store elimination chain, there is nothing to initialize if stores
2794 to be eliminated only store loop invariant values into memory. */
2795 if (chain->type == CT_STORE_STORE
2796 && is_inv_store_elimination_chain (loop, chain))
2798 chain->inv_store_elimination = true;
2799 return true;
2802 chain->inits.create (n);
2803 chain->inits.safe_grow_cleared (n);
2805 /* For store eliminatin chain like below:
2807 for (i = 0; i < len; i++)
2809 a[i] = 1;
2810 // a[i + 1] = ...
2811 a[i + 2] = 3;
2814 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2815 content of a[i + 1] remain the same if the loop iterates fewer times
2816 than chain->length. We need to set up root variables for such stores
2817 by loading from memory before loop. Note we only need to load bubble
2818 elements because loop body is guaranteed to be executed at least once
2819 after loop's preheader edge. */
2820 auto_vec<bool> bubbles;
2821 bubbles.safe_grow_cleared (n + 1);
2822 for (i = 0; i < chain->refs.length (); i++)
2823 bubbles[chain->refs[i]->distance] = true;
2825 struct data_reference *dr = get_chain_root (chain)->ref;
2826 for (i = 0; i < n; i++)
2828 if (bubbles[i])
2829 continue;
2831 gimple_seq stmts = NULL;
2833 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2834 if (stmts)
2835 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2837 chain->inits[i] = init;
2840 return true;
2843 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2844 impossible because one of these initializers may trap, true otherwise. */
2846 static bool
2847 prepare_initializers_chain (struct loop *loop, chain_p chain)
2849 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2850 struct data_reference *dr = get_chain_root (chain)->ref;
2851 tree init;
2852 dref laref;
2853 edge entry = loop_preheader_edge (loop);
2855 if (chain->type == CT_STORE_STORE)
2856 return prepare_initializers_chain_store_elim (loop, chain);
2858 /* Find the initializers for the variables, and check that they cannot
2859 trap. */
2860 chain->inits.create (n);
2861 for (i = 0; i < n; i++)
2862 chain->inits.quick_push (NULL_TREE);
2864 /* If we have replaced some looparound phi nodes, use their initializers
2865 instead of creating our own. */
2866 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2868 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2869 continue;
2871 gcc_assert (laref->distance > 0);
2872 chain->inits[n - laref->distance]
2873 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
2876 for (i = 0; i < n; i++)
2878 gimple_seq stmts = NULL;
2880 if (chain->inits[i] != NULL_TREE)
2881 continue;
2883 init = ref_at_iteration (dr, (int) i - n, &stmts);
2884 if (!chain->all_always_accessed && tree_could_trap_p (init))
2886 gimple_seq_discard (stmts);
2887 return false;
2890 if (stmts)
2891 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2893 chain->inits[i] = init;
2896 return true;
2899 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2900 be used because the initializers might trap. */
2902 static void
2903 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2905 chain_p chain;
2906 unsigned i;
2908 for (i = 0; i < chains.length (); )
2910 chain = chains[i];
2911 if (prepare_initializers_chain (loop, chain))
2912 i++;
2913 else
2915 release_chain (chain);
2916 chains.unordered_remove (i);
2921 /* Generates finalizer memory references for CHAIN in LOOP. Returns true
2922 if finalizer code for CHAIN can be generated, otherwise false. */
2924 static bool
2925 prepare_finalizers_chain (struct loop *loop, chain_p chain)
2927 unsigned i, n = chain->length;
2928 struct data_reference *dr = get_chain_root (chain)->ref;
2929 tree fini, niters = number_of_latch_executions (loop);
2931 /* For now we can't eliminate stores if some of them are conditional
2932 executed. */
2933 if (!chain->all_always_accessed)
2934 return false;
2936 chain->finis.create (n);
2937 for (i = 0; i < n; i++)
2938 chain->finis.quick_push (NULL_TREE);
2940 /* We never use looparound phi node for store elimination chains. */
2942 /* Find the finalizers for the variables, and check that they cannot
2943 trap. */
2944 for (i = 0; i < n; i++)
2946 gimple_seq stmts = NULL;
2947 gcc_assert (chain->finis[i] == NULL_TREE);
2949 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
2951 niters = unshare_expr (niters);
2952 niters = force_gimple_operand (niters, &stmts, true, NULL);
2953 if (stmts)
2955 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
2956 stmts = NULL;
2959 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
2960 if (stmts)
2961 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
2963 chain->finis[i] = fini;
2966 return true;
2969 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
2970 if finalizer code generation for CHAINS breaks loop closed ssa form. */
2972 static bool
2973 prepare_finalizers (struct loop *loop, vec<chain_p> chains)
2975 chain_p chain;
2976 unsigned i;
2977 bool loop_closed_ssa = false;
2979 for (i = 0; i < chains.length ();)
2981 chain = chains[i];
2983 /* Finalizer is only necessary for inter-iteration store elimination
2984 chains. */
2985 if (chain->length == 0 || chain->type != CT_STORE_STORE)
2987 i++;
2988 continue;
2991 if (prepare_finalizers_chain (loop, chain))
2993 i++;
2994 /* Be conservative, assume loop closed ssa form is corrupted
2995 by store-store chain. Though it's not always the case if
2996 eliminated stores only store loop invariant values into
2997 memory. */
2998 loop_closed_ssa = true;
3000 else
3002 release_chain (chain);
3003 chains.unordered_remove (i);
3006 return loop_closed_ssa;
3009 /* Insert all initializing gimple stmts into loop's entry edge. */
3011 static void
3012 insert_init_seqs (struct loop *loop, vec<chain_p> chains)
3014 unsigned i;
3015 edge entry = loop_preheader_edge (loop);
3017 for (i = 0; i < chains.length (); ++i)
3018 if (chains[i]->init_seq)
3020 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3021 chains[i]->init_seq = NULL;
3025 /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3026 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3027 form was corrupted. */
3029 static unsigned
3030 tree_predictive_commoning_loop (struct loop *loop)
3032 vec<data_reference_p> datarefs;
3033 vec<ddr_p> dependences;
3034 struct component *components;
3035 vec<chain_p> chains = vNULL;
3036 unsigned unroll_factor;
3037 struct tree_niter_desc desc;
3038 bool unroll = false, loop_closed_ssa = false;
3039 edge exit;
3041 if (dump_file && (dump_flags & TDF_DETAILS))
3042 fprintf (dump_file, "Processing loop %d\n", loop->num);
3044 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3045 if (get_max_loop_iterations_int (loop) == 0)
3047 if (dump_file && (dump_flags & TDF_DETAILS))
3048 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3050 return 0;
3053 /* Find the data references and split them into components according to their
3054 dependence relations. */
3055 auto_vec<loop_p, 3> loop_nest;
3056 dependences.create (10);
3057 datarefs.create (10);
3058 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3059 &dependences))
3061 if (dump_file && (dump_flags & TDF_DETAILS))
3062 fprintf (dump_file, "Cannot analyze data dependencies\n");
3063 free_data_refs (datarefs);
3064 free_dependence_relations (dependences);
3065 return 0;
3068 if (dump_file && (dump_flags & TDF_DETAILS))
3069 dump_data_dependence_relations (dump_file, dependences);
3071 components = split_data_refs_to_components (loop, datarefs, dependences);
3072 loop_nest.release ();
3073 free_dependence_relations (dependences);
3074 if (!components)
3076 free_data_refs (datarefs);
3077 free_affine_expand_cache (&name_expansions);
3078 return 0;
3081 if (dump_file && (dump_flags & TDF_DETAILS))
3083 fprintf (dump_file, "Initial state:\n\n");
3084 dump_components (dump_file, components);
3087 /* Find the suitable components and split them into chains. */
3088 components = filter_suitable_components (loop, components);
3090 auto_bitmap tmp_vars;
3091 looparound_phis = BITMAP_ALLOC (NULL);
3092 determine_roots (loop, components, &chains);
3093 release_components (components);
3095 if (!chains.exists ())
3097 if (dump_file && (dump_flags & TDF_DETAILS))
3098 fprintf (dump_file,
3099 "Predictive commoning failed: no suitable chains\n");
3100 goto end;
3102 prepare_initializers (loop, chains);
3103 loop_closed_ssa = prepare_finalizers (loop, chains);
3105 /* Try to combine the chains that are always worked with together. */
3106 try_combine_chains (&chains);
3108 insert_init_seqs (loop, chains);
3110 if (dump_file && (dump_flags & TDF_DETAILS))
3112 fprintf (dump_file, "Before commoning:\n\n");
3113 dump_chains (dump_file, chains);
3116 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3117 that its number of iterations is divisible by the factor. */
3118 unroll_factor = determine_unroll_factor (chains);
3119 scev_reset ();
3120 unroll = (unroll_factor > 1
3121 && can_unroll_loop_p (loop, unroll_factor, &desc));
3122 exit = single_dom_exit (loop);
3124 /* Execute the predictive commoning transformations, and possibly unroll the
3125 loop. */
3126 if (unroll)
3128 struct epcc_data dta;
3130 if (dump_file && (dump_flags & TDF_DETAILS))
3131 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3133 dta.chains = chains;
3134 dta.tmp_vars = tmp_vars;
3136 update_ssa (TODO_update_ssa_only_virtuals);
3138 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3139 execute_pred_commoning_cbck is called may cause phi nodes to be
3140 reallocated, which is a problem since CHAINS may point to these
3141 statements. To fix this, we store the ssa names defined by the
3142 phi nodes here instead of the phi nodes themselves, and restore
3143 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3144 replace_phis_by_defined_names (chains);
3146 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3147 execute_pred_commoning_cbck, &dta);
3148 eliminate_temp_copies (loop, tmp_vars);
3150 else
3152 if (dump_file && (dump_flags & TDF_DETAILS))
3153 fprintf (dump_file,
3154 "Executing predictive commoning without unrolling.\n");
3155 execute_pred_commoning (loop, chains, tmp_vars);
3158 end: ;
3159 release_chains (chains);
3160 free_data_refs (datarefs);
3161 BITMAP_FREE (looparound_phis);
3163 free_affine_expand_cache (&name_expansions);
3165 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
3168 /* Runs predictive commoning. */
3170 unsigned
3171 tree_predictive_commoning (void)
3173 struct loop *loop;
3174 unsigned ret = 0, changed = 0;
3176 initialize_original_copy_tables ();
3177 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3178 if (optimize_loop_for_speed_p (loop))
3180 changed |= tree_predictive_commoning_loop (loop);
3182 free_original_copy_tables ();
3184 if (changed > 0)
3186 scev_reset ();
3188 if (changed > 1)
3189 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3191 ret = TODO_cleanup_cfg;
3194 return ret;
3197 /* Predictive commoning Pass. */
3199 static unsigned
3200 run_tree_predictive_commoning (struct function *fun)
3202 if (number_of_loops (fun) <= 1)
3203 return 0;
3205 return tree_predictive_commoning ();
3208 namespace {
3210 const pass_data pass_data_predcom =
3212 GIMPLE_PASS, /* type */
3213 "pcom", /* name */
3214 OPTGROUP_LOOP, /* optinfo_flags */
3215 TV_PREDCOM, /* tv_id */
3216 PROP_cfg, /* properties_required */
3217 0, /* properties_provided */
3218 0, /* properties_destroyed */
3219 0, /* todo_flags_start */
3220 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3223 class pass_predcom : public gimple_opt_pass
3225 public:
3226 pass_predcom (gcc::context *ctxt)
3227 : gimple_opt_pass (pass_data_predcom, ctxt)
3230 /* opt_pass methods: */
3231 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
3232 virtual unsigned int execute (function *fun)
3234 return run_tree_predictive_commoning (fun);
3237 }; // class pass_predcom
3239 } // anon namespace
3241 gimple_opt_pass *
3242 make_pass_predcom (gcc::context *ctxt)
3244 return new pass_predcom (ctxt);