1 /* Predictive commoning.
2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
163 for (i = 0; i < n; i++)
169 It can be replaced with:
173 for (i = 0; i < n; i++)
183 If the loop runs more than 1 iterations, it can be further simplified into:
185 for (i = 0; i < n; i++)
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
209 #include "coretypes.h"
215 #include "tree-pass.h"
217 #include "gimple-pretty-print.h"
219 #include "fold-const.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
235 #include "tree-affine.h"
236 #include "builtins.h"
238 /* The maximum number of iterations between the considered memory
241 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
243 /* Data references (or phi nodes that carry data reference values across
249 /* The reference itself. */
250 struct data_reference
*ref
;
252 /* The statement in that the reference appears. */
255 /* In case that STMT is a phi node, this field is set to the SSA name
256 defined by it in replace_phis_by_defined_names (in order to avoid
257 pointing to phi node that got reallocated in the meantime). */
258 tree name_defined_by_phi
;
260 /* Distance of the reference from the root of the chain (in number of
261 iterations of the loop). */
264 /* Number of iterations offset from the first reference in the component. */
267 /* Number of the reference in a component, in dominance ordering. */
270 /* True if the memory reference is always accessed when the loop is
272 unsigned always_accessed
: 1;
276 /* Type of the chain of the references. */
280 /* The addresses of the references in the chain are constant. */
283 /* There are only loads in the chain. */
286 /* Root of the chain is store, the rest are loads. */
289 /* There are only stores in the chain. */
292 /* A combination of two chains. */
296 /* Chains of data references. */
300 /* Type of the chain. */
301 enum chain_type type
;
303 /* For combination chains, the operator and the two chains that are
304 combined, and the type of the result. */
307 struct chain
*ch1
, *ch2
;
309 /* The references in the chain. */
312 /* The maximum distance of the reference in the chain from the root. */
315 /* The variables used to copy the value throughout iterations. */
318 /* Initializers for the variables. */
321 /* Finalizers for the eliminated stores. */
324 /* gimple stmts intializing the initial variables of the chain. */
327 /* gimple stmts finalizing the eliminated stores of the chain. */
330 /* True if there is a use of a variable with the maximal distance
331 that comes after the root in the loop. */
332 unsigned has_max_use_after
: 1;
334 /* True if all the memory references in the chain are always accessed. */
335 unsigned all_always_accessed
: 1;
337 /* True if this chain was combined together with some other chain. */
338 unsigned combined
: 1;
340 /* True if this is store elimination chain and eliminated stores store
341 loop invariant value into memory. */
342 unsigned inv_store_elimination
: 1;
346 /* Describes the knowledge about the step of the memory references in
351 /* The step is zero. */
354 /* The step is nonzero. */
357 /* The step may or may not be nonzero. */
361 /* Components of the data dependence graph. */
365 /* The references in the component. */
368 /* What we know about the step of the references in the component. */
369 enum ref_step_type comp_step
;
371 /* True if all references in component are stores and we try to do
372 intra/inter loop iteration dead store elimination. */
373 bool eliminate_store_p
;
375 /* Next component in the list. */
376 struct component
*next
;
379 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
381 static bitmap looparound_phis
;
383 /* Cache used by tree_to_aff_combination_expand. */
385 static hash_map
<tree
, name_expansion
*> *name_expansions
;
387 /* Dumps data reference REF to FILE. */
389 extern void dump_dref (FILE *, dref
);
391 dump_dref (FILE *file
, dref ref
)
396 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
397 fprintf (file
, " (id %u%s)\n", ref
->pos
,
398 DR_IS_READ (ref
->ref
) ? "" : ", write");
400 fprintf (file
, " offset ");
401 print_decs (ref
->offset
, file
);
402 fprintf (file
, "\n");
404 fprintf (file
, " distance %u\n", ref
->distance
);
408 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
409 fprintf (file
, " looparound ref\n");
411 fprintf (file
, " combination ref\n");
412 fprintf (file
, " in statement ");
413 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
414 fprintf (file
, "\n");
415 fprintf (file
, " distance %u\n", ref
->distance
);
420 /* Dumps CHAIN to FILE. */
422 extern void dump_chain (FILE *, chain_p
);
424 dump_chain (FILE *file
, chain_p chain
)
427 const char *chain_type
;
434 chain_type
= "Load motion";
438 chain_type
= "Loads-only";
442 chain_type
= "Store-loads";
446 chain_type
= "Store-stores";
450 chain_type
= "Combination";
457 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
458 chain
->combined
? " (combined)" : "");
459 if (chain
->type
!= CT_INVARIANT
)
460 fprintf (file
, " max distance %u%s\n", chain
->length
,
461 chain
->has_max_use_after
? "" : ", may reuse first");
463 if (chain
->type
== CT_COMBINATION
)
465 fprintf (file
, " equal to %p %s %p in type ",
466 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
467 (void *) chain
->ch2
);
468 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
469 fprintf (file
, "\n");
472 if (chain
->vars
.exists ())
474 fprintf (file
, " vars");
475 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
478 print_generic_expr (file
, var
, TDF_SLIM
);
480 fprintf (file
, "\n");
483 if (chain
->inits
.exists ())
485 fprintf (file
, " inits");
486 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
489 print_generic_expr (file
, var
, TDF_SLIM
);
491 fprintf (file
, "\n");
494 fprintf (file
, " references:\n");
495 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
498 fprintf (file
, "\n");
501 /* Dumps CHAINS to FILE. */
503 extern void dump_chains (FILE *, vec
<chain_p
> );
505 dump_chains (FILE *file
, vec
<chain_p
> chains
)
510 FOR_EACH_VEC_ELT (chains
, i
, chain
)
511 dump_chain (file
, chain
);
514 /* Dumps COMP to FILE. */
516 extern void dump_component (FILE *, struct component
*);
518 dump_component (FILE *file
, struct component
*comp
)
523 fprintf (file
, "Component%s:\n",
524 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
525 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
527 fprintf (file
, "\n");
530 /* Dumps COMPS to FILE. */
532 extern void dump_components (FILE *, struct component
*);
534 dump_components (FILE *file
, struct component
*comps
)
536 struct component
*comp
;
538 for (comp
= comps
; comp
; comp
= comp
->next
)
539 dump_component (file
, comp
);
542 /* Frees a chain CHAIN. */
545 release_chain (chain_p chain
)
553 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
556 chain
->refs
.release ();
557 chain
->vars
.release ();
558 chain
->inits
.release ();
560 gimple_seq_discard (chain
->init_seq
);
562 chain
->finis
.release ();
564 gimple_seq_discard (chain
->fini_seq
);
572 release_chains (vec
<chain_p
> chains
)
577 FOR_EACH_VEC_ELT (chains
, i
, chain
)
578 release_chain (chain
);
582 /* Frees a component COMP. */
585 release_component (struct component
*comp
)
587 comp
->refs
.release ();
591 /* Frees list of components COMPS. */
594 release_components (struct component
*comps
)
596 struct component
*act
, *next
;
598 for (act
= comps
; act
; act
= next
)
601 release_component (act
);
605 /* Finds a root of tree given by FATHERS containing A, and performs path
609 component_of (unsigned fathers
[], unsigned a
)
613 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
616 for (; a
!= root
; a
= n
)
625 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
626 components, A and B are components to merge. */
629 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
631 unsigned ca
= component_of (fathers
, a
);
632 unsigned cb
= component_of (fathers
, b
);
637 if (sizes
[ca
] < sizes
[cb
])
639 sizes
[cb
] += sizes
[ca
];
644 sizes
[ca
] += sizes
[cb
];
649 /* Returns true if A is a reference that is suitable for predictive commoning
650 in the innermost loop that contains it. REF_STEP is set according to the
651 step of the reference A. */
654 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
656 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
659 || TREE_THIS_VOLATILE (ref
)
660 || !is_gimple_reg_type (TREE_TYPE (ref
))
661 || tree_could_throw_p (ref
))
664 if (integer_zerop (step
))
665 *ref_step
= RS_INVARIANT
;
666 else if (integer_nonzerop (step
))
667 *ref_step
= RS_NONZERO
;
674 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
677 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
679 tree type
= TREE_TYPE (DR_OFFSET (dr
));
682 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
684 aff_combination_const (&delta
, type
, wi::to_poly_widest (DR_INIT (dr
)));
685 aff_combination_add (offset
, &delta
);
688 /* Determines number of iterations of the innermost enclosing loop before B
689 refers to exactly the same location as A and stores it to OFF. If A and
690 B do not have the same step, they never meet, or anything else fails,
691 returns false, otherwise returns true. Both A and B are assumed to
692 satisfy suitable_reference_p. */
695 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
696 poly_widest_int
*off
)
698 aff_tree diff
, baseb
, step
;
701 /* Check that both the references access the location in the same type. */
702 typea
= TREE_TYPE (DR_REF (a
));
703 typeb
= TREE_TYPE (DR_REF (b
));
704 if (!useless_type_conversion_p (typeb
, typea
))
707 /* Check whether the base address and the step of both references is the
709 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
710 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
713 if (integer_zerop (DR_STEP (a
)))
715 /* If the references have loop invariant address, check that they access
716 exactly the same location. */
718 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
719 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
722 /* Compare the offsets of the addresses, and check whether the difference
723 is a multiple of step. */
724 aff_combination_dr_offset (a
, &diff
);
725 aff_combination_dr_offset (b
, &baseb
);
726 aff_combination_scale (&baseb
, -1);
727 aff_combination_add (&diff
, &baseb
);
729 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
730 &step
, &name_expansions
);
731 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
734 /* Returns the last basic block in LOOP for that we are sure that
735 it is executed whenever the loop is entered. */
738 last_always_executed_block (class loop
*loop
)
741 vec
<edge
> exits
= get_loop_exit_edges (loop
);
743 basic_block last
= loop
->latch
;
745 FOR_EACH_VEC_ELT (exits
, i
, ex
)
746 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
752 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
754 static struct component
*
755 split_data_refs_to_components (class loop
*loop
,
756 vec
<data_reference_p
> datarefs
,
759 unsigned i
, n
= datarefs
.length ();
760 unsigned ca
, ia
, ib
, bad
;
761 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
762 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
763 struct component
**comps
;
764 struct data_reference
*dr
, *dra
, *drb
;
765 struct data_dependence_relation
*ddr
;
766 struct component
*comp_list
= NULL
, *comp
;
768 /* Don't do store elimination if loop has multiple exit edges. */
769 bool eliminate_store_p
= single_exit (loop
) != NULL
;
770 basic_block last_always_executed
= last_always_executed_block (loop
);
772 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
776 /* A fake reference for call or asm_expr that may clobber memory;
780 /* predcom pass isn't prepared to handle calls with data references. */
781 if (is_gimple_call (DR_STMT (dr
)))
783 dr
->aux
= (void *) (size_t) i
;
788 /* A component reserved for the "bad" data references. */
792 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
794 enum ref_step_type dummy
;
796 if (!suitable_reference_p (dr
, &dummy
))
798 ia
= (unsigned) (size_t) dr
->aux
;
799 merge_comps (comp_father
, comp_size
, n
, ia
);
803 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
805 poly_widest_int dummy_off
;
807 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
813 /* Don't do store elimination if there is any unknown dependence for
814 any store data reference. */
815 if ((DR_IS_WRITE (dra
) || DR_IS_WRITE (drb
))
816 && (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
817 || DDR_NUM_DIST_VECTS (ddr
) == 0))
818 eliminate_store_p
= false;
820 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
821 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
825 bad
= component_of (comp_father
, n
);
827 /* If both A and B are reads, we may ignore unsuitable dependences. */
828 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
830 if (ia
== bad
|| ib
== bad
831 || !determine_offset (dra
, drb
, &dummy_off
))
834 /* If A is read and B write or vice versa and there is unsuitable
835 dependence, instead of merging both components into a component
836 that will certainly not pass suitable_component_p, just put the
837 read into bad component, perhaps at least the write together with
838 all the other data refs in it's component will be optimizable. */
839 else if (DR_IS_READ (dra
) && ib
!= bad
)
843 else if (!determine_offset (dra
, drb
, &dummy_off
))
845 merge_comps (comp_father
, comp_size
, bad
, ia
);
849 else if (DR_IS_READ (drb
) && ia
!= bad
)
853 else if (!determine_offset (dra
, drb
, &dummy_off
))
855 merge_comps (comp_father
, comp_size
, bad
, ib
);
859 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
)
860 && ia
!= bad
&& ib
!= bad
861 && !determine_offset (dra
, drb
, &dummy_off
))
863 merge_comps (comp_father
, comp_size
, bad
, ia
);
864 merge_comps (comp_father
, comp_size
, bad
, ib
);
868 merge_comps (comp_father
, comp_size
, ia
, ib
);
871 if (eliminate_store_p
)
873 tree niters
= number_of_latch_executions (loop
);
875 /* Don't do store elimination if niters info is unknown because stores
876 in the last iteration can't be eliminated and we need to recover it
878 eliminate_store_p
= (niters
!= NULL_TREE
&& niters
!= chrec_dont_know
);
881 comps
= XCNEWVEC (struct component
*, n
);
882 bad
= component_of (comp_father
, n
);
883 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
885 ia
= (unsigned) (size_t) dr
->aux
;
886 ca
= component_of (comp_father
, ia
);
893 comp
= XCNEW (struct component
);
894 comp
->refs
.create (comp_size
[ca
]);
895 comp
->eliminate_store_p
= eliminate_store_p
;
899 dataref
= XCNEW (class dref_d
);
901 dataref
->stmt
= DR_STMT (dr
);
903 dataref
->distance
= 0;
905 dataref
->always_accessed
906 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
907 gimple_bb (dataref
->stmt
));
908 dataref
->pos
= comp
->refs
.length ();
909 comp
->refs
.quick_push (dataref
);
912 for (i
= 0; i
< n
; i
++)
917 comp
->next
= comp_list
;
929 /* Returns true if the component COMP satisfies the conditions
930 described in 2) at the beginning of this file. LOOP is the current
934 suitable_component_p (class loop
*loop
, struct component
*comp
)
938 basic_block ba
, bp
= loop
->header
;
939 bool ok
, has_write
= false;
941 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
943 ba
= gimple_bb (a
->stmt
);
945 if (!just_once_each_iteration_p (loop
, ba
))
948 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
951 if (DR_IS_WRITE (a
->ref
))
955 first
= comp
->refs
[0];
956 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
960 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
962 /* Polynomial offsets are no use, since we need to know the
963 gap between iteration numbers at compile time. */
964 poly_widest_int offset
;
965 if (!determine_offset (first
->ref
, a
->ref
, &offset
)
966 || !offset
.is_constant (&a
->offset
))
969 enum ref_step_type a_step
;
970 gcc_checking_assert (suitable_reference_p (a
->ref
, &a_step
)
971 && a_step
== comp
->comp_step
);
974 /* If there is a write inside the component, we must know whether the
975 step is nonzero or not -- we would not otherwise be able to recognize
976 whether the value accessed by reads comes from the OFFSET-th iteration
977 or the previous one. */
978 if (has_write
&& comp
->comp_step
== RS_ANY
)
984 /* Check the conditions on references inside each of components COMPS,
985 and remove the unsuitable components from the list. The new list
986 of components is returned. The conditions are described in 2) at
987 the beginning of this file. LOOP is the current loop. */
989 static struct component
*
990 filter_suitable_components (class loop
*loop
, struct component
*comps
)
992 struct component
**comp
, *act
;
994 for (comp
= &comps
; *comp
; )
997 if (suitable_component_p (loop
, act
))
1005 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
1007 release_component (act
);
1014 /* Compares two drefs A and B by their offset and position. Callback for
1018 order_drefs (const void *a
, const void *b
)
1020 const dref
*const da
= (const dref
*) a
;
1021 const dref
*const db
= (const dref
*) b
;
1022 int offcmp
= wi::cmps ((*da
)->offset
, (*db
)->offset
);
1027 return (*da
)->pos
- (*db
)->pos
;
1030 /* Compares two drefs A and B by their position. Callback for qsort. */
1033 order_drefs_by_pos (const void *a
, const void *b
)
1035 const dref
*const da
= (const dref
*) a
;
1036 const dref
*const db
= (const dref
*) b
;
1038 return (*da
)->pos
- (*db
)->pos
;
1041 /* Returns root of the CHAIN. */
1044 get_chain_root (chain_p chain
)
1046 return chain
->refs
[0];
1049 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1053 get_chain_last_write_at (chain_p chain
, unsigned distance
)
1055 for (unsigned i
= chain
->refs
.length (); i
> 0; i
--)
1056 if (DR_IS_WRITE (chain
->refs
[i
- 1]->ref
)
1057 && distance
== chain
->refs
[i
- 1]->distance
)
1058 return chain
->refs
[i
- 1];
1063 /* Given CHAIN, returns the last write ref with the same distance before load
1064 at index LOAD_IDX, or NULL if it doesn't exist. */
1067 get_chain_last_write_before_load (chain_p chain
, unsigned load_idx
)
1069 gcc_assert (load_idx
< chain
->refs
.length ());
1071 unsigned distance
= chain
->refs
[load_idx
]->distance
;
1073 for (unsigned i
= load_idx
; i
> 0; i
--)
1074 if (DR_IS_WRITE (chain
->refs
[i
- 1]->ref
)
1075 && distance
== chain
->refs
[i
- 1]->distance
)
1076 return chain
->refs
[i
- 1];
1081 /* Adds REF to the chain CHAIN. */
1084 add_ref_to_chain (chain_p chain
, dref ref
)
1086 dref root
= get_chain_root (chain
);
1088 gcc_assert (wi::les_p (root
->offset
, ref
->offset
));
1089 widest_int dist
= ref
->offset
- root
->offset
;
1090 gcc_assert (wi::fits_uhwi_p (dist
));
1092 chain
->refs
.safe_push (ref
);
1094 ref
->distance
= dist
.to_uhwi ();
1096 if (ref
->distance
>= chain
->length
)
1098 chain
->length
= ref
->distance
;
1099 chain
->has_max_use_after
= false;
1102 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1103 if (DR_IS_WRITE (ref
->ref
))
1104 chain
->type
= CT_STORE_STORE
;
1106 /* Don't set the flag for store-store chain since there is no use. */
1107 if (chain
->type
!= CT_STORE_STORE
1108 && ref
->distance
== chain
->length
1109 && ref
->pos
> root
->pos
)
1110 chain
->has_max_use_after
= true;
1112 chain
->all_always_accessed
&= ref
->always_accessed
;
1115 /* Returns the chain for invariant component COMP. */
1118 make_invariant_chain (struct component
*comp
)
1120 chain_p chain
= XCNEW (struct chain
);
1124 chain
->type
= CT_INVARIANT
;
1126 chain
->all_always_accessed
= true;
1128 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
1130 chain
->refs
.safe_push (ref
);
1131 chain
->all_always_accessed
&= ref
->always_accessed
;
1134 chain
->inits
= vNULL
;
1135 chain
->finis
= vNULL
;
1140 /* Make a new chain of type TYPE rooted at REF. */
1143 make_rooted_chain (dref ref
, enum chain_type type
)
1145 chain_p chain
= XCNEW (struct chain
);
1148 chain
->refs
.safe_push (ref
);
1149 chain
->all_always_accessed
= ref
->always_accessed
;
1152 chain
->inits
= vNULL
;
1153 chain
->finis
= vNULL
;
1158 /* Returns true if CHAIN is not trivial. */
1161 nontrivial_chain_p (chain_p chain
)
1163 return chain
!= NULL
&& chain
->refs
.length () > 1;
1166 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1170 name_for_ref (dref ref
)
1174 if (is_gimple_assign (ref
->stmt
))
1176 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1177 name
= gimple_assign_lhs (ref
->stmt
);
1179 name
= gimple_assign_rhs1 (ref
->stmt
);
1182 name
= PHI_RESULT (ref
->stmt
);
1184 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1187 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1188 iterations of the innermost enclosing loop). */
1191 valid_initializer_p (struct data_reference
*ref
,
1192 unsigned distance
, struct data_reference
*root
)
1194 aff_tree diff
, base
, step
;
1195 poly_widest_int off
;
1197 /* Both REF and ROOT must be accessing the same object. */
1198 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1201 /* The initializer is defined outside of loop, hence its address must be
1202 invariant inside the loop. */
1203 gcc_assert (integer_zerop (DR_STEP (ref
)));
1205 /* If the address of the reference is invariant, initializer must access
1206 exactly the same location. */
1207 if (integer_zerop (DR_STEP (root
)))
1208 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1209 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1211 /* Verify that this index of REF is equal to the root's index at
1212 -DISTANCE-th iteration. */
1213 aff_combination_dr_offset (root
, &diff
);
1214 aff_combination_dr_offset (ref
, &base
);
1215 aff_combination_scale (&base
, -1);
1216 aff_combination_add (&diff
, &base
);
1218 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1219 &step
, &name_expansions
);
1220 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1223 if (maybe_ne (off
, distance
))
1229 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1230 initial value is correct (equal to initial value of REF shifted by one
1231 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1232 is the root of the current chain. */
1235 find_looparound_phi (class loop
*loop
, dref ref
, dref root
)
1237 tree name
, init
, init_ref
;
1240 edge latch
= loop_latch_edge (loop
);
1241 struct data_reference init_dr
;
1244 if (is_gimple_assign (ref
->stmt
))
1246 if (DR_IS_READ (ref
->ref
))
1247 name
= gimple_assign_lhs (ref
->stmt
);
1249 name
= gimple_assign_rhs1 (ref
->stmt
);
1252 name
= PHI_RESULT (ref
->stmt
);
1256 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1259 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1263 if (gsi_end_p (psi
))
1266 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1267 if (TREE_CODE (init
) != SSA_NAME
)
1269 init_stmt
= SSA_NAME_DEF_STMT (init
);
1270 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1272 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1274 init_ref
= gimple_assign_rhs1 (init_stmt
);
1275 if (!REFERENCE_CLASS_P (init_ref
)
1276 && !DECL_P (init_ref
))
1279 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1280 loop enclosing PHI). */
1281 memset (&init_dr
, 0, sizeof (struct data_reference
));
1282 DR_REF (&init_dr
) = init_ref
;
1283 DR_STMT (&init_dr
) = phi
;
1284 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr
), init_ref
, loop
,
1288 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1294 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1297 insert_looparound_copy (chain_p chain
, dref ref
, gphi
*phi
)
1299 dref nw
= XCNEW (class dref_d
), aref
;
1303 nw
->distance
= ref
->distance
+ 1;
1304 nw
->always_accessed
= 1;
1306 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1307 if (aref
->distance
>= nw
->distance
)
1309 chain
->refs
.safe_insert (i
, nw
);
1311 if (nw
->distance
> chain
->length
)
1313 chain
->length
= nw
->distance
;
1314 chain
->has_max_use_after
= false;
1318 /* For references in CHAIN that are copied around the LOOP (created previously
1319 by PRE, or by user), add the results of such copies to the chain. This
1320 enables us to remove the copies by unrolling, and may need less registers
1321 (also, it may allow us to combine chains together). */
1324 add_looparound_copies (class loop
*loop
, chain_p chain
)
1327 dref ref
, root
= get_chain_root (chain
);
/* Store-Store chains do not take looparound copies (the body of this `if`
   appears truncated in this extract).  */
1330 if (chain
->type
== CT_STORE_STORE
)
/* For each reference, look for a PHI in LOOP's header that copies its
   value around the loop; record and insert it into the chain.  */
1333 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1335 phi
= find_looparound_phi (loop
, ref
, root
);
/* Remember the PHI so later use-scans can skip it (see
   single_nonlooparound_use).  */
1339 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1340 insert_looparound_copy (chain
, ref
, phi
);
1344 /* Find roots of the values and determine distances in the component COMP.
1345 The references are redistributed into CHAINS. LOOP is the current
1349 determine_roots_comp (class loop
*loop
,
1350 struct component
*comp
,
1351 vec
<chain_p
> *chains
)
1355 chain_p chain
= NULL
;
1356 widest_int last_ofs
= 0;
1357 enum chain_type type
;
1359 /* Invariants are handled specially. */
1360 if (comp
->comp_step
== RS_INVARIANT
)
1362 chain
= make_invariant_chain (comp
);
1363 chains
->safe_push (chain
);
1367 /* Trivial component. */
1368 if (comp
->refs
.length () <= 1)
1370 if (comp
->refs
.length () == 1)
1372 free (comp
->refs
[0]);
1373 comp
->refs
.truncate (0);
/* Sort references so that chains can be built by a linear scan in
   offset order.  */
1378 comp
->refs
.qsort (order_drefs
);
1380 /* For Store-Store chain, we only support load if it is dominated by a
1381 store statement in the same iteration of loop. */
1382 if (comp
->eliminate_store_p
)
1383 for (a
= NULL
, i
= 0; i
< comp
->refs
.length (); i
++)
1385 if (DR_IS_WRITE (comp
->refs
[i
]->ref
))
1387 else if (a
== NULL
|| a
->offset
!= comp
->refs
[i
]->offset
)
1389 /* If there is load that is not dominated by a store in the
1390 same iteration of loop, clear the flag so no Store-Store
1391 chain is generated for this component. */
1392 comp
->eliminate_store_p
= false;
1397 /* Determine roots and create chains for components. */
/* A new chain is started when there is no current chain, when a write
   would join a CT_LOAD chain, when writes are not eliminable, or when the
   offset gap from the last root exceeds MAX_DISTANCE.  */
1398 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1401 || (chain
->type
== CT_LOAD
&& DR_IS_WRITE (a
->ref
))
1402 || (!comp
->eliminate_store_p
&& DR_IS_WRITE (a
->ref
))
1403 || wi::leu_p (MAX_DISTANCE
, a
->offset
- last_ofs
))
1405 if (nontrivial_chain_p (chain
))
1407 add_looparound_copies (loop
, chain
);
1408 chains
->safe_push (chain
);
1411 release_chain (chain
);
1413 /* Determine type of the chain. If the root reference is a load,
1414 this can only be a CT_LOAD chain; other chains are intialized
1415 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1416 new reference is added. */
1417 type
= DR_IS_READ (a
->ref
) ? CT_LOAD
: CT_STORE_LOAD
;
1418 chain
= make_rooted_chain (a
, type
);
1419 last_ofs
= a
->offset
;
1423 add_ref_to_chain (chain
, a
);
/* Flush the final chain the same way as the in-loop case above.  */
1426 if (nontrivial_chain_p (chain
))
1428 add_looparound_copies (loop
, chain
);
1429 chains
->safe_push (chain
);
1432 release_chain (chain
);
1435 /* Find roots of the values and determine distances in components COMPS, and
1436 separates the references to CHAINS. LOOP is the current loop. */
1439 determine_roots (class loop
*loop
,
1440 struct component
*comps
, vec
<chain_p
> *chains
)
1442 struct component
*comp
;
/* Simply delegate to determine_roots_comp for each component in the
   linked list.  */
1444 for (comp
= comps
; comp
; comp
= comp
->next
)
1445 determine_roots_comp (loop
, comp
, chains
);
1448 /* Replace the reference in statement STMT with temporary variable
1449 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1450 the reference in the statement. IN_LHS is true if the reference
1451 is in the lhs of STMT, false if it is in rhs. */
1454 replace_ref_with (gimple
*stmt
, tree new_tree
, bool set
, bool in_lhs
)
1458 gimple_stmt_iterator bsi
, psi
;
/* A PHI cannot hold a memory reference replacement; remove it and emit an
   equivalent assignment after the labels of its block.  */
1460 if (gimple_code (stmt
) == GIMPLE_PHI
)
1462 gcc_assert (!in_lhs
&& !set
);
1464 val
= PHI_RESULT (stmt
);
1465 bsi
= gsi_after_labels (gimple_bb (stmt
));
1466 psi
= gsi_for_stmt (stmt
);
1467 remove_phi_node (&psi
, false);
1469 /* Turn the phi node into GIMPLE_ASSIGN. */
1470 new_stmt
= gimple_build_assign (val
, new_tree
);
1471 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1475 /* Since the reference is of gimple_reg type, it should only
1476 appear as lhs or rhs of modify statement. */
1477 gcc_assert (is_gimple_assign (stmt
));
1479 bsi
= gsi_for_stmt (stmt
);
1481 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1484 gcc_assert (!in_lhs
);
1485 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1486 stmt
= gsi_stmt (bsi
);
1493 /* We have statement
1497 If OLD is a memory reference, then VAL is gimple_val, and we transform
1503 Otherwise, we are replacing a combination chain,
1504 VAL is the expression that performs the combination, and OLD is an
1505 SSA name. In this case, we transform the assignment to
1512 val
= gimple_assign_lhs (stmt
);
1513 if (TREE_CODE (val
) != SSA_NAME
)
1515 val
= gimple_assign_rhs1 (stmt
);
1516 gcc_assert (gimple_assign_single_p (stmt
));
/* A clobber has no usable value; use the default definition of the
   temporary's variable instead.  */
1517 if (TREE_CLOBBER_P (val
))
1518 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1520 gcc_assert (gimple_assign_copy_p (stmt
));
1532 val
= gimple_assign_lhs (stmt
);
/* Initialize NEW_TREE from VAL right after STMT.  */
1535 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1536 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1539 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1540 of the loop it was analyzed in. Append init stmts to STMTS. */
1543 ref_at_iteration (data_reference_p dr
, int iter
,
1544 gimple_seq
*stmts
, tree niters
= NULL_TREE
)
1546 tree off
= DR_OFFSET (dr
);
1547 tree coff
= DR_INIT (dr
);
1548 tree ref
= DR_REF (dr
);
1549 enum tree_code ref_code
= ERROR_MARK
;
1550 tree ref_type
= NULL_TREE
;
1551 tree ref_op1
= NULL_TREE
;
1552 tree ref_op2
= NULL_TREE
;
/* Advance by ITER steps; fold constant adjustments into the constant
   offset COFF, variable ones into OFF.  */
1557 new_offset
= size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
));
1558 if (TREE_CODE (new_offset
) == INTEGER_CST
)
1559 coff
= size_binop (PLUS_EXPR
, coff
, new_offset
);
1561 off
= size_binop (PLUS_EXPR
, off
, new_offset
);
/* Likewise advance by NITERS steps, if given.  */
1564 if (niters
!= NULL_TREE
)
1566 niters
= fold_convert (ssizetype
, niters
);
1567 new_offset
= size_binop (MULT_EXPR
, DR_STEP (dr
), niters
);
1568 if (TREE_CODE (niters
) == INTEGER_CST
)
1569 coff
= size_binop (PLUS_EXPR
, coff
, new_offset
);
1571 off
= size_binop (PLUS_EXPR
, off
, new_offset
);
1574 /* While data-ref analysis punts on bit offsets it still handles
1575 bitfield accesses at byte boundaries. Cope with that. Note that
1576 if the bitfield object also starts at a byte-boundary we can simply
1577 replicate the COMPONENT_REF, but we have to subtract the component's
1578 byte-offset from the MEM_REF address first.
1579 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1580 start at offset zero. */
1581 if (TREE_CODE (ref
) == COMPONENT_REF
1582 && DECL_BIT_FIELD (TREE_OPERAND (ref
, 1)))
1584 unsigned HOST_WIDE_INT boff
;
1585 tree field
= TREE_OPERAND (ref
, 1);
1586 tree offset
= component_ref_field_offset (ref
);
1587 ref_type
= TREE_TYPE (ref
);
1588 boff
= tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field
));
1589 /* This can occur in Ada. See the comment in get_bit_range. */
1590 if (boff
% BITS_PER_UNIT
!= 0
1591 || !tree_fits_uhwi_p (offset
))
1593 ref_code
= BIT_FIELD_REF
;
1594 ref_op1
= DECL_SIZE (field
);
1595 ref_op2
= bitsize_zero_node
;
/* Byte-aligned bitfield: fold the field's byte offset out of COFF and
   rebuild a COMPONENT_REF around the MEM_REF built below.  */
1599 boff
>>= LOG2_BITS_PER_UNIT
;
1600 boff
+= tree_to_uhwi (offset
);
1601 coff
= size_binop (MINUS_EXPR
, coff
, ssize_int (boff
));
1602 ref_code
= COMPONENT_REF
;
1604 ref_op2
= TREE_OPERAND (ref
, 2);
1605 ref
= TREE_OPERAND (ref
, 0);
/* Build the address, gimplify it into STMTS, and form the MEM_REF with
   the alias info and alignment of the original reference.  */
1608 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1609 addr
= force_gimple_operand_1 (unshare_expr (addr
), stmts
,
1610 is_gimple_mem_ref_addr
, NULL_TREE
);
1611 tree alias_ptr
= fold_convert (reference_alias_ptr_type (ref
), coff
);
1612 tree type
= build_aligned_type (TREE_TYPE (ref
),
1613 get_object_alignment (ref
));
1614 ref
= build2 (MEM_REF
, type
, addr
, alias_ptr
);
1616 ref
= build3 (ref_code
, ref_type
, ref
, ref_op1
, ref_op2
);
1620 /* Get the initialization expression for the INDEX-th temporary variable
/* For combination chains the value is recomputed by combining the inits of
   both sub-chains; otherwise it is taken directly from CHAIN->inits.  */
1624 get_init_expr (chain_p chain
, unsigned index
)
1626 if (chain
->type
== CT_COMBINATION
)
1628 tree e1
= get_init_expr (chain
->ch1
, index
);
1629 tree e2
= get_init_expr (chain
->ch2
, index
);
1631 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1634 return chain
->inits
[index
];
1637 /* Returns a new temporary variable used for the I-th variable carrying
1638 value of REF. The variable's uid is marked in TMP_VARS. */
1641 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1643 tree type
= TREE_TYPE (ref
);
1644 /* We never access the components of the temporary variable in predictive
/* create_tmp_reg gives a register-suitable temporary named after REF.  */
1646 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
/* Record the uid so eliminate_temp_copies can recognize our variables.  */
1647 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1651 /* Creates the variables for CHAIN, as well as phi nodes for them and
1652 initialization on entry to LOOP. Uids of the newly created
1653 temporary variables are marked in TMP_VARS. */
1656 initialize_root_vars (class loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1659 unsigned n
= chain
->length
;
1660 dref root
= get_chain_root (chain
);
/* If no use follows the maximal-distance one, the first and last variable
   can share storage.  */
1661 bool reuse_first
= !chain
->has_max_use_after
;
1662 tree ref
, init
, var
, next
;
1665 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1667 /* If N == 0, then all the references are within the single iteration. And
1668 since this is an nonempty chain, reuse_first cannot be true. */
1669 gcc_assert (n
> 0 || !reuse_first
);
1671 chain
->vars
.create (n
+ 1);
/* Pick a representative reference whose type names the temporaries.  */
1673 if (chain
->type
== CT_COMBINATION
)
1674 ref
= gimple_assign_lhs (root
->stmt
);
1676 ref
= DR_REF (root
->ref
);
1678 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1680 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1681 chain
->vars
.quick_push (var
);
/* With reuse_first the last slot aliases the first variable.  */
1684 chain
->vars
.quick_push (chain
->vars
[0]);
1686 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1687 chain
->vars
[i
] = make_ssa_name (var
);
/* For each adjacent pair of variables create a header PHI fed by the
   initializer on the preheader edge and by the next variable on the latch.  */
1689 for (i
= 0; i
< n
; i
++)
1691 var
= chain
->vars
[i
];
1692 next
= chain
->vars
[i
+ 1];
1693 init
= get_init_expr (chain
, i
);
1695 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1697 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1699 phi
= create_phi_node (var
, loop
->header
);
1700 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1701 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1705 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1706 all stores to be eliminated store loop invariant values into memory.
1707 In this case, we can use these invariant values directly after LOOP. */
1710 is_inv_store_elimination_chain (class loop
*loop
, chain_p chain
)
1712 if (chain
->length
== 0 || chain
->type
!= CT_STORE_STORE
)
1715 gcc_assert (!chain
->has_max_use_after
);
1717 /* If loop iterates for unknown times or fewer times than chain->length,
1718 we still need to setup root variable and propagate it with PHI node. */
1719 tree niters
= number_of_latch_executions (loop
);
1720 if (TREE_CODE (niters
) != INTEGER_CST
1721 || wi::leu_p (wi::to_wide (niters
), chain
->length
))
1724 /* Check stores in chain for elimination if they only store loop invariant
/* A value is invariant if it is a constant, or an SSA name defined outside
   LOOP (or with no defining statement at all).  */
1726 for (unsigned i
= 0; i
< chain
->length
; i
++)
1728 dref a
= get_chain_last_write_at (chain
, i
);
1732 gimple
*def_stmt
, *stmt
= a
->stmt
;
1733 if (!gimple_assign_single_p (stmt
))
1736 tree val
= gimple_assign_rhs1 (stmt
);
1737 if (TREE_CLOBBER_P (val
))
1740 if (CONSTANT_CLASS_P (val
))
1743 if (TREE_CODE (val
) != SSA_NAME
)
1746 def_stmt
= SSA_NAME_DEF_STMT (val
);
1747 if (gimple_nop_p (def_stmt
))
1750 if (flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
1756 /* Creates root variables for store elimination CHAIN in which stores for
1757 elimination only store loop invariant values. In this case, we neither
1758 need to load root variables before loop nor propagate it with PHI nodes. */
1761 initialize_root_vars_store_elim_1 (chain_p chain
)
1764 unsigned i
, n
= chain
->length
;
1766 chain
->vars
.create (n
);
1767 chain
->vars
.safe_grow_cleared (n
);
1769 /* Initialize root value for eliminated stores at each distance. */
1770 for (i
= 0; i
< n
; i
++)
1772 dref a
= get_chain_last_write_at (chain
, i
);
/* The root value is the (invariant) rhs of the last store at this
   distance.  */
1776 var
= gimple_assign_rhs1 (a
->stmt
);
1777 chain
->vars
[a
->distance
] = var
;
1780 /* We don't propagate values with PHI nodes, so manually propagate value
1781 to bubble positions. */
1782 var
= chain
->vars
[0];
1783 for (i
= 1; i
< n
; i
++)
1785 if (chain
->vars
[i
] != NULL_TREE
)
1787 var
= chain
->vars
[i
];
1790 chain
->vars
[i
] = var
;
1793 /* Revert the vector. */
/* (i.e. reverse it, so index order matches the order the finalization
   code expects.)  */
1794 for (i
= 0; i
< n
/ 2; i
++)
1795 std::swap (chain
->vars
[i
], chain
->vars
[n
- i
- 1]);
1798 /* Creates root variables for store elimination CHAIN in which stores for
1799 elimination store loop variant values. In this case, we may need to
1800 load root variables before LOOP and propagate it with PHI nodes. Uids
1801 of the newly created root variables are marked in TMP_VARS. */
1804 initialize_root_vars_store_elim_2 (class loop
*loop
,
1805 chain_p chain
, bitmap tmp_vars
)
1807 unsigned i
, n
= chain
->length
;
1808 tree ref
, init
, var
, next
, val
, phi_result
;
1812 chain
->vars
.create (n
);
1814 ref
= DR_REF (get_chain_root (chain
)->ref
);
1815 for (i
= 0; i
< n
; i
++)
1817 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1818 chain
->vars
.quick_push (var
);
1821 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1822 chain
->vars
[i
] = make_ssa_name (var
);
1824 /* Root values are either rhs operand of stores to be eliminated, or
1825 loaded from memory before loop. */
1826 auto_vec
<tree
> vtemps
;
1827 vtemps
.safe_grow_cleared (n
);
1828 for (i
= 0; i
< n
; i
++)
1830 init
= get_init_expr (chain
, i
);
1831 if (init
== NULL_TREE
)
1833 /* Root value is rhs operand of the store to be eliminated if
1834 it isn't loaded from memory before loop. */
1835 dref a
= get_chain_last_write_at (chain
, i
);
1836 val
= gimple_assign_rhs1 (a
->stmt
);
/* A clobber carries no value; substitute the default definition and
   rewrite the store's rhs accordingly.  */
1837 if (TREE_CLOBBER_P (val
))
1839 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (var
));
1840 gimple_assign_set_rhs1 (a
->stmt
, val
);
1843 vtemps
[n
- i
- 1] = val
;
1847 edge latch
= loop_latch_edge (loop
);
1848 edge entry
= loop_preheader_edge (loop
);
1850 /* Root value is loaded from memory before loop, we also need
1851 to add PHI nodes to propagate the value across iterations. */
1852 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1854 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1856 next
= chain
->vars
[n
- i
];
1857 phi_result
= copy_ssa_name (next
);
1858 gphi
*phi
= create_phi_node (phi_result
, loop
->header
);
1859 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1860 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1861 vtemps
[n
- i
- 1] = phi_result
;
1865 /* Find the insertion position. */
/* i.e. the reference latest in the loop body; root-variable copies go
   after it.  */
1866 dref last
= get_chain_root (chain
);
1867 for (i
= 0; i
< chain
->refs
.length (); i
++)
1869 if (chain
->refs
[i
]->pos
> last
->pos
)
1870 last
= chain
->refs
[i
];
1873 gimple_stmt_iterator gsi
= gsi_for_stmt (last
->stmt
);
1875 /* Insert statements copying root value to root variable. */
1876 for (i
= 0; i
< n
; i
++)
1878 var
= chain
->vars
[i
];
1880 stmt
= gimple_build_assign (var
, val
);
1881 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1885 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1886 (CHAIN->length - 1) iterations. */
1889 finalize_eliminated_stores (class loop
*loop
, chain_p chain
)
1891 unsigned i
, n
= chain
->length
;
/* Build the compensating stores (note FINIS is indexed in reverse).  */
1893 for (i
= 0; i
< n
; i
++)
1895 tree var
= chain
->vars
[i
];
1896 tree fini
= chain
->finis
[n
- i
- 1];
1897 gimple
*stmt
= gimple_build_assign (fini
, var
);
1899 gimple_seq_add_stmt_without_update (&chain
->fini_seq
, stmt
);
/* Emit them on the single exit edge of LOOP.  */
1902 if (chain
->fini_seq
)
1904 gsi_insert_seq_on_edge_immediate (single_exit (loop
), chain
->fini_seq
);
1905 chain
->fini_seq
= NULL
;
1909 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1910 initialization on entry to LOOP if necessary. The ssa name for the variable
1911 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1912 around the loop is created. Uid of the newly created temporary variable
1913 is marked in TMP_VARS. INITS is the list containing the (single)
1917 initialize_root_vars_lm (class loop
*loop
, dref root
, bool written
,
1918 vec
<tree
> *vars
, vec
<tree
> inits
,
1922 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1925 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1927 /* Find the initializer for the variable, and check that it cannot
/* One variable suffices for a read-only invariant; a written one also
   needs a second SSA name for the loop-carried PHI.  */
1931 vars
->create (written
? 2 : 1);
1932 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1933 vars
->quick_push (var
);
1935 vars
->quick_push ((*vars
)[0]);
1937 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1938 (*vars
)[i
] = make_ssa_name (var
);
1942 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1944 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
/* Written case: propagate the value around the loop with a header PHI.  */
1949 phi
= create_phi_node (var
, loop
->header
);
1950 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1951 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
/* Read-only case: a single initialization on the preheader edge.  */
1955 gassign
*init_stmt
= gimple_build_assign (var
, init
);
1956 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1961 /* Execute load motion for references in chain CHAIN. Uids of the newly
1962 created temporary variables are marked in TMP_VARS. */
1965 execute_load_motion (class loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1967 auto_vec
<tree
> vars
;
1969 unsigned n_writes
= 0, ridx
, i
;
1972 gcc_assert (chain
->type
== CT_INVARIANT
);
1973 gcc_assert (!chain
->combined
);
/* Count writes; they determine whether a loop-carried PHI is needed.  */
1974 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1975 if (DR_IS_WRITE (a
->ref
))
1978 /* If there are no reads in the loop, there is nothing to do. */
1979 if (n_writes
== chain
->refs
.length ())
1982 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
1983 &vars
, chain
->inits
, tmp_vars
);
/* Replace each reference by the appropriate variable; a write also gets a
   fresh SSA name of the same variable.  */
1986 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1988 bool is_read
= DR_IS_READ (a
->ref
);
1990 if (DR_IS_WRITE (a
->ref
))
1996 var
= make_ssa_name (SSA_NAME_VAR (var
));
2003 replace_ref_with (a
->stmt
, vars
[ridx
],
2004 !is_read
, !is_read
);
2008 /* Returns the single statement in that NAME is used, excepting
2009 the looparound phi nodes contained in one of the chains. If there is no
2010 such statement, or more statements, NULL is returned. */
2013 single_nonlooparound_use (tree name
)
2016 imm_use_iterator it
;
2017 gimple
*stmt
, *ret
= NULL
;
2019 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
2021 stmt
= USE_STMT (use
);
2023 if (gimple_code (stmt
) == GIMPLE_PHI
)
2025 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2026 could not be processed anyway, so just fail for them. */
2027 if (bitmap_bit_p (looparound_phis
,
2028 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
/* Debug statements never count as uses here.  */
2033 else if (is_gimple_debug (stmt
))
/* A second real use means the use is not single; fail.  */
2035 else if (ret
!= NULL
)
2044 /* Remove statement STMT, as well as the chain of assignments in that it is
2048 remove_stmt (gimple
*stmt
)
2052 gimple_stmt_iterator psi
;
/* A PHI is removed directly; then follow its single-use copy chain.  */
2054 if (gimple_code (stmt
) == GIMPLE_PHI
)
2056 name
= PHI_RESULT (stmt
);
2057 next
= single_nonlooparound_use (name
);
2058 reset_debug_uses (stmt
);
2059 psi
= gsi_for_stmt (stmt
);
2060 remove_phi_node (&psi
, true);
/* Stop following unless NEXT is a plain SSA-name copy of NAME.  */
2063 || !gimple_assign_ssa_name_copy_p (next
)
2064 || gimple_assign_rhs1 (next
) != name
)
2072 gimple_stmt_iterator bsi
;
2074 bsi
= gsi_for_stmt (stmt
);
2076 name
= gimple_assign_lhs (stmt
);
2077 if (TREE_CODE (name
) == SSA_NAME
)
2079 next
= single_nonlooparound_use (name
);
2080 reset_debug_uses (stmt
);
2084 /* This is a store to be eliminated. */
2085 gcc_assert (gimple_vdef (stmt
) != NULL
);
2089 unlink_stmt_vdef (stmt
);
2090 gsi_remove (&bsi
, true);
2091 release_defs (stmt
);
/* Same copy-chain-following test as in the PHI branch above.  */
2094 || !gimple_assign_ssa_name_copy_p (next
)
2095 || gimple_assign_rhs1 (next
) != name
)
2102 /* Perform the predictive commoning optimization for a chain CHAIN.
2103 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2106 execute_pred_commoning_chain (class loop
*loop
, chain_p chain
,
2114 if (chain
->combined
)
2116 /* For combined chains, just remove the statements that are used to
2117 compute the values of the expression (except for the root one).
2118 We delay this until after all chains are processed. */
2120 else if (chain
->type
== CT_STORE_STORE
)
2122 if (chain
->length
> 0)
2124 if (chain
->inv_store_elimination
)
2126 /* If dead stores in this chain only store loop invariant
2127 values, we can simply record the invariant value and use
2128 it directly after loop. */
2129 initialize_root_vars_store_elim_1 (chain
);
2133 /* If dead stores in this chain store loop variant values,
2134 we need to set up the variables by loading from memory
2135 before loop and propagating it with PHI nodes. */
2136 initialize_root_vars_store_elim_2 (loop
, chain
, tmp_vars
);
2139 /* For inter-iteration store elimination chain, stores at each
2140 distance in loop's last (chain->length - 1) iterations can't
2141 be eliminated, because there is no following killing store.
2142 We need to generate these stores after loop. */
2143 finalize_eliminated_stores (loop
, chain
);
/* Walk references backwards so the last store is seen first and kept.  */
2146 bool last_store_p
= true;
2147 for (i
= chain
->refs
.length (); i
> 0; i
--)
2149 a
= chain
->refs
[i
- 1];
2150 /* Preserve the last store of the chain. Eliminate other stores
2151 which are killed by the last one. */
2152 if (DR_IS_WRITE (a
->ref
))
2155 last_store_p
= false;
2157 remove_stmt (a
->stmt
);
2162 /* Any load in Store-Store chain must be dominated by a previous
2163 store, we replace the load reference with rhs of the store. */
2164 dref b
= get_chain_last_write_before_load (chain
, i
- 1);
2165 gcc_assert (b
!= NULL
);
2166 var
= gimple_assign_rhs1 (b
->stmt
);
2167 replace_ref_with (a
->stmt
, var
, false, false);
2172 /* For non-combined chains, set up the variables that hold its value. */
2173 initialize_root_vars (loop
, chain
, tmp_vars
);
2174 a
= get_chain_root (chain
);
/* The root's reference sits in the lhs for store-rooted and combination
   chains.  */
2175 in_lhs
= (chain
->type
== CT_STORE_LOAD
2176 || chain
->type
== CT_COMBINATION
);
2177 replace_ref_with (a
->stmt
, chain
->vars
[chain
->length
], true, in_lhs
);
2179 /* Replace the uses of the original references by these variables. */
2180 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
2182 var
= chain
->vars
[chain
->length
- a
->distance
];
2183 replace_ref_with (a
->stmt
, var
, false, false);
2188 /* Determines the unroll factor necessary to remove as many temporary variable
2189 copies as possible. CHAINS is the list of chains that will be
2193 determine_unroll_factor (vec
<chain_p
> chains
)
2196 unsigned factor
= 1, af
, nfactor
, i
;
2197 unsigned max
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
2199 FOR_EACH_VEC_ELT (chains
, i
, chain
)
/* Invariant chains do not constrain the unroll factor.  */
2201 if (chain
->type
== CT_INVARIANT
)
2203 /* For now we can't handle unrolling when eliminating stores. */
2204 else if (chain
->type
== CT_STORE_STORE
)
2207 if (chain
->combined
)
2209 /* For combined chains, we can't handle unrolling if we replace
2213 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
2214 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
2219 /* The best unroll factor for this chain is equal to the number of
2220 temporary variables that we create for it. */
2222 if (chain
->has_max_use_after
)
/* Combine factors as their least common multiple, via gcd.  */
2225 nfactor
= factor
* af
/ gcd (factor
, af
);
2233 /* Perform the predictive commoning optimization for CHAINS.
2234 Uids of the newly created temporary variables are marked in TMP_VARS. */
2237 execute_pred_commoning (class loop
*loop
, vec
<chain_p
> chains
,
/* First pass: transform each chain (load motion for invariants, the
   general machinery otherwise).  */
2243 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2245 if (chain
->type
== CT_INVARIANT
)
2246 execute_load_motion (loop
, chain
, tmp_vars
);
2248 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
/* Second pass: delayed statement removal for combined chains.  */
2251 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2253 if (chain
->type
== CT_INVARIANT
)
2255 else if (chain
->combined
)
2257 /* For combined chains, just remove the statements that are used to
2258 compute the values of the expression (except for the root one). */
2261 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
2262 remove_stmt (a
->stmt
);
/* Memory statements were added/removed; refresh virtual SSA form.  */
2266 update_ssa (TODO_update_ssa_only_virtuals
);
2269 /* For each reference in CHAINS, if its defining statement is
2270 phi node, record the ssa name that is defined by it. */
/* (Counterpart of replace_names_by_phis: PHIs cannot survive loop
   unrolling, so they are stashed as names and restored afterwards.)  */
2273 replace_phis_by_defined_names (vec
<chain_p
> chains
)
2279 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2280 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
2282 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
2284 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
2290 /* For each reference in CHAINS, if name_defined_by_phi is not
2291 NULL, use it to set the stmt field. */
2294 replace_names_by_phis (vec
<chain_p
> chains
)
2300 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2301 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
2302 if (a
->stmt
== NULL
)
/* Recover the PHI from the recorded SSA name and clear the stash.  */
2304 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
2305 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
2306 a
->name_defined_by_phi
= NULL_TREE
;
2310 /* Wrapper over execute_pred_commoning, to pass it as a callback
2311 to tree_transform_and_unroll_loop. */
2315 vec
<chain_p
> chains
;
2320 execute_pred_commoning_cbck (class loop
*loop
, void *data
)
/* DATA is an epcc_data bundling the chains and the tmp_vars bitmap.  */
2322 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
2324 /* Restore phi nodes that were replaced by ssa names before
2325 tree_transform_and_unroll_loop (see detailed description in
2326 tree_predictive_commoning_loop). */
2327 replace_names_by_phis (dta
->chains
);
2328 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
2331 /* Base NAME and all the names in the chain of phi nodes that use it
2332 on variable VAR. The phi nodes are recognized by being in the copies of
2333 the header of the LOOP. */
2336 base_names_in_chain_on (class loop
*loop
, tree name
, tree var
)
2339 imm_use_iterator iter
;
2341 replace_ssa_name_symbol (name
, var
);
/* Follow the du chain through PHIs inside LOOP, rebasing each result.  */
2346 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
2348 if (gimple_code (stmt
) == GIMPLE_PHI
2349 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
2352 BREAK_FROM_IMM_USE_STMT (iter
);
2358 name
= PHI_RESULT (phi
);
2359 replace_ssa_name_symbol (name
, var
);
2363 /* Given an unrolled LOOP after predictive commoning, remove the
2364 register copies arising from phi nodes by changing the base
2365 variables of SSA names. TMP_VARS is the set of the temporary variables
2366 for those we want to perform this. */
2369 eliminate_temp_copies (class loop
*loop
, bitmap tmp_vars
)
2374 tree name
, use
, var
;
2377 e
= loop_latch_edge (loop
);
/* Inspect each header PHI of one of our temporaries.  */
2378 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
2381 name
= PHI_RESULT (phi
);
2382 var
= SSA_NAME_VAR (name
);
2383 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
2385 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
2386 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
2388 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2389 stmt
= SSA_NAME_DEF_STMT (use
);
2390 while (gimple_code (stmt
) == GIMPLE_PHI
2391 /* In case we could not unroll the loop enough to eliminate
2392 all copies, we may reach the loop header before the defining
2393 statement (in that case, some register copies will be present
2394 in loop latch in the final code, corresponding to the newly
2395 created looparound phi nodes). */
2396 && gimple_bb (stmt
) != loop
->header
)
2398 gcc_assert (single_pred_p (gimple_bb (stmt
)));
2399 use
= PHI_ARG_DEF (stmt
, 0);
2400 stmt
= SSA_NAME_DEF_STMT (use
);
2403 base_names_in_chain_on (loop
, use
, var
);
2407 /* Returns true if CHAIN is suitable to be combined. */
/* Only not-yet-combined load or combination chains qualify.  */
2410 chain_can_be_combined_p (chain_p chain
)
2412 return (!chain
->combined
2413 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
2416 /* Returns the modify statement that uses NAME. Skips over assignment
2417 statements, NAME is replaced with the actual name used in the returned
2421 find_use_stmt (tree
*name
)
2426 /* Skip over assignments. */
2429 stmt
= single_nonlooparound_use (*name
)
;
2433 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
2436 lhs
= gimple_assign_lhs (stmt
);
2437 if (TREE_CODE (lhs
) != SSA_NAME
)
/* Plain copies are skipped: continue the search from their lhs.  */
2440 if (gimple_assign_copy_p (stmt
))
2442 rhs
= gimple_assign_rhs1 (stmt
);
/* Only binary-rhs assignments are interesting as combination uses.  */
2448 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
2449 == GIMPLE_BINARY_RHS
)
2456 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
/* Floating point reassociation requires -funsafe-math-optimizations.  */
2459 may_reassociate_p (tree type
, enum tree_code code
)
2461 if (FLOAT_TYPE_P (type
)
2462 && !flag_unsafe_math_optimizations
)
2465 return (commutative_tree_code (code
)
2466 && associative_tree_code (code
));
2469 /* If the operation used in STMT is associative and commutative, go through the
2470 tree of the same operations and returns its root. Distance to the root
2471 is stored in DISTANCE. */
2474 find_associative_operation_root (gimple
*stmt
, unsigned *distance
)
2478 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2479 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2482 if (!may_reassociate_p (type
, code
))
/* Walk up the single-use chain as long as the same operation is used.  */
2487 lhs
= gimple_assign_lhs (stmt
);
2488 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
2490 next
= find_use_stmt (&lhs
);
2492 || gimple_assign_rhs_code (next
) != code
)
2504 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2505 is no such statement, returns NULL_TREE. In case the operation used on
2506 NAME1 and NAME2 is associative and commutative, returns the root of the
2507 tree formed by this operation instead of the statement that uses NAME1 or
2511 find_common_use_stmt (tree
*name1
, tree
*name2
)
2513 gimple
*stmt1
, *stmt2
;
2515 stmt1
= find_use_stmt (name1
);
2519 stmt2
= find_use_stmt (name2
);
/* Allow the two uses to meet at the root of an associative operation
   tree rather than in the very same statement.  */
2526 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2529 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2533 return (stmt1
== stmt2
? stmt1
: NULL
);
2536 /* Checks whether R1 and R2 are combined together using CODE, with the result
2537 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2538 if it is true. If CODE is ERROR_MARK, set these values instead. */
2541 combinable_refs_p (dref r1
, dref r2
,
2542 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2544 enum tree_code acode
;
2550 name1
= name_for_ref (r1
);
2551 name2
= name_for_ref (r2
);
2552 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2554 stmt
= find_common_use_stmt (&name1
, &name2
);
2557 /* A simple post-dominance check - make sure the combination
2558 is executed under the same condition as the references. */
2559 || (gimple_bb (stmt
) != gimple_bb (r1
->stmt
)
2560 && gimple_bb (stmt
) != gimple_bb (r2
->stmt
)))
/* Record the operation actually used; SWAP notes when NAME1 is not the
   first operand of a non-commutative operation.  */
2563 acode
= gimple_assign_rhs_code (stmt
);
2564 aswap
= (!commutative_tree_code (acode
)
2565 && gimple_assign_rhs1 (stmt
) != name1
);
2566 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
/* First call (CODE == ERROR_MARK) sets the outputs; later calls must
   match the recorded operation and type.  */
2568 if (*code
== ERROR_MARK
)
2576 return (*code
== acode
2578 && *rslt_type
== atype
);
2581 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2582 an assignment of the remaining operand. */
2585 remove_name_from_operation (gimple
*stmt
, tree op
)
2588 gimple_stmt_iterator si
;
2590 gcc_assert (is_gimple_assign (stmt
));
/* Pick whichever binary operand is not OP.  */
2592 if (gimple_assign_rhs1 (stmt
) == op
)
2593 other_op
= gimple_assign_rhs2 (stmt
);
2595 other_op
= gimple_assign_rhs1 (stmt
);
2597 si
= gsi_for_stmt (stmt
);
2598 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2600 /* We should not have reallocated STMT. */
2601 gcc_assert (gsi_stmt (si
) == stmt
);
2606 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2607 are combined in a single statement, and returns this statement. */
2610 reassociate_to_the_same_stmt (tree name1
, tree name2
)
2612 gimple
*stmt1
, *stmt2
, *root1
, *root2
, *s1
, *s2
;
2613 gassign
*new_stmt
, *tmp_stmt
;
2614 tree new_name
, tmp_name
, var
, r1
, r2
;
2615 unsigned dist1
, dist2
;
2616 enum tree_code code
;
2617 tree type
= TREE_TYPE (name1
);
2618 gimple_stmt_iterator bsi
;
2620 stmt1
= find_use_stmt (&name1
);
2621 stmt2
= find_use_stmt (&name2
);
2622 root1
= find_associative_operation_root (stmt1
, &dist1
);
2623 root2
= find_associative_operation_root (stmt2
, &dist2
);
2624 code
= gimple_assign_rhs_code (stmt1
);
/* Both names must hang off the same associative-operation tree.  */
2626 gcc_assert (root1
&& root2
&& root1
== root2
2627 && code
== gimple_assign_rhs_code (stmt2
));
2629 /* Find the root of the nearest expression in that both NAME1 and NAME2
/* Advance the deeper side until both are at equal distance...  */
2636 while (dist1
> dist2
)
2638 s1
= find_use_stmt (&r1
);
2639 r1
= gimple_assign_lhs (s1
);
2642 while (dist2
> dist1
)
2644 s2
= find_use_stmt (&r2
);
2645 r2
= gimple_assign_lhs (s2
);
/* ... then walk both sides up in lockstep until they meet.  */
2651 s1
= find_use_stmt (&r1
);
2652 r1
= gimple_assign_lhs (s1
);
2653 s2
= find_use_stmt (&r2
);
2654 r2
= gimple_assign_lhs (s2
);
2657 /* Remove NAME1 and NAME2 from the statements in that they are used
2659 remove_name_from_operation (stmt1
, name1
);
2660 remove_name_from_operation (stmt2
, name2
);
2662 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2663 combine it with the rhs of S1. */
2664 var
= create_tmp_reg (type
, "predreastmp");
2665 new_name
= make_ssa_name (var
);
2666 new_stmt
= gimple_build_assign (new_name
, code
, name1
, name2
);
2668 var
= create_tmp_reg (type
, "predreastmp");
2669 tmp_name
= make_ssa_name (var
);
2671 /* Rhs of S1 may now be either a binary expression with operation
2672 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2673 so that name1 or name2 was removed from it). */
2674 tmp_stmt
= gimple_build_assign (tmp_name
, gimple_assign_rhs_code (s1
),
2675 gimple_assign_rhs1 (s1
),
2676 gimple_assign_rhs2 (s1
));
2678 bsi
= gsi_for_stmt (s1
);
2679 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2680 s1
= gsi_stmt (bsi
);
2683 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2684 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2689 /* Returns the statement that combines references R1 and R2. In case R1
2690 and R2 are not used in the same statement, but they are used with an
2691 associative and commutative operation in the same expression, reassociate
2692 the expression so that they are used in the same statement. */
2695 stmt_combining_refs (dref r1
, dref r2
)
2697 gimple
*stmt1
, *stmt2
;
/* NAME1/NAME2 are the ssa names holding the values of the two refs.  */
2698 tree name1
= name_for_ref (r1
);
2699 tree name2
= name_for_ref (r2
);
2701 stmt1
= find_use_stmt (&name1
);
2702 stmt2
= find_use_stmt (&name2
);
/* NOTE(review): the early-return branch for stmt1 == stmt2 is not visible
   in this chunk -- presumably the names are already combined in a single
   statement in that case; confirm against the complete file.  */
2706 return reassociate_to_the_same_stmt (name1
, name2
);
2709 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2710 description of the new chain is returned, otherwise we return NULL. */
/* NOTE(review): the function header, local declarations (i, r1, r2, swap,
   nw, new_chain), braces and the `return' statements are not visible in
   this chunk; confirm against the complete file.  */
2713 combine_chains (chain_p ch1
, chain_p ch2
)
2716 enum tree_code op
= ERROR_MARK
;
2720 tree rslt_type
= NULL_TREE
;
/* Chains must agree in length and in number of references to be
   combinable.  */
2724 if (ch1
->length
!= ch2
->length
)
2727 if (ch1
->refs
.length () != ch2
->refs
.length ())
/* Check the refs pairwise: distances must match and each pair must be
   combinable into a single operation.  */
2730 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2731 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2733 if (r1
->distance
!= r2
->distance
)
2736 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
/* Honor the operand order requested by combinable_refs_p.  */
2741 std::swap (ch1
, ch2
);
/* Build the CT_COMBINATION chain describing CH1 op CH2.  */
2743 new_chain
= XCNEW (struct chain
);
2744 new_chain
->type
= CT_COMBINATION
;
2746 new_chain
->ch1
= ch1
;
2747 new_chain
->ch2
= ch2
;
2748 new_chain
->rslt_type
= rslt_type
;
2749 new_chain
->length
= ch1
->length
;
/* Create one combined reference per pair, sharing the pair's distance.  */
2751 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2752 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2754 nw
= XCNEW (class dref_d
);
2755 nw
->stmt
= stmt_combining_refs (r1
, r2
);
2756 nw
->distance
= r1
->distance
;
2758 new_chain
->refs
.safe_push (nw
);
/* Mark the originals as consumed so they are not materialized again.  */
2761 ch1
->combined
= true;
2762 ch2
->combined
= true;
2766 /* Recursively update position information of all offspring chains to ROOT
2767 chain's position information. */
2770 update_pos_for_combined_chains (chain_p root
)
2772 chain_p ch1
= root
->ch1
, ch2
= root
->ch2
;
2773 dref ref
, ref1
, ref2
;
/* Copy ROOT's per-reference position onto the matching references of both
   child chains.  */
2774 for (unsigned j
= 0; (root
->refs
.iterate (j
, &ref
)
2775 && ch1
->refs
.iterate (j
, &ref1
)
2776 && ch2
->refs
.iterate (j
, &ref2
)); ++j
)
2777 ref1
->pos
= ref2
->pos
= ref
->pos
;
/* Recurse into children that are themselves combinations.  */
2779 if (ch1
->type
== CT_COMBINATION
)
2780 update_pos_for_combined_chains (ch1
);
2781 if (ch2
->type
== CT_COMBINATION
)
2782 update_pos_for_combined_chains (ch2
);
2785 /* Returns true if statement S1 dominates statement S2. */
/* NOTE(review): the `if (bb1 == bb2)' guard before the uid comparison and
   the early `return' bodies are not visible in this chunk; confirm against
   the complete file.  */
2788 pcom_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
2790 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
/* A statement with no basic block, or a statement compared to itself.  */
2792 if (!bb1
|| s1
== s2
)
/* Within one block, dominance order follows the statement uids set up by
   renumber_gimple_stmt_uids_in_blocks.  */
2796 return gimple_uid (s1
) < gimple_uid (s2
);
/* Across blocks, defer to the CFG dominator information.  */
2798 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
2801 /* Try to combine the CHAINS in LOOP. */
/* NOTE(review): local declarations (i, j, ref), braces, several `continue's
   and the final cleanup of BBS / the combined_p guard are not visible in
   this chunk; confirm against the complete file.  */
2804 try_combine_chains (class loop
*loop
, vec
<chain_p
> *chains
)
2807 chain_p ch1
, ch2
, cch
;
2808 auto_vec
<chain_p
> worklist
;
2809 bool combined_p
= false;
/* Seed the worklist with every chain that is eligible for combination.  */
2811 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2812 if (chain_can_be_combined_p (ch1
))
2813 worklist
.safe_push (ch1
);
/* Greedily pair each worklist chain with any other combinable chain;
   successful combinations are fed back into the worklist so they can be
   combined further.  */
2815 while (!worklist
.is_empty ())
2817 ch1
= worklist
.pop ();
2818 if (!chain_can_be_combined_p (ch1
))
2821 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2823 if (!chain_can_be_combined_p (ch2
))
2826 cch
= combine_chains (ch1
, ch2
);
2829 worklist
.safe_push (cch
);
2830 chains
->safe_push (cch
);
2839 /* Setup UID for all statements in dominance order. */
2840 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
2841 renumber_gimple_stmt_uids_in_blocks (bbs
, loop
->num_nodes
);
2844 /* Re-association in combined chains may generate statements different to
2845 order of references of the original chain. We need to keep references
2846 of combined chain in dominance order so that all uses will be inserted
2847 after definitions. Note:
2848 A) This is necessary for all combined chains.
2849 B) This is only necessary for ZERO distance references because other
2850 references inherit value from loop carried PHIs.
2852 We first update position information for all combined chains. */
2854 for (i
= 0; chains
->iterate (i
, &ch1
); ++i
)
/* Only process root combinations (children are handled recursively).  */
2856 if (ch1
->type
!= CT_COMBINATION
|| ch1
->combined
)
/* Record each reference's dominance position from the fresh uids.  */
2859 for (j
= 0; ch1
->refs
.iterate (j
, &ref
); ++j
)
2860 ref
->pos
= gimple_uid (ref
->stmt
);
2862 update_pos_for_combined_chains (ch1
);
2864 /* Then sort references according to newly updated position information. */
2865 for (i
= 0; chains
->iterate (i
, &ch1
); ++i
)
2867 if (ch1
->type
!= CT_COMBINATION
&& !ch1
->combined
)
2870 /* Find the first reference with non-ZERO distance. */
2871 if (ch1
->length
== 0)
/* Zero-length chain: every reference has distance zero.  */
2872 j
= ch1
->refs
.length();
2875 for (j
= 0; ch1
->refs
.iterate (j
, &ref
); ++j
)
2876 if (ref
->distance
!= 0)
2880 /* Sort all ZERO distance references by position. */
2881 qsort (&ch1
->refs
[0], j
, sizeof (ch1
->refs
[0]), order_drefs_by_pos
);
2886 /* For ZERO length chain, has_max_use_after must be true since root
2887 combined stmt must dominate others. */
2888 if (ch1
->length
== 0)
2890 ch1
->has_max_use_after
= true;
2893 /* Check if there is use at max distance after root for combined chains
2894 and set flag accordingly. */
2895 ch1
->has_max_use_after
= false;
2896 gimple
*root_stmt
= get_chain_root (ch1
)->stmt
;
2897 for (j
= 1; ch1
->refs
.iterate (j
, &ref
); ++j
)
2899 if (ref
->distance
== ch1
->length
2900 && !pcom_stmt_dominates_stmt_p (ref
->stmt
, root_stmt
))
2902 ch1
->has_max_use_after
= true;
2909 /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2910 if this is impossible because one of these initializers may trap, true
/* NOTE(review): the `static bool' header, braces, `return' values and the
   `if (bubbles[i])' style guards inside the final loop are not visible in
   this chunk; confirm against the complete file.  */
2914 prepare_initializers_chain_store_elim (class loop
*loop
, chain_p chain
)
2916 unsigned i
, n
= chain
->length
;
2918 /* For now we can't eliminate stores if some of them are conditional
2920 if (!chain
->all_always_accessed
)
2923 /* Nothing to initialize for intra-iteration store elimination. */
2924 if (n
== 0 && chain
->type
== CT_STORE_STORE
)
2927 /* For store elimination chain, there is nothing to initialize if stores
2928 to be eliminated only store loop invariant values into memory. */
2929 if (chain
->type
== CT_STORE_STORE
2930 && is_inv_store_elimination_chain (loop
, chain
))
2932 chain
->inv_store_elimination
= true;
2936 chain
->inits
.create (n
);
2937 chain
->inits
.safe_grow_cleared (n
);
2939 /* For store elimination chain like below:
2941 for (i = 0; i < len; i++)
2948 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2949 content of a[i + 1] remain the same if the loop iterates fewer times
2950 than chain->length. We need to set up root variables for such stores
2951 by loading from memory before loop. Note we only need to load bubble
2952 elements because loop body is guaranteed to be executed at least once
2953 after loop's preheader edge. */
2954 auto_vec
<bool> bubbles
;
2955 bubbles
.safe_grow_cleared (n
+ 1);
/* Mark every distance that IS covered by a store in the chain.  */
2956 for (i
= 0; i
< chain
->refs
.length (); i
++)
2957 bubbles
[chain
->refs
[i
]->distance
] = true;
2959 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
/* Emit a pre-loop load for each uncovered (bubble) distance slot.  */
2960 for (i
= 0; i
< n
; i
++)
2965 gimple_seq stmts
= NULL
;
2967 tree init
= ref_at_iteration (dr
, (int) 0 - i
, &stmts
);
2969 gimple_seq_add_seq_without_update (&chain
->init_seq
, stmts
);
2971 chain
->inits
[i
] = init
;
2977 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2978 impossible because one of these initializers may trap, true otherwise. */
/* NOTE(review): the `static bool' header, braces, the `laref'/`init'
   declarations and the `return' statements are not visible in this chunk;
   confirm against the complete file.  */
2981 prepare_initializers_chain (class loop
*loop
, chain_p chain
)
/* Invariant chains need exactly one initializer; others need one per
   distance covered by the chain.  */
2983 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
2984 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2987 edge entry
= loop_preheader_edge (loop
);
/* Store-store chains are handled by the specialized routine.  */
2989 if (chain
->type
== CT_STORE_STORE
)
2990 return prepare_initializers_chain_store_elim (loop
, chain
);
2992 /* Find the initializers for the variables, and check that they cannot
2994 chain
->inits
.create (n
);
2995 for (i
= 0; i
< n
; i
++)
2996 chain
->inits
.quick_push (NULL_TREE
);
2998 /* If we have replaced some looparound phi nodes, use their initializers
2999 instead of creating our own. */
3000 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
3002 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
3005 gcc_assert (laref
->distance
> 0);
/* The phi's preheader argument seeds the slot for its distance.  */
3006 chain
->inits
[n
- laref
->distance
]
3007 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
/* Fill in the remaining slots by loading the reference at pre-loop
   iterations i - n.  */
3010 for (i
= 0; i
< n
; i
++)
3012 gimple_seq stmts
= NULL
;
/* Skip slots already provided by looparound phis.  */
3014 if (chain
->inits
[i
] != NULL_TREE
)
3017 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
/* A possibly-trapping initializer for a not-always-accessed ref makes
   the whole chain unusable.  */
3018 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
3020 gimple_seq_discard (stmts
);
3025 gimple_seq_add_seq_without_update (&chain
->init_seq
, stmts
);
3027 chain
->inits
[i
] = init
;
3033 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3034 be used because the initializers might trap. */
/* NOTE(review): the `chain' declaration, the chains[i] fetch and the `i++'
   of the success path are not visible in this chunk; confirm against the
   complete file.  */
3037 prepare_initializers (class loop
*loop
, vec
<chain_p
> chains
)
/* No increment in the loop header: removal of a chain reuses index I.  */
3042 for (i
= 0; i
< chains
.length (); )
3045 if (prepare_initializers_chain (loop
, chain
))
/* Initializer might trap -- drop the chain entirely.  */
3049 release_chain (chain
);
3050 chains
.unordered_remove (i
);
3055 /* Generates finalizer memory references for CHAIN in LOOP. Returns true
3056 if finalizer code for CHAIN can be generated, otherwise false. */
/* NOTE(review): the `static bool' header, braces and `return' statements
   are not visible in this chunk; confirm against the complete file.  */
3059 prepare_finalizers_chain (class loop
*loop
, chain_p chain
)
3061 unsigned i
, n
= chain
->length
;
3062 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
/* NITERS = number of latch executions, used to address the ref at its
   final iterations.  */
3063 tree fini
, niters
= number_of_latch_executions (loop
);
3065 /* For now we can't eliminate stores if some of them are conditional
3067 if (!chain
->all_always_accessed
)
3070 chain
->finis
.create (n
);
3071 for (i
= 0; i
< n
; i
++)
3072 chain
->finis
.quick_push (NULL_TREE
);
3074 /* We never use looparound phi node for store elimination chains. */
3076 /* Find the finalizers for the variables, and check that they cannot
3078 for (i
= 0; i
< n
; i
++)
3080 gimple_seq stmts
= NULL
;
3081 gcc_assert (chain
->finis
[i
] == NULL_TREE
);
/* Gimplify NITERS once if it is not already a constant or ssa name.  */
3083 if (TREE_CODE (niters
) != INTEGER_CST
&& TREE_CODE (niters
) != SSA_NAME
)
3085 niters
= unshare_expr (niters
);
3086 niters
= force_gimple_operand (niters
, &stmts
, true, NULL
);
3089 gimple_seq_add_seq_without_update (&chain
->fini_seq
, stmts
);
/* Address of the ref at the I-th iteration before loop exit.  */
3093 fini
= ref_at_iteration (dr
, (int) 0 - i
, &stmts
, niters
);
3095 gimple_seq_add_seq_without_update (&chain
->fini_seq
, stmts
);
3097 chain
->finis
[i
] = fini
;
3103 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3104 if finalizer code generation for CHAINS breaks loop closed ssa form. */
/* NOTE(review): the `i'/`chain' declarations, the chains[i] fetch and the
   `i++' of the keep path are not visible in this chunk; confirm against
   the complete file.  */
3107 prepare_finalizers (class loop
*loop
, vec
<chain_p
> chains
)
3111 bool loop_closed_ssa
= false;
/* No increment in the loop header: removal of a chain reuses index I.  */
3113 for (i
= 0; i
< chains
.length ();)
3117 /* Finalizer is only necessary for inter-iteration store elimination
/* Zero-length or non-store-store chains need no finalizer.  */
3119 if (chain
->length
== 0 || chain
->type
!= CT_STORE_STORE
)
3125 if (prepare_finalizers_chain (loop
, chain
))
3128 /* Be conservative, assume loop closed ssa form is corrupted
3129 by store-store chain. Though it's not always the case if
3130 eliminated stores only store loop invariant values into
3132 loop_closed_ssa
= true;
/* Finalizer cannot be generated -- drop the chain.  */
3136 release_chain (chain
);
3137 chains
.unordered_remove (i
);
3140 return loop_closed_ssa
;
3143 /* Insert all initializing gimple stmts into loop's entry edge. */
3146 insert_init_seqs (class loop
*loop
, vec
<chain_p
> chains
)
3149 edge entry
= loop_preheader_edge (loop
);
/* Emit each chain's pending init sequence on the preheader edge, then
   clear the pointer so the sequence is not inserted twice.  */
3151 for (i
= 0; i
< chains
.length (); ++i
)
3152 if (chains
[i
]->init_seq
)
3154 gsi_insert_seq_on_edge_immediate (entry
, chains
[i
]->init_seq
);
3155 chains
[i
]->init_seq
= NULL
;
3159 /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3160 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3161 form was corrupted. */
/* NOTE(review): many original lines (braces, `goto end'-style exits, the
   `exit' declaration, fprintf continuations) are not visible in this chunk;
   confirm against the complete file.  */
3164 tree_predictive_commoning_loop (class loop
*loop
)
3166 vec
<data_reference_p
> datarefs
;
3167 vec
<ddr_p
> dependences
;
3168 struct component
*components
;
3169 vec
<chain_p
> chains
= vNULL
;
3170 unsigned unroll_factor
;
3171 class tree_niter_desc desc
;
3172 bool unroll
= false, loop_closed_ssa
= false;
3175 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3176 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
3178 /* Nothing for predictive commoning if loop only iterates 1 time. */
3179 if (get_max_loop_iterations_int (loop
) == 0)
3181 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3182 fprintf (dump_file
, "Loop iterates only 1 time, nothing to do.\n");
3187 /* Find the data references and split them into components according to their
3188 dependence relations. */
3189 auto_vec
<loop_p
, 3> loop_nest
;
3190 dependences
.create (10);
3191 datarefs
.create (10);
3192 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
/* Dependence analysis failed -- release what was allocated and bail.  */
3195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3196 fprintf (dump_file
, "Cannot analyze data dependencies\n");
3197 free_data_refs (datarefs
);
3198 free_dependence_relations (dependences
);
3202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3203 dump_data_dependence_relations (dump_file
, dependences
);
3205 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
3206 loop_nest
.release ();
3207 free_dependence_relations (dependences
);
3210 free_data_refs (datarefs
);
3211 free_affine_expand_cache (&name_expansions
);
3215 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3217 fprintf (dump_file
, "Initial state:\n\n");
3218 dump_components (dump_file
, components
);
3221 /* Find the suitable components and split them into chains. */
3222 components
= filter_suitable_components (loop
, components
);
3224 auto_bitmap tmp_vars
;
3225 looparound_phis
= BITMAP_ALLOC (NULL
);
3226 determine_roots (loop
, components
, &chains
);
3227 release_components (components
);
3229 if (!chains
.exists ())
3231 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3233 "Predictive commoning failed: no suitable chains\n");
/* Compute initializers/finalizers; chains whose initializers might trap
   are dropped inside these helpers.  */
3236 prepare_initializers (loop
, chains
);
3237 loop_closed_ssa
= prepare_finalizers (loop
, chains
);
3239 /* Try to combine the chains that are always worked with together. */
3240 try_combine_chains (loop
, &chains
);
3242 insert_init_seqs (loop
, chains
);
3244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3246 fprintf (dump_file
, "Before commoning:\n\n");
3247 dump_chains (dump_file
, chains
);
3250 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3251 that its number of iterations is divisible by the factor. */
3252 unroll_factor
= determine_unroll_factor (chains
);
3254 unroll
= (unroll_factor
> 1
3255 && can_unroll_loop_p (loop
, unroll_factor
, &desc
));
3256 exit
= single_dom_exit (loop
);
3258 /* Execute the predictive commoning transformations, and possibly unroll the
3262 struct epcc_data dta
;
3264 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3265 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
3267 dta
.chains
= chains
;
3268 dta
.tmp_vars
= tmp_vars
;
3270 update_ssa (TODO_update_ssa_only_virtuals
);
3272 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3273 execute_pred_commoning_cbck is called may cause phi nodes to be
3274 reallocated, which is a problem since CHAINS may point to these
3275 statements. To fix this, we store the ssa names defined by the
3276 phi nodes here instead of the phi nodes themselves, and restore
3277 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3278 replace_phis_by_defined_names (chains
);
3280 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
3281 execute_pred_commoning_cbck
, &dta
);
3282 eliminate_temp_copies (loop
, tmp_vars
);
/* Non-unrolled path: apply the transformation to the loop as-is.  */
3286 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3288 "Executing predictive commoning without unrolling.\n");
3289 execute_pred_commoning (loop
, chains
, tmp_vars
);
/* Common cleanup for all exits.  */
3293 release_chains (chains
);
3294 free_data_refs (datarefs
);
3295 BITMAP_FREE (looparound_phis
);
3297 free_affine_expand_cache (&name_expansions
);
/* Bit 0: loop was unrolled; bit 1: loop closed ssa form was corrupted.  */
3299 return (unroll
? 1 : 0) | (loop_closed_ssa
? 2 : 0);
3302 /* Runs predictive commoning. */
/* NOTE(review): the `loop' declaration, the test of CHANGED and the final
   `return ret;' are not visible in this chunk; confirm against the
   complete file.  */
3305 tree_predictive_commoning (void)
3308 unsigned ret
= 0, changed
= 0;
/* Process every innermost loop that is optimized for speed, OR-ing the
   per-loop result bits together.  */
3310 initialize_original_copy_tables ();
3311 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
3312 if (optimize_loop_for_speed_p (loop
))
3314 changed
|= tree_predictive_commoning_loop (loop
);
3316 free_original_copy_tables ();
/* Some loop was transformed: restore loop closed ssa and request a CFG
   cleanup via the returned TODO flag.  */
3323 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
3325 ret
= TODO_cleanup_cfg
;
3331 /* Predictive commoning Pass. */
/* Entry point used by the pass object: skip functions with at most one
   loop (the early-return body is not visible in this chunk).  */
3334 run_tree_predictive_commoning (struct function
*fun
)
3336 if (number_of_loops (fun
) <= 1)
3339 return tree_predictive_commoning ();
/* Static description of the predcom pass for the pass manager.  Some
   designator lines (e.g. the pass name string) are not visible in this
   chunk.  */
3344 const pass_data pass_data_predcom
=
3346 GIMPLE_PASS
, /* type */
3348 OPTGROUP_LOOP
, /* optinfo_flags */
3349 TV_PREDCOM
, /* tv_id */
3350 PROP_cfg
, /* properties_required */
3351 0, /* properties_provided */
3352 0, /* properties_destroyed */
3353 0, /* todo_flags_start */
3354 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
/* Pass object wrapping run_tree_predictive_commoning; gated on
   -fpredictive-commoning.  */
3357 class pass_predcom
: public gimple_opt_pass
3360 pass_predcom (gcc::context
*ctxt
)
3361 : gimple_opt_pass (pass_data_predcom
, ctxt
)
3364 /* opt_pass methods: */
/* Enabled only when -fpredictive-commoning is on.  */
3365 virtual bool gate (function
*) { return flag_predictive_commoning
!= 0; }
3366 virtual unsigned int execute (function
*fun
)
3368 return run_tree_predictive_commoning (fun
);
3371 }; // class pass_predcom
3376 make_pass_predcom (gcc::context
*ctxt
)
3378 return new pass_predcom (ctxt
);