1 /* Predictive commoning.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction -- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by another store can be
161 eliminated. Given below example:
163 for (i = 0; i < n; i++)
169 It can be replaced with:
173 for (i = 0; i < n; i++)
183 If the loop runs more than 1 iteration, it can be further simplified into:
185 for (i = 0; i < n; i++)
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
195 TODO: For now, we don't support store-store chains in multi-exit loops. We
196 force to not unroll in case of store-store chain even if other chains might
199 Predictive commoning can be generalized for arbitrary computations (not
200 just memory loads), and also nontrivial transfer functions (e.g., replacing
201 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
205 #include "coretypes.h"
211 #include "tree-pass.h"
213 #include "gimple-pretty-print.h"
215 #include "fold-const.h"
218 #include "gimplify.h"
219 #include "gimple-iterator.h"
220 #include "gimplify-me.h"
221 #include "tree-ssa-loop-ivopts.h"
222 #include "tree-ssa-loop-manip.h"
223 #include "tree-ssa-loop-niter.h"
224 #include "tree-ssa-loop.h"
225 #include "tree-into-ssa.h"
226 #include "tree-dfa.h"
227 #include "tree-ssa.h"
228 #include "tree-data-ref.h"
229 #include "tree-scalar-evolution.h"
231 #include "tree-affine.h"
232 #include "builtins.h"
234 /* The maximum number of iterations between the considered memory
237 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
239 /* Data references (or phi nodes that carry data reference values across
242 typedef struct dref_d
244 /* The reference itself. */
245 struct data_reference
*ref
;
247 /* The statement in that the reference appears. */
250 /* In case that STMT is a phi node, this field is set to the SSA name
251 defined by it in replace_phis_by_defined_names (in order to avoid
252 pointing to phi node that got reallocated in the meantime). */
253 tree name_defined_by_phi
;
255 /* Distance of the reference from the root of the chain (in number of
256 iterations of the loop). */
259 /* Number of iterations offset from the first reference in the component. */
262 /* Number of the reference in a component, in dominance ordering. */
265 /* True if the memory reference is always accessed when the loop is
267 unsigned always_accessed
: 1;
271 /* Type of the chain of the references. */
275 /* The addresses of the references in the chain are constant. */
278 /* There are only loads in the chain. */
281 /* Root of the chain is store, the rest are loads. */
284 /* There are only stores in the chain. */
287 /* A combination of two chains. */
291 /* Chains of data references. */
295 /* Type of the chain. */
296 enum chain_type type
;
298 /* For combination chains, the operator and the two chains that are
299 combined, and the type of the result. */
302 struct chain
*ch1
, *ch2
;
304 /* The references in the chain. */
307 /* The maximum distance of the reference in the chain from the root. */
310 /* The variables used to copy the value throughout iterations. */
313 /* Initializers for the variables. */
316 /* Finalizers for the eliminated stores. */
319 /* gimple stmts initializing the initial variables of the chain. */
322 /* gimple stmts finalizing the eliminated stores of the chain. */
325 /* True if there is a use of a variable with the maximal distance
326 that comes after the root in the loop. */
327 unsigned has_max_use_after
: 1;
329 /* True if all the memory references in the chain are always accessed. */
330 unsigned all_always_accessed
: 1;
332 /* True if this chain was combined together with some other chain. */
333 unsigned combined
: 1;
335 /* True if this is store elimination chain and eliminated stores store
336 loop invariant value into memory. */
337 unsigned inv_store_elimination
: 1;
341 /* Describes the knowledge about the step of the memory references in
346 /* The step is zero. */
349 /* The step is nonzero. */
352 /* The step may or may not be nonzero. */
356 /* Components of the data dependence graph. */
360 /* The references in the component. */
363 /* What we know about the step of the references in the component. */
364 enum ref_step_type comp_step
;
366 /* True if all references in component are stores and we try to do
367 intra/inter loop iteration dead store elimination. */
368 bool eliminate_store_p
;
370 /* Next component in the list. */
371 struct component
*next
;
374 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
376 static bitmap looparound_phis
;
378 /* Cache used by tree_to_aff_combination_expand. */
380 static hash_map
<tree
, name_expansion
*> *name_expansions
;
382 /* Dumps data reference REF to FILE. */
384 extern void dump_dref (FILE *, dref
);
386 dump_dref (FILE *file
, dref ref
)
391 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
392 fprintf (file
, " (id %u%s)\n", ref
->pos
,
393 DR_IS_READ (ref
->ref
) ? "" : ", write");
395 fprintf (file
, " offset ");
396 print_decs (ref
->offset
, file
);
397 fprintf (file
, "\n");
399 fprintf (file
, " distance %u\n", ref
->distance
);
403 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
404 fprintf (file
, " looparound ref\n");
406 fprintf (file
, " combination ref\n");
407 fprintf (file
, " in statement ");
408 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
409 fprintf (file
, "\n");
410 fprintf (file
, " distance %u\n", ref
->distance
);
415 /* Dumps CHAIN to FILE. */
417 extern void dump_chain (FILE *, chain_p
);
419 dump_chain (FILE *file
, chain_p chain
)
422 const char *chain_type
;
429 chain_type
= "Load motion";
433 chain_type
= "Loads-only";
437 chain_type
= "Store-loads";
441 chain_type
= "Store-stores";
445 chain_type
= "Combination";
452 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
453 chain
->combined
? " (combined)" : "");
454 if (chain
->type
!= CT_INVARIANT
)
455 fprintf (file
, " max distance %u%s\n", chain
->length
,
456 chain
->has_max_use_after
? "" : ", may reuse first");
458 if (chain
->type
== CT_COMBINATION
)
460 fprintf (file
, " equal to %p %s %p in type ",
461 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
462 (void *) chain
->ch2
);
463 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
464 fprintf (file
, "\n");
467 if (chain
->vars
.exists ())
469 fprintf (file
, " vars");
470 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
473 print_generic_expr (file
, var
, TDF_SLIM
);
475 fprintf (file
, "\n");
478 if (chain
->inits
.exists ())
480 fprintf (file
, " inits");
481 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
484 print_generic_expr (file
, var
, TDF_SLIM
);
486 fprintf (file
, "\n");
489 fprintf (file
, " references:\n");
490 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
493 fprintf (file
, "\n");
496 /* Dumps CHAINS to FILE. */
498 extern void dump_chains (FILE *, vec
<chain_p
> );
500 dump_chains (FILE *file
, vec
<chain_p
> chains
)
505 FOR_EACH_VEC_ELT (chains
, i
, chain
)
506 dump_chain (file
, chain
);
509 /* Dumps COMP to FILE. */
511 extern void dump_component (FILE *, struct component
*);
513 dump_component (FILE *file
, struct component
*comp
)
518 fprintf (file
, "Component%s:\n",
519 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
520 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
522 fprintf (file
, "\n");
525 /* Dumps COMPS to FILE. */
527 extern void dump_components (FILE *, struct component
*);
529 dump_components (FILE *file
, struct component
*comps
)
531 struct component
*comp
;
533 for (comp
= comps
; comp
; comp
= comp
->next
)
534 dump_component (file
, comp
);
537 /* Frees a chain CHAIN. */
540 release_chain (chain_p chain
)
548 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
551 chain
->refs
.release ();
552 chain
->vars
.release ();
553 chain
->inits
.release ();
555 gimple_seq_discard (chain
->init_seq
);
557 chain
->finis
.release ();
559 gimple_seq_discard (chain
->fini_seq
);
567 release_chains (vec
<chain_p
> chains
)
572 FOR_EACH_VEC_ELT (chains
, i
, chain
)
573 release_chain (chain
);
577 /* Frees a component COMP. */
580 release_component (struct component
*comp
)
582 comp
->refs
.release ();
586 /* Frees list of components COMPS. */
589 release_components (struct component
*comps
)
591 struct component
*act
, *next
;
593 for (act
= comps
; act
; act
= next
)
596 release_component (act
);
600 /* Finds a root of tree given by FATHERS containing A, and performs path
604 component_of (unsigned fathers
[], unsigned a
)
608 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
611 for (; a
!= root
; a
= n
)
620 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
621 components, A and B are components to merge. */
624 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
626 unsigned ca
= component_of (fathers
, a
);
627 unsigned cb
= component_of (fathers
, b
);
632 if (sizes
[ca
] < sizes
[cb
])
634 sizes
[cb
] += sizes
[ca
];
639 sizes
[ca
] += sizes
[cb
];
644 /* Returns true if A is a reference that is suitable for predictive commoning
645 in the innermost loop that contains it. REF_STEP is set according to the
646 step of the reference A. */
649 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
651 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
654 || TREE_THIS_VOLATILE (ref
)
655 || !is_gimple_reg_type (TREE_TYPE (ref
))
656 || tree_could_throw_p (ref
))
659 if (integer_zerop (step
))
660 *ref_step
= RS_INVARIANT
;
661 else if (integer_nonzerop (step
))
662 *ref_step
= RS_NONZERO
;
669 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
672 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
674 tree type
= TREE_TYPE (DR_OFFSET (dr
));
677 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
679 aff_combination_const (&delta
, type
, wi::to_widest (DR_INIT (dr
)));
680 aff_combination_add (offset
, &delta
);
683 /* Determines number of iterations of the innermost enclosing loop before B
684 refers to exactly the same location as A and stores it to OFF. If A and
685 B do not have the same step, they never meet, or anything else fails,
686 returns false, otherwise returns true. Both A and B are assumed to
687 satisfy suitable_reference_p. */
690 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
693 aff_tree diff
, baseb
, step
;
696 /* Check that both the references access the location in the same type. */
697 typea
= TREE_TYPE (DR_REF (a
));
698 typeb
= TREE_TYPE (DR_REF (b
));
699 if (!useless_type_conversion_p (typeb
, typea
))
702 /* Check whether the base address and the step of both references is the
704 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
705 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
708 if (integer_zerop (DR_STEP (a
)))
710 /* If the references have loop invariant address, check that they access
711 exactly the same location. */
713 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
714 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
717 /* Compare the offsets of the addresses, and check whether the difference
718 is a multiple of step. */
719 aff_combination_dr_offset (a
, &diff
);
720 aff_combination_dr_offset (b
, &baseb
);
721 aff_combination_scale (&baseb
, -1);
722 aff_combination_add (&diff
, &baseb
);
724 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
725 &step
, &name_expansions
);
726 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
729 /* Returns the last basic block in LOOP for that we are sure that
730 it is executed whenever the loop is entered. */
733 last_always_executed_block (struct loop
*loop
)
736 vec
<edge
> exits
= get_loop_exit_edges (loop
);
738 basic_block last
= loop
->latch
;
740 FOR_EACH_VEC_ELT (exits
, i
, ex
)
741 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
747 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
749 static struct component
*
750 split_data_refs_to_components (struct loop
*loop
,
751 vec
<data_reference_p
> datarefs
,
754 unsigned i
, n
= datarefs
.length ();
755 unsigned ca
, ia
, ib
, bad
;
756 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
757 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
758 struct component
**comps
;
759 struct data_reference
*dr
, *dra
, *drb
;
760 struct data_dependence_relation
*ddr
;
761 struct component
*comp_list
= NULL
, *comp
;
763 /* Don't do store elimination if loop has multiple exit edges. */
764 bool eliminate_store_p
= single_exit (loop
) != NULL
;
765 basic_block last_always_executed
= last_always_executed_block (loop
);
767 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
771 /* A fake reference for call or asm_expr that may clobber memory;
775 /* predcom pass isn't prepared to handle calls with data references. */
776 if (is_gimple_call (DR_STMT (dr
)))
778 dr
->aux
= (void *) (size_t) i
;
783 /* A component reserved for the "bad" data references. */
787 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
789 enum ref_step_type dummy
;
791 if (!suitable_reference_p (dr
, &dummy
))
793 ia
= (unsigned) (size_t) dr
->aux
;
794 merge_comps (comp_father
, comp_size
, n
, ia
);
798 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
800 widest_int dummy_off
;
802 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
808 /* Don't do store elimination if there is any unknown dependence for
809 any store data reference. */
810 if ((DR_IS_WRITE (dra
) || DR_IS_WRITE (drb
))
811 && (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
812 || DDR_NUM_DIST_VECTS (ddr
) == 0))
813 eliminate_store_p
= false;
815 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
816 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
820 bad
= component_of (comp_father
, n
);
822 /* If both A and B are reads, we may ignore unsuitable dependences. */
823 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
825 if (ia
== bad
|| ib
== bad
826 || !determine_offset (dra
, drb
, &dummy_off
))
829 /* If A is read and B write or vice versa and there is unsuitable
830 dependence, instead of merging both components into a component
831 that will certainly not pass suitable_component_p, just put the
832 read into bad component, perhaps at least the write together with
833 all the other data refs in its component will be optimizable. */
834 else if (DR_IS_READ (dra
) && ib
!= bad
)
838 else if (!determine_offset (dra
, drb
, &dummy_off
))
840 merge_comps (comp_father
, comp_size
, bad
, ia
);
844 else if (DR_IS_READ (drb
) && ia
!= bad
)
848 else if (!determine_offset (dra
, drb
, &dummy_off
))
850 merge_comps (comp_father
, comp_size
, bad
, ib
);
854 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
)
855 && ia
!= bad
&& ib
!= bad
856 && !determine_offset (dra
, drb
, &dummy_off
))
858 merge_comps (comp_father
, comp_size
, bad
, ia
);
859 merge_comps (comp_father
, comp_size
, bad
, ib
);
863 merge_comps (comp_father
, comp_size
, ia
, ib
);
866 if (eliminate_store_p
)
868 tree niters
= number_of_latch_executions (loop
);
870 /* Don't do store elimination if niters info is unknown because stores
871 in the last iteration can't be eliminated and we need to recover it
873 eliminate_store_p
= (niters
!= NULL_TREE
&& niters
!= chrec_dont_know
);
876 comps
= XCNEWVEC (struct component
*, n
);
877 bad
= component_of (comp_father
, n
);
878 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
880 ia
= (unsigned) (size_t) dr
->aux
;
881 ca
= component_of (comp_father
, ia
);
888 comp
= XCNEW (struct component
);
889 comp
->refs
.create (comp_size
[ca
]);
890 comp
->eliminate_store_p
= eliminate_store_p
;
894 dataref
= XCNEW (struct dref_d
);
896 dataref
->stmt
= DR_STMT (dr
);
898 dataref
->distance
= 0;
900 dataref
->always_accessed
901 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
902 gimple_bb (dataref
->stmt
));
903 dataref
->pos
= comp
->refs
.length ();
904 comp
->refs
.quick_push (dataref
);
906 comp
->eliminate_store_p
= false;
909 for (i
= 0; i
< n
; i
++)
914 comp
->next
= comp_list
;
926 /* Returns true if the component COMP satisfies the conditions
927 described in 2) at the beginning of this file. LOOP is the current
931 suitable_component_p (struct loop
*loop
, struct component
*comp
)
935 basic_block ba
, bp
= loop
->header
;
936 bool ok
, has_write
= false;
938 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
940 ba
= gimple_bb (a
->stmt
);
942 if (!just_once_each_iteration_p (loop
, ba
))
945 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
948 if (DR_IS_WRITE (a
->ref
))
952 first
= comp
->refs
[0];
953 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
957 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
959 if (!determine_offset (first
->ref
, a
->ref
, &a
->offset
))
962 enum ref_step_type a_step
;
963 gcc_checking_assert (suitable_reference_p (a
->ref
, &a_step
)
964 && a_step
== comp
->comp_step
);
967 /* If there is a write inside the component, we must know whether the
968 step is nonzero or not -- we would not otherwise be able to recognize
969 whether the value accessed by reads comes from the OFFSET-th iteration
970 or the previous one. */
971 if (has_write
&& comp
->comp_step
== RS_ANY
)
977 /* Check the conditions on references inside each of components COMPS,
978 and remove the unsuitable components from the list. The new list
979 of components is returned. The conditions are described in 2) at
980 the beginning of this file. LOOP is the current loop. */
982 static struct component
*
983 filter_suitable_components (struct loop
*loop
, struct component
*comps
)
985 struct component
**comp
, *act
;
987 for (comp
= &comps
; *comp
; )
990 if (suitable_component_p (loop
, act
))
998 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
1000 release_component (act
);
1007 /* Compares two drefs A and B by their offset and position. Callback for
1011 order_drefs (const void *a
, const void *b
)
1013 const dref
*const da
= (const dref
*) a
;
1014 const dref
*const db
= (const dref
*) b
;
1015 int offcmp
= wi::cmps ((*da
)->offset
, (*db
)->offset
);
1020 return (*da
)->pos
- (*db
)->pos
;
1023 /* Returns root of the CHAIN. */
1026 get_chain_root (chain_p chain
)
1028 return chain
->refs
[0];
1031 /* Given CHAIN, returns the last ref at DISTANCE, or NULL if it doesn't
1035 get_chain_last_ref_at (chain_p chain
, unsigned distance
)
1039 for (i
= chain
->refs
.length (); i
> 0; i
--)
1040 if (distance
== chain
->refs
[i
- 1]->distance
)
1043 return (i
> 0) ? chain
->refs
[i
- 1] : NULL
;
1046 /* Adds REF to the chain CHAIN. */
1049 add_ref_to_chain (chain_p chain
, dref ref
)
1051 dref root
= get_chain_root (chain
);
1053 gcc_assert (wi::les_p (root
->offset
, ref
->offset
));
1054 widest_int dist
= ref
->offset
- root
->offset
;
1055 if (wi::leu_p (MAX_DISTANCE
, dist
))
1060 gcc_assert (wi::fits_uhwi_p (dist
));
1062 chain
->refs
.safe_push (ref
);
1064 ref
->distance
= dist
.to_uhwi ();
1066 if (ref
->distance
>= chain
->length
)
1068 chain
->length
= ref
->distance
;
1069 chain
->has_max_use_after
= false;
1072 /* Don't set the flag for store-store chain since there is no use. */
1073 if (chain
->type
!= CT_STORE_STORE
1074 && ref
->distance
== chain
->length
1075 && ref
->pos
> root
->pos
)
1076 chain
->has_max_use_after
= true;
1078 chain
->all_always_accessed
&= ref
->always_accessed
;
1081 /* Returns the chain for invariant component COMP. */
1084 make_invariant_chain (struct component
*comp
)
1086 chain_p chain
= XCNEW (struct chain
);
1090 chain
->type
= CT_INVARIANT
;
1092 chain
->all_always_accessed
= true;
1094 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
1096 chain
->refs
.safe_push (ref
);
1097 chain
->all_always_accessed
&= ref
->always_accessed
;
1100 chain
->inits
= vNULL
;
1101 chain
->finis
= vNULL
;
1106 /* Make a new chain of type TYPE rooted at REF. */
1109 make_rooted_chain (dref ref
, enum chain_type type
)
1111 chain_p chain
= XCNEW (struct chain
);
1114 chain
->refs
.safe_push (ref
);
1115 chain
->all_always_accessed
= ref
->always_accessed
;
1118 chain
->inits
= vNULL
;
1119 chain
->finis
= vNULL
;
1124 /* Returns true if CHAIN is not trivial. */
1127 nontrivial_chain_p (chain_p chain
)
1129 return chain
!= NULL
&& chain
->refs
.length () > 1;
1132 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1136 name_for_ref (dref ref
)
1140 if (is_gimple_assign (ref
->stmt
))
1142 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1143 name
= gimple_assign_lhs (ref
->stmt
);
1145 name
= gimple_assign_rhs1 (ref
->stmt
);
1148 name
= PHI_RESULT (ref
->stmt
);
1150 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1153 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1154 iterations of the innermost enclosing loop). */
1157 valid_initializer_p (struct data_reference
*ref
,
1158 unsigned distance
, struct data_reference
*root
)
1160 aff_tree diff
, base
, step
;
1163 /* Both REF and ROOT must be accessing the same object. */
1164 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1167 /* The initializer is defined outside of loop, hence its address must be
1168 invariant inside the loop. */
1169 gcc_assert (integer_zerop (DR_STEP (ref
)));
1171 /* If the address of the reference is invariant, initializer must access
1172 exactly the same location. */
1173 if (integer_zerop (DR_STEP (root
)))
1174 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1175 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1177 /* Verify that this index of REF is equal to the root's index at
1178 -DISTANCE-th iteration. */
1179 aff_combination_dr_offset (root
, &diff
);
1180 aff_combination_dr_offset (ref
, &base
);
1181 aff_combination_scale (&base
, -1);
1182 aff_combination_add (&diff
, &base
);
1184 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1185 &step
, &name_expansions
);
1186 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1189 if (off
!= distance
)
1195 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1196 initial value is correct (equal to initial value of REF shifted by one
1197 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1198 is the root of the current chain. */
1201 find_looparound_phi (struct loop
*loop
, dref ref
, dref root
)
1203 tree name
, init
, init_ref
;
1206 edge latch
= loop_latch_edge (loop
);
1207 struct data_reference init_dr
;
1210 if (is_gimple_assign (ref
->stmt
))
1212 if (DR_IS_READ (ref
->ref
))
1213 name
= gimple_assign_lhs (ref
->stmt
);
1215 name
= gimple_assign_rhs1 (ref
->stmt
);
1218 name
= PHI_RESULT (ref
->stmt
);
1222 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1225 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1229 if (gsi_end_p (psi
))
1232 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1233 if (TREE_CODE (init
) != SSA_NAME
)
1235 init_stmt
= SSA_NAME_DEF_STMT (init
);
1236 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1238 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1240 init_ref
= gimple_assign_rhs1 (init_stmt
);
1241 if (!REFERENCE_CLASS_P (init_ref
)
1242 && !DECL_P (init_ref
))
1245 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1246 loop enclosing PHI). */
1247 memset (&init_dr
, 0, sizeof (struct data_reference
));
1248 DR_REF (&init_dr
) = init_ref
;
1249 DR_STMT (&init_dr
) = phi
;
1250 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr
), init_ref
, loop
))
1253 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1259 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1262 insert_looparound_copy (chain_p chain
, dref ref
, gphi
*phi
)
1264 dref nw
= XCNEW (struct dref_d
), aref
;
1268 nw
->distance
= ref
->distance
+ 1;
1269 nw
->always_accessed
= 1;
1271 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1272 if (aref
->distance
>= nw
->distance
)
1274 chain
->refs
.safe_insert (i
, nw
);
1276 if (nw
->distance
> chain
->length
)
1278 chain
->length
= nw
->distance
;
1279 chain
->has_max_use_after
= false;
1283 /* For references in CHAIN that are copied around the LOOP (created previously
1284 by PRE, or by user), add the results of such copies to the chain. This
1285 enables us to remove the copies by unrolling, and may need less registers
1286 (also, it may allow us to combine chains together). */
1289 add_looparound_copies (struct loop
*loop
, chain_p chain
)
1292 dref ref
, root
= get_chain_root (chain
);
1295 if (chain
->type
== CT_STORE_STORE
)
1298 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1300 phi
= find_looparound_phi (loop
, ref
, root
);
1304 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1305 insert_looparound_copy (chain
, ref
, phi
);
1309 /* Find roots of the values and determine distances in the component COMP.
1310 The references are redistributed into CHAINS. LOOP is the current
1314 determine_roots_comp (struct loop
*loop
,
1315 struct component
*comp
,
1316 vec
<chain_p
> *chains
)
1320 chain_p chain
= NULL
;
1321 widest_int last_ofs
= 0;
1322 enum chain_type type
;
1324 /* Invariants are handled specially. */
1325 if (comp
->comp_step
== RS_INVARIANT
)
1327 chain
= make_invariant_chain (comp
);
1328 chains
->safe_push (chain
);
1332 /* Trivial component. */
1333 if (comp
->refs
.length () <= 1)
1336 comp
->refs
.qsort (order_drefs
);
1337 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1340 || (!comp
->eliminate_store_p
&& DR_IS_WRITE (a
->ref
))
1341 || wi::leu_p (MAX_DISTANCE
, a
->offset
- last_ofs
))
1343 if (nontrivial_chain_p (chain
))
1345 add_looparound_copies (loop
, chain
);
1346 chains
->safe_push (chain
);
1349 release_chain (chain
);
1351 if (DR_IS_READ (a
->ref
))
1354 type
= comp
->eliminate_store_p
? CT_STORE_STORE
: CT_STORE_LOAD
;
1356 chain
= make_rooted_chain (a
, type
);
1357 last_ofs
= a
->offset
;
1361 add_ref_to_chain (chain
, a
);
1364 if (nontrivial_chain_p (chain
))
1366 add_looparound_copies (loop
, chain
);
1367 chains
->safe_push (chain
);
1370 release_chain (chain
);
1373 /* Find roots of the values and determine distances in components COMPS, and
1374 separates the references to CHAINS. LOOP is the current loop. */
1377 determine_roots (struct loop
*loop
,
1378 struct component
*comps
, vec
<chain_p
> *chains
)
1380 struct component
*comp
;
1382 for (comp
= comps
; comp
; comp
= comp
->next
)
1383 determine_roots_comp (loop
, comp
, chains
);
1386 /* Replace the reference in statement STMT with temporary variable
1387 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1388 the reference in the statement. IN_LHS is true if the reference
1389 is in the lhs of STMT, false if it is in rhs. */
1392 replace_ref_with (gimple
*stmt
, tree new_tree
, bool set
, bool in_lhs
)
1396 gimple_stmt_iterator bsi
, psi
;
1398 if (gimple_code (stmt
) == GIMPLE_PHI
)
1400 gcc_assert (!in_lhs
&& !set
);
1402 val
= PHI_RESULT (stmt
);
1403 bsi
= gsi_after_labels (gimple_bb (stmt
));
1404 psi
= gsi_for_stmt (stmt
);
1405 remove_phi_node (&psi
, false);
1407 /* Turn the phi node into GIMPLE_ASSIGN. */
1408 new_stmt
= gimple_build_assign (val
, new_tree
);
1409 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1413 /* Since the reference is of gimple_reg type, it should only
1414 appear as lhs or rhs of modify statement. */
1415 gcc_assert (is_gimple_assign (stmt
));
1417 bsi
= gsi_for_stmt (stmt
);
1419 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1422 gcc_assert (!in_lhs
);
1423 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1424 stmt
= gsi_stmt (bsi
);
1431 /* We have statement
1435 If OLD is a memory reference, then VAL is gimple_val, and we transform
1441 Otherwise, we are replacing a combination chain,
1442 VAL is the expression that performs the combination, and OLD is an
1443 SSA name. In this case, we transform the assignment to
1450 val
= gimple_assign_lhs (stmt
);
1451 if (TREE_CODE (val
) != SSA_NAME
)
1453 val
= gimple_assign_rhs1 (stmt
);
1454 gcc_assert (gimple_assign_single_p (stmt
));
1455 if (TREE_CLOBBER_P (val
))
1456 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1458 gcc_assert (gimple_assign_copy_p (stmt
));
1470 val
= gimple_assign_lhs (stmt
);
1473 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1474 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1477 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1478 of the loop it was analyzed in. Append init stmts to STMTS. */
1481 ref_at_iteration (data_reference_p dr
, int iter
,
1482 gimple_seq
*stmts
, tree niters
= NULL_TREE
)
1484 tree off
= DR_OFFSET (dr
);
1485 tree coff
= DR_INIT (dr
);
1486 tree ref
= DR_REF (dr
);
1487 enum tree_code ref_code
= ERROR_MARK
;
1488 tree ref_type
= NULL_TREE
;
1489 tree ref_op1
= NULL_TREE
;
1490 tree ref_op2
= NULL_TREE
;
1495 new_offset
= size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
));
1496 if (TREE_CODE (new_offset
) == INTEGER_CST
)
1497 coff
= size_binop (PLUS_EXPR
, coff
, new_offset
);
1499 off
= size_binop (PLUS_EXPR
, off
, new_offset
);
1502 if (niters
!= NULL_TREE
)
1504 niters
= fold_convert (ssizetype
, niters
);
1505 new_offset
= size_binop (MULT_EXPR
, DR_STEP (dr
), niters
);
1506 if (TREE_CODE (niters
) == INTEGER_CST
)
1507 coff
= size_binop (PLUS_EXPR
, coff
, new_offset
);
1509 off
= size_binop (PLUS_EXPR
, off
, new_offset
);
1512 /* While data-ref analysis punts on bit offsets it still handles
1513 bitfield accesses at byte boundaries. Cope with that. Note that
1514 if the bitfield object also starts at a byte-boundary we can simply
1515 replicate the COMPONENT_REF, but we have to subtract the component's
1516 byte-offset from the MEM_REF address first.
1517 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1518 start at offset zero. */
1519 if (TREE_CODE (ref
) == COMPONENT_REF
1520 && DECL_BIT_FIELD (TREE_OPERAND (ref
, 1)))
1522 unsigned HOST_WIDE_INT boff
;
1523 tree field
= TREE_OPERAND (ref
, 1);
1524 tree offset
= component_ref_field_offset (ref
);
1525 ref_type
= TREE_TYPE (ref
);
1526 boff
= tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field
));
1527 /* This can occur in Ada. See the comment in get_bit_range. */
1528 if (boff
% BITS_PER_UNIT
!= 0
1529 || !tree_fits_uhwi_p (offset
))
1531 ref_code
= BIT_FIELD_REF
;
1532 ref_op1
= DECL_SIZE (field
);
1533 ref_op2
= bitsize_zero_node
;
1537 boff
>>= LOG2_BITS_PER_UNIT
;
1538 boff
+= tree_to_uhwi (offset
);
1539 coff
= size_binop (MINUS_EXPR
, coff
, ssize_int (boff
));
1540 ref_code
= COMPONENT_REF
;
1542 ref_op2
= TREE_OPERAND (ref
, 2);
1543 ref
= TREE_OPERAND (ref
, 0);
1546 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1547 addr
= force_gimple_operand_1 (unshare_expr (addr
), stmts
,
1548 is_gimple_mem_ref_addr
, NULL_TREE
);
1549 tree alias_ptr
= fold_convert (reference_alias_ptr_type (ref
), coff
);
1550 tree type
= build_aligned_type (TREE_TYPE (ref
),
1551 get_object_alignment (ref
));
1552 ref
= build2 (MEM_REF
, type
, addr
, alias_ptr
);
1554 ref
= build3 (ref_code
, ref_type
, ref
, ref_op1
, ref_op2
);
1558 /* Get the initialization expression for the INDEX-th temporary variable
1562 get_init_expr (chain_p chain
, unsigned index
)
1564 if (chain
->type
== CT_COMBINATION
)
1566 tree e1
= get_init_expr (chain
->ch1
, index
);
1567 tree e2
= get_init_expr (chain
->ch2
, index
);
1569 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1572 return chain
->inits
[index
];
1575 /* Returns a new temporary variable used for the I-th variable carrying
1576 value of REF. The variable's uid is marked in TMP_VARS. */
1579 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1581 tree type
= TREE_TYPE (ref
);
1582 /* We never access the components of the temporary variable in predictive
1584 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
1585 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1589 /* Creates the variables for CHAIN, as well as phi nodes for them and
1590 initialization on entry to LOOP. Uids of the newly created
1591 temporary variables are marked in TMP_VARS. */
1594 initialize_root_vars (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1597 unsigned n
= chain
->length
;
1598 dref root
= get_chain_root (chain
);
1599 bool reuse_first
= !chain
->has_max_use_after
;
1600 tree ref
, init
, var
, next
;
1603 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1605 /* If N == 0, then all the references are within the single iteration. And
1606 since this is an nonempty chain, reuse_first cannot be true. */
1607 gcc_assert (n
> 0 || !reuse_first
);
1609 chain
->vars
.create (n
+ 1);
1611 if (chain
->type
== CT_COMBINATION
)
1612 ref
= gimple_assign_lhs (root
->stmt
);
1614 ref
= DR_REF (root
->ref
);
1616 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1618 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1619 chain
->vars
.quick_push (var
);
1622 chain
->vars
.quick_push (chain
->vars
[0]);
1624 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1625 chain
->vars
[i
] = make_ssa_name (var
);
1627 for (i
= 0; i
< n
; i
++)
1629 var
= chain
->vars
[i
];
1630 next
= chain
->vars
[i
+ 1];
1631 init
= get_init_expr (chain
, i
);
1633 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1635 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1637 phi
= create_phi_node (var
, loop
->header
);
1638 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1639 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1643 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1644 all stores to be eliminated store loop invariant values into memory.
1645 In this case, we can use these invariant values directly after LOOP. */
1648 is_inv_store_elimination_chain (struct loop
*loop
, chain_p chain
)
1650 if (chain
->length
== 0 || chain
->type
!= CT_STORE_STORE
)
1653 gcc_assert (!chain
->has_max_use_after
);
1655 /* If loop iterates for unknown times or fewer times than chain->lenght,
1656 we still need to setup root variable and propagate it with PHI node. */
1657 tree niters
= number_of_latch_executions (loop
);
1658 if (TREE_CODE (niters
) != INTEGER_CST
1659 || wi::leu_p (wi::to_wide (niters
), chain
->length
))
1662 /* Check stores in chain for elimination if they only store loop invariant
1664 for (unsigned i
= 0; i
< chain
->length
; i
++)
1666 dref a
= get_chain_last_ref_at (chain
, i
);
1670 gimple
*def_stmt
, *stmt
= a
->stmt
;
1671 if (!gimple_assign_single_p (stmt
))
1674 tree val
= gimple_assign_rhs1 (stmt
);
1675 if (TREE_CLOBBER_P (val
))
1678 if (CONSTANT_CLASS_P (val
))
1681 if (TREE_CODE (val
) != SSA_NAME
)
1684 def_stmt
= SSA_NAME_DEF_STMT (val
);
1685 if (gimple_nop_p (def_stmt
))
1688 if (flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
1694 /* Creates root variables for store elimination CHAIN in which stores for
1695 elimination only store loop invariant values. In this case, we neither
1696 need to load root variables before loop nor propagate it with PHI nodes. */
1699 initialize_root_vars_store_elim_1 (chain_p chain
)
1702 unsigned i
, n
= chain
->length
;
1704 chain
->vars
.create (n
);
1705 chain
->vars
.safe_grow_cleared (n
);
1707 /* Initialize root value for eliminated stores at each distance. */
1708 for (i
= 0; i
< n
; i
++)
1710 dref a
= get_chain_last_ref_at (chain
, i
);
1714 var
= gimple_assign_rhs1 (a
->stmt
);
1715 chain
->vars
[a
->distance
] = var
;
1718 /* We don't propagate values with PHI nodes, so manually propagate value
1719 to bubble positions. */
1720 var
= chain
->vars
[0];
1721 for (i
= 1; i
< n
; i
++)
1723 if (chain
->vars
[i
] != NULL_TREE
)
1725 var
= chain
->vars
[i
];
1728 chain
->vars
[i
] = var
;
1731 /* Revert the vector. */
1732 for (i
= 0; i
< n
/ 2; i
++)
1733 std::swap (chain
->vars
[i
], chain
->vars
[n
- i
- 1]);
1736 /* Creates root variables for store elimination CHAIN in which stores for
1737 elimination store loop variant values. In this case, we may need to
1738 load root variables before LOOP and propagate it with PHI nodes. Uids
1739 of the newly created root variables are marked in TMP_VARS. */
1742 initialize_root_vars_store_elim_2 (struct loop
*loop
,
1743 chain_p chain
, bitmap tmp_vars
)
1745 unsigned i
, n
= chain
->length
;
1746 tree ref
, init
, var
, next
, val
, phi_result
;
1750 chain
->vars
.create (n
);
1752 ref
= DR_REF (get_chain_root (chain
)->ref
);
1753 for (i
= 0; i
< n
; i
++)
1755 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1756 chain
->vars
.quick_push (var
);
1759 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1760 chain
->vars
[i
] = make_ssa_name (var
);
1762 /* Root values are either rhs operand of stores to be eliminated, or
1763 loaded from memory before loop. */
1764 auto_vec
<tree
> vtemps
;
1765 vtemps
.safe_grow_cleared (n
);
1766 for (i
= 0; i
< n
; i
++)
1768 init
= get_init_expr (chain
, i
);
1769 if (init
== NULL_TREE
)
1771 /* Root value is rhs operand of the store to be eliminated if
1772 it isn't loaded from memory before loop. */
1773 dref a
= get_chain_last_ref_at (chain
, i
);
1774 val
= gimple_assign_rhs1 (a
->stmt
);
1775 if (TREE_CLOBBER_P (val
))
1776 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (var
));
1778 vtemps
[n
- i
- 1] = val
;
1782 edge latch
= loop_latch_edge (loop
);
1783 edge entry
= loop_preheader_edge (loop
);
1785 /* Root value is loaded from memory before loop, we also need
1786 to add PHI nodes to propagate the value across iterations. */
1787 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1789 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1791 next
= chain
->vars
[n
- i
];
1792 phi_result
= copy_ssa_name (next
);
1793 gphi
*phi
= create_phi_node (phi_result
, loop
->header
);
1794 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1795 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1796 vtemps
[n
- i
- 1] = phi_result
;
1800 /* Find the insertion position. */
1801 dref last
= get_chain_root (chain
);
1802 for (i
= 0; i
< chain
->refs
.length (); i
++)
1804 if (chain
->refs
[i
]->pos
> last
->pos
)
1805 last
= chain
->refs
[i
];
1808 gimple_stmt_iterator gsi
= gsi_for_stmt (last
->stmt
);
1810 /* Insert statements copying root value to root variable. */
1811 for (i
= 0; i
< n
; i
++)
1813 var
= chain
->vars
[i
];
1815 stmt
= gimple_build_assign (var
, val
);
1816 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1820 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1821 (CHAIN->length - 1) iterations. */
1824 finalize_eliminated_stores (struct loop
*loop
, chain_p chain
)
1826 unsigned i
, n
= chain
->length
;
1828 for (i
= 0; i
< n
; i
++)
1830 tree var
= chain
->vars
[i
];
1831 tree fini
= chain
->finis
[n
- i
- 1];
1832 gimple
*stmt
= gimple_build_assign (fini
, var
);
1834 gimple_seq_add_stmt_without_update (&chain
->fini_seq
, stmt
);
1837 if (chain
->fini_seq
)
1839 gsi_insert_seq_on_edge_immediate (single_exit (loop
), chain
->fini_seq
);
1840 chain
->fini_seq
= NULL
;
1844 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1845 initialization on entry to LOOP if necessary. The ssa name for the variable
1846 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1847 around the loop is created. Uid of the newly created temporary variable
1848 is marked in TMP_VARS. INITS is the list containing the (single)
1852 initialize_root_vars_lm (struct loop
*loop
, dref root
, bool written
,
1853 vec
<tree
> *vars
, vec
<tree
> inits
,
1857 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1860 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1862 /* Find the initializer for the variable, and check that it cannot
1866 vars
->create (written
? 2 : 1);
1867 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1868 vars
->quick_push (var
);
1870 vars
->quick_push ((*vars
)[0]);
1872 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1873 (*vars
)[i
] = make_ssa_name (var
);
1877 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1879 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1884 phi
= create_phi_node (var
, loop
->header
);
1885 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1886 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1890 gassign
*init_stmt
= gimple_build_assign (var
, init
);
1891 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1896 /* Execute load motion for references in chain CHAIN. Uids of the newly
1897 created temporary variables are marked in TMP_VARS. */
1900 execute_load_motion (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1902 auto_vec
<tree
> vars
;
1904 unsigned n_writes
= 0, ridx
, i
;
1907 gcc_assert (chain
->type
== CT_INVARIANT
);
1908 gcc_assert (!chain
->combined
);
1909 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1910 if (DR_IS_WRITE (a
->ref
))
1913 /* If there are no reads in the loop, there is nothing to do. */
1914 if (n_writes
== chain
->refs
.length ())
1917 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
1918 &vars
, chain
->inits
, tmp_vars
);
1921 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1923 bool is_read
= DR_IS_READ (a
->ref
);
1925 if (DR_IS_WRITE (a
->ref
))
1931 var
= make_ssa_name (SSA_NAME_VAR (var
));
1938 replace_ref_with (a
->stmt
, vars
[ridx
],
1939 !is_read
, !is_read
);
1943 /* Returns the single statement in that NAME is used, excepting
1944 the looparound phi nodes contained in one of the chains. If there is no
1945 such statement, or more statements, NULL is returned. */
1948 single_nonlooparound_use (tree name
)
1951 imm_use_iterator it
;
1952 gimple
*stmt
, *ret
= NULL
;
1954 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
1956 stmt
= USE_STMT (use
);
1958 if (gimple_code (stmt
) == GIMPLE_PHI
)
1960 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1961 could not be processed anyway, so just fail for them. */
1962 if (bitmap_bit_p (looparound_phis
,
1963 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
1968 else if (is_gimple_debug (stmt
))
1970 else if (ret
!= NULL
)
1979 /* Remove statement STMT, as well as the chain of assignments in that it is
1983 remove_stmt (gimple
*stmt
)
1987 gimple_stmt_iterator psi
;
1989 if (gimple_code (stmt
) == GIMPLE_PHI
)
1991 name
= PHI_RESULT (stmt
);
1992 next
= single_nonlooparound_use (name
);
1993 reset_debug_uses (stmt
);
1994 psi
= gsi_for_stmt (stmt
);
1995 remove_phi_node (&psi
, true);
1998 || !gimple_assign_ssa_name_copy_p (next
)
1999 || gimple_assign_rhs1 (next
) != name
)
2007 gimple_stmt_iterator bsi
;
2009 bsi
= gsi_for_stmt (stmt
);
2011 name
= gimple_assign_lhs (stmt
);
2012 if (TREE_CODE (name
) == SSA_NAME
)
2014 next
= single_nonlooparound_use (name
);
2015 reset_debug_uses (stmt
);
2019 /* This is a store to be eliminated. */
2020 gcc_assert (gimple_vdef (stmt
) != NULL
);
2024 unlink_stmt_vdef (stmt
);
2025 gsi_remove (&bsi
, true);
2026 release_defs (stmt
);
2029 || !gimple_assign_ssa_name_copy_p (next
)
2030 || gimple_assign_rhs1 (next
) != name
)
2037 /* Perform the predictive commoning optimization for a chain CHAIN.
2038 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2041 execute_pred_commoning_chain (struct loop
*loop
, chain_p chain
,
2049 if (chain
->combined
)
2051 /* For combined chains, just remove the statements that are used to
2052 compute the values of the expression (except for the root one).
2053 We delay this until after all chains are processed. */
2055 else if (chain
->type
== CT_STORE_STORE
)
2057 if (chain
->length
> 0)
2059 if (chain
->inv_store_elimination
)
2061 /* If dead stores in this chain only store loop invariant
2062 values, we can simply record the invariant value and use
2063 it directly after loop. */
2064 initialize_root_vars_store_elim_1 (chain
);
2068 /* If dead stores in this chain store loop variant values,
2069 we need to set up the variables by loading from memory
2070 before loop and propagating it with PHI nodes. */
2071 initialize_root_vars_store_elim_2 (loop
, chain
, tmp_vars
);
2074 /* For inter-iteration store elimination chain, stores at each
2075 distance in loop's last (chain->length - 1) iterations can't
2076 be eliminated, because there is no following killing store.
2077 We need to generate these stores after loop. */
2078 finalize_eliminated_stores (loop
, chain
);
2081 /* Eliminate the stores killed by following store. */
2082 n
= chain
->refs
.length ();
2083 for (i
= 0; i
< n
- 1; i
++)
2084 remove_stmt (chain
->refs
[i
]->stmt
);
2088 /* For non-combined chains, set up the variables that hold its value. */
2089 initialize_root_vars (loop
, chain
, tmp_vars
);
2090 a
= get_chain_root (chain
);
2091 in_lhs
= (chain
->type
== CT_STORE_LOAD
2092 || chain
->type
== CT_COMBINATION
);
2093 replace_ref_with (a
->stmt
, chain
->vars
[chain
->length
], true, in_lhs
);
2095 /* Replace the uses of the original references by these variables. */
2096 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
2098 var
= chain
->vars
[chain
->length
- a
->distance
];
2099 replace_ref_with (a
->stmt
, var
, false, false);
2104 /* Determines the unroll factor necessary to remove as many temporary variable
2105 copies as possible. CHAINS is the list of chains that will be
2109 determine_unroll_factor (vec
<chain_p
> chains
)
2112 unsigned factor
= 1, af
, nfactor
, i
;
2113 unsigned max
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
2115 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2117 if (chain
->type
== CT_INVARIANT
)
2119 /* For now we can't handle unrolling when eliminating stores. */
2120 else if (chain
->type
== CT_STORE_STORE
)
2123 if (chain
->combined
)
2125 /* For combined chains, we can't handle unrolling if we replace
2129 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
2130 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
2135 /* The best unroll factor for this chain is equal to the number of
2136 temporary variables that we create for it. */
2138 if (chain
->has_max_use_after
)
2141 nfactor
= factor
* af
/ gcd (factor
, af
);
2149 /* Perform the predictive commoning optimization for CHAINS.
2150 Uids of the newly created temporary variables are marked in TMP_VARS. */
2153 execute_pred_commoning (struct loop
*loop
, vec
<chain_p
> chains
,
2159 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2161 if (chain
->type
== CT_INVARIANT
)
2162 execute_load_motion (loop
, chain
, tmp_vars
);
2164 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
2167 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2169 if (chain
->type
== CT_INVARIANT
)
2171 else if (chain
->combined
)
2173 /* For combined chains, just remove the statements that are used to
2174 compute the values of the expression (except for the root one). */
2177 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
2178 remove_stmt (a
->stmt
);
2182 update_ssa (TODO_update_ssa_only_virtuals
);
2185 /* For each reference in CHAINS, if its defining statement is
2186 phi node, record the ssa name that is defined by it. */
2189 replace_phis_by_defined_names (vec
<chain_p
> chains
)
2195 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2196 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
2198 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
2200 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
2206 /* For each reference in CHAINS, if name_defined_by_phi is not
2207 NULL, use it to set the stmt field. */
2210 replace_names_by_phis (vec
<chain_p
> chains
)
2216 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2217 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
2218 if (a
->stmt
== NULL
)
2220 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
2221 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
2222 a
->name_defined_by_phi
= NULL_TREE
;
2226 /* Wrapper over execute_pred_commoning, to pass it as a callback
2227 to tree_transform_and_unroll_loop. */
2231 vec
<chain_p
> chains
;
2236 execute_pred_commoning_cbck (struct loop
*loop
, void *data
)
2238 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
2240 /* Restore phi nodes that were replaced by ssa names before
2241 tree_transform_and_unroll_loop (see detailed description in
2242 tree_predictive_commoning_loop). */
2243 replace_names_by_phis (dta
->chains
);
2244 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
2247 /* Base NAME and all the names in the chain of phi nodes that use it
2248 on variable VAR. The phi nodes are recognized by being in the copies of
2249 the header of the LOOP. */
2252 base_names_in_chain_on (struct loop
*loop
, tree name
, tree var
)
2255 imm_use_iterator iter
;
2257 replace_ssa_name_symbol (name
, var
);
2262 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
2264 if (gimple_code (stmt
) == GIMPLE_PHI
2265 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
2268 BREAK_FROM_IMM_USE_STMT (iter
);
2274 name
= PHI_RESULT (phi
);
2275 replace_ssa_name_symbol (name
, var
);
2279 /* Given an unrolled LOOP after predictive commoning, remove the
2280 register copies arising from phi nodes by changing the base
2281 variables of SSA names. TMP_VARS is the set of the temporary variables
2282 for those we want to perform this. */
2285 eliminate_temp_copies (struct loop
*loop
, bitmap tmp_vars
)
2290 tree name
, use
, var
;
2293 e
= loop_latch_edge (loop
);
2294 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
2297 name
= PHI_RESULT (phi
);
2298 var
= SSA_NAME_VAR (name
);
2299 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
2301 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
2302 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
2304 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2305 stmt
= SSA_NAME_DEF_STMT (use
);
2306 while (gimple_code (stmt
) == GIMPLE_PHI
2307 /* In case we could not unroll the loop enough to eliminate
2308 all copies, we may reach the loop header before the defining
2309 statement (in that case, some register copies will be present
2310 in loop latch in the final code, corresponding to the newly
2311 created looparound phi nodes). */
2312 && gimple_bb (stmt
) != loop
->header
)
2314 gcc_assert (single_pred_p (gimple_bb (stmt
)));
2315 use
= PHI_ARG_DEF (stmt
, 0);
2316 stmt
= SSA_NAME_DEF_STMT (use
);
2319 base_names_in_chain_on (loop
, use
, var
);
2323 /* Returns true if CHAIN is suitable to be combined. */
2326 chain_can_be_combined_p (chain_p chain
)
2328 return (!chain
->combined
2329 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
2332 /* Returns the modify statement that uses NAME. Skips over assignment
2333 statements, NAME is replaced with the actual name used in the returned
2337 find_use_stmt (tree
*name
)
2342 /* Skip over assignments. */
2345 stmt
= single_nonlooparound_use (*name
);
2349 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
2352 lhs
= gimple_assign_lhs (stmt
);
2353 if (TREE_CODE (lhs
) != SSA_NAME
)
2356 if (gimple_assign_copy_p (stmt
))
2358 rhs
= gimple_assign_rhs1 (stmt
);
2364 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
2365 == GIMPLE_BINARY_RHS
)
2372 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2375 may_reassociate_p (tree type
, enum tree_code code
)
2377 if (FLOAT_TYPE_P (type
)
2378 && !flag_unsafe_math_optimizations
)
2381 return (commutative_tree_code (code
)
2382 && associative_tree_code (code
));
2385 /* If the operation used in STMT is associative and commutative, go through the
2386 tree of the same operations and returns its root. Distance to the root
2387 is stored in DISTANCE. */
2390 find_associative_operation_root (gimple
*stmt
, unsigned *distance
)
2394 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2395 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2398 if (!may_reassociate_p (type
, code
))
2403 lhs
= gimple_assign_lhs (stmt
);
2404 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
2406 next
= find_use_stmt (&lhs
);
2408 || gimple_assign_rhs_code (next
) != code
)
2420 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2421 is no such statement, returns NULL_TREE. In case the operation used on
2422 NAME1 and NAME2 is associative and commutative, returns the root of the
2423 tree formed by this operation instead of the statement that uses NAME1 or
2427 find_common_use_stmt (tree
*name1
, tree
*name2
)
2429 gimple
*stmt1
, *stmt2
;
2431 stmt1
= find_use_stmt (name1
);
2435 stmt2
= find_use_stmt (name2
);
2442 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2445 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2449 return (stmt1
== stmt2
? stmt1
: NULL
);
2452 /* Checks whether R1 and R2 are combined together using CODE, with the result
2453 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2454 if it is true. If CODE is ERROR_MARK, set these values instead. */
2457 combinable_refs_p (dref r1
, dref r2
,
2458 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2460 enum tree_code acode
;
2466 name1
= name_for_ref (r1
);
2467 name2
= name_for_ref (r2
);
2468 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2470 stmt
= find_common_use_stmt (&name1
, &name2
);
2473 /* A simple post-dominance check - make sure the combination
2474 is executed under the same condition as the references. */
2475 || (gimple_bb (stmt
) != gimple_bb (r1
->stmt
)
2476 && gimple_bb (stmt
) != gimple_bb (r2
->stmt
)))
2479 acode
= gimple_assign_rhs_code (stmt
);
2480 aswap
= (!commutative_tree_code (acode
)
2481 && gimple_assign_rhs1 (stmt
) != name1
);
2482 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
2484 if (*code
== ERROR_MARK
)
2492 return (*code
== acode
2494 && *rslt_type
== atype
);
2497 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2498 an assignment of the remaining operand. */
2501 remove_name_from_operation (gimple
*stmt
, tree op
)
2504 gimple_stmt_iterator si
;
2506 gcc_assert (is_gimple_assign (stmt
));
2508 if (gimple_assign_rhs1 (stmt
) == op
)
2509 other_op
= gimple_assign_rhs2 (stmt
);
2511 other_op
= gimple_assign_rhs1 (stmt
);
2513 si
= gsi_for_stmt (stmt
);
2514 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2516 /* We should not have reallocated STMT. */
2517 gcc_assert (gsi_stmt (si
) == stmt
);
2522 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2523 are combined in a single statement, and returns this statement. Note the
2524 statement is inserted before INSERT_BEFORE if it's not NULL. */
2527 reassociate_to_the_same_stmt (tree name1
, tree name2
, gimple
*insert_before
)
2529 gimple
*stmt1
, *stmt2
, *root1
, *root2
, *s1
, *s2
;
2530 gassign
*new_stmt
, *tmp_stmt
;
2531 tree new_name
, tmp_name
, var
, r1
, r2
;
2532 unsigned dist1
, dist2
;
2533 enum tree_code code
;
2534 tree type
= TREE_TYPE (name1
);
2535 gimple_stmt_iterator bsi
;
2537 stmt1
= find_use_stmt (&name1
);
2538 stmt2
= find_use_stmt (&name2
);
2539 root1
= find_associative_operation_root (stmt1
, &dist1
);
2540 root2
= find_associative_operation_root (stmt2
, &dist2
);
2541 code
= gimple_assign_rhs_code (stmt1
);
2543 gcc_assert (root1
&& root2
&& root1
== root2
2544 && code
== gimple_assign_rhs_code (stmt2
));
2546 /* Find the root of the nearest expression in that both NAME1 and NAME2
2553 while (dist1
> dist2
)
2555 s1
= find_use_stmt (&r1
);
2556 r1
= gimple_assign_lhs (s1
);
2559 while (dist2
> dist1
)
2561 s2
= find_use_stmt (&r2
);
2562 r2
= gimple_assign_lhs (s2
);
2568 s1
= find_use_stmt (&r1
);
2569 r1
= gimple_assign_lhs (s1
);
2570 s2
= find_use_stmt (&r2
);
2571 r2
= gimple_assign_lhs (s2
);
2574 /* Remove NAME1 and NAME2 from the statements in that they are used
2576 remove_name_from_operation (stmt1
, name1
);
2577 remove_name_from_operation (stmt2
, name2
);
2579 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2580 combine it with the rhs of S1. */
2581 var
= create_tmp_reg (type
, "predreastmp");
2582 new_name
= make_ssa_name (var
);
2583 new_stmt
= gimple_build_assign (new_name
, code
, name1
, name2
);
2584 if (insert_before
&& stmt_dominates_stmt_p (insert_before
, s1
))
2585 bsi
= gsi_for_stmt (insert_before
);
2587 bsi
= gsi_for_stmt (s1
);
2589 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2591 var
= create_tmp_reg (type
, "predreastmp");
2592 tmp_name
= make_ssa_name (var
);
2594 /* Rhs of S1 may now be either a binary expression with operation
2595 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2596 so that name1 or name2 was removed from it). */
2597 tmp_stmt
= gimple_build_assign (tmp_name
, gimple_assign_rhs_code (s1
),
2598 gimple_assign_rhs1 (s1
),
2599 gimple_assign_rhs2 (s1
));
2601 bsi
= gsi_for_stmt (s1
);
2602 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2603 s1
= gsi_stmt (bsi
);
2606 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2611 /* Returns the statement that combines references R1 and R2. In case R1
2612 and R2 are not used in the same statement, but they are used with an
2613 associative and commutative operation in the same expression, reassociate
2614 the expression so that they are used in the same statement. The combined
2615 statement is inserted before INSERT_BEFORE if it's not NULL. */
2618 stmt_combining_refs (dref r1
, dref r2
, gimple
*insert_before
)
2620 gimple
*stmt1
, *stmt2
;
2621 tree name1
= name_for_ref (r1
);
2622 tree name2
= name_for_ref (r2
);
2624 stmt1
= find_use_stmt (&name1
);
2625 stmt2
= find_use_stmt (&name2
);
2629 return reassociate_to_the_same_stmt (name1
, name2
, insert_before
);
2632 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2633 description of the new chain is returned, otherwise we return NULL. */
/* NOTE(review): truncated extraction -- the return type, braces, several
   local declarations (i, j, num, r1, r2, nw, root_stmt, swap) and the
   early "return NULL" guards are missing; verify against upstream
   tree-predcom.c.  */
2636 combine_chains (chain_p ch1
, chain_p ch2
)
2639 enum tree_code op
= ERROR_MARK
;
2644 tree rslt_type
= NULL_TREE
;
/* Chains may only combine when their lengths and pairwise reference
   distances match, and each ref pair is combinable.  */
2648 if (ch1
->length
!= ch2
->length
)
2651 if (ch1
->refs
.length () != ch2
->refs
.length ())
2654 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2655 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2657 if (r1
->distance
!= r2
->distance
)
2660 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
/* Mark both input chains as consumed by the combination.  */
2664 ch1
->combined
= true;
2665 ch2
->combined
= true;
2668 std::swap (ch1
, ch2
);
/* Build the CT_COMBINATION chain describing op (ch1, ch2).  */
2670 new_chain
= XCNEW (struct chain
);
2671 new_chain
->type
= CT_COMBINATION
;
2673 new_chain
->ch1
= ch1
;
2674 new_chain
->ch2
= ch2
;
2675 new_chain
->rslt_type
= rslt_type
;
2676 new_chain
->length
= ch1
->length
;
2678 gimple
*insert
= NULL
;
2679 num
= ch1
->refs
.length ();
2680 i
= (new_chain
->length
== 0) ? num
- 1 : 0;
2681 j
= (new_chain
->length
== 0) ? -1 : 1;
2682 /* For ZERO length chain, process refs in reverse order so that dominant
2683 position is ready when it comes to the root ref.
2684 For non-ZERO length chain, process refs in order. See PR79663. */
2685 for (; num
> 0; num
--, i
+= j
)
2689 nw
= XCNEW (struct dref_d
);
2690 nw
->distance
= r1
->distance
;
2692 /* For ZERO length chain, insert combined stmt of root ref at dominant
   position ...  */
2694 nw
->stmt
= stmt_combining_refs (r1
, r2
, i
== 0 ? insert
: NULL
);
2695 /* For ZERO length chain, record dominant position where combined stmt
2696 of root ref should be inserted. In this case, though all root refs
2697 dominate following ones, it's possible that combined stmt doesn't.
   ...  */
2699 if (new_chain
->length
== 0
2700 && (insert
== NULL
|| stmt_dominates_stmt_p (nw
->stmt
, insert
)))
2703 new_chain
->refs
.safe_push (nw
);
2705 if (new_chain
->length
== 0)
2707 /* Restore the order for ZERO length chain's refs. */
2708 num
= new_chain
->refs
.length () >> 1;
2709 for (i
= 0, j
= new_chain
->refs
.length () - 1; i
< num
; i
++, j
--)
2710 std::swap (new_chain
->refs
[i
], new_chain
->refs
[j
]);
2712 /* For ZERO length chain, has_max_use_after must be true since root
2713 combined stmt must dominate others. */
2714 new_chain
->has_max_use_after
= true;
/* Non-zero length: recompute has_max_use_after by checking whether any
   ref at maximal distance fails to be dominated by the root stmt.  */
2718 new_chain
->has_max_use_after
= false;
2719 root_stmt
= get_chain_root (new_chain
)->stmt
;
2720 for (i
= 1; new_chain
->refs
.iterate (i
, &nw
); i
++)
2722 if (nw
->distance
== new_chain
->length
2723 && !stmt_dominates_stmt_p (nw
->stmt
, root_stmt
))
2725 new_chain
->has_max_use_after
= true;
2733 /* Try to combine the CHAINS. */
/* NOTE(review): truncated extraction -- return type, braces, loop-body
   `continue`s and local declarations (i, j) are missing; verify against
   upstream tree-predcom.c.  */
2736 try_combine_chains (vec
<chain_p
> *chains
)
2739 chain_p ch1
, ch2
, cch
;
2740 auto_vec
<chain_p
> worklist
;
/* Seed the worklist with every combinable chain.  */
2742 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2743 if (chain_can_be_combined_p (ch1
))
2744 worklist
.safe_push (ch1
);
/* Repeatedly try to pair a worklist chain with any other combinable
   chain; a successful combination is itself a combination candidate,
   so it goes back on the worklist as well as into CHAINS.  */
2746 while (!worklist
.is_empty ())
2748 ch1
= worklist
.pop ();
2749 if (!chain_can_be_combined_p (ch1
))
2752 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2754 if (!chain_can_be_combined_p (ch2
))
2757 cch
= combine_chains (ch1
, ch2
);
2760 worklist
.safe_push (cch
);
2761 chains
->safe_push (cch
);
2768 /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2769 if this is impossible because one of these initializers may trap, true
   otherwise.  */
/* NOTE(review): truncated extraction -- return type, braces and the early
   `return` statements after the guards are missing; verify against
   upstream tree-predcom.c.  */
2773 prepare_initializers_chain_store_elim (struct loop
*loop
, chain_p chain
)
2775 unsigned i
, n
= chain
->length
;
2777 /* For now we can't eliminate stores if some of them are conditional
   ...  */
2779 if (!chain
->all_always_accessed
)
2782 /* Nothing to initialize for intra-iteration store elimination. */
2783 if (n
== 0 && chain
->type
== CT_STORE_STORE
)
2786 /* For store elimination chain, there is nothing to initialize if stores
2787 to be eliminated only store loop invariant values into memory. */
2788 if (chain
->type
== CT_STORE_STORE
2789 && is_inv_store_elimination_chain (loop
, chain
))
2791 chain
->inv_store_elimination
= true;
/* Allocate N cleared init slots for the chain.  */
2795 chain
->inits
.create (n
);
2796 chain
->inits
.safe_grow_cleared (n
);
2798 /* For store elimination chain like below:
2800 for (i = 0; i < len; i++)
2807 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2808 content of a[i + 1] remains the same if the loop iterates fewer times
2809 than chain->length. We need to set up root variables for such stores
2810 by loading from memory before loop. Note we only need to load bubble
2811 elements because loop body is guaranteed to be executed at least once
2812 after loop's preheader edge. */
2813 auto_vec
<bool> bubbles
;
2814 bubbles
.safe_grow_cleared (n
+ 1);
/* Mark the distances actually covered by stores; the rest are bubbles.  */
2815 for (i
= 0; i
< chain
->refs
.length (); i
++)
2816 bubbles
[chain
->refs
[i
]->distance
] = true;
2818 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
/* Emit a pre-loop load for each bubble element and queue the generated
   stmts on the chain's init sequence.  */
2819 for (i
= 0; i
< n
; i
++)
2824 gimple_seq stmts
= NULL
;
2826 tree init
= ref_at_iteration (dr
, (int) 0 - i
, &stmts
);
2828 gimple_seq_add_seq_without_update (&chain
->init_seq
, stmts
);
2830 chain
->inits
[i
] = init
;
2836 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2837 impossible because one of these initializers may trap, true otherwise. */
/* NOTE(review): truncated extraction -- return type, braces, the local
   declarations of INIT/LAREF and several `return`/`continue` lines are
   missing; verify against upstream tree-predcom.c.  */
2840 prepare_initializers_chain (struct loop
*loop
, chain_p chain
)
2842 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
2843 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2846 edge entry
= loop_preheader_edge (loop
);
/* Store-store chains are handled by the dedicated helper.  */
2848 if (chain
->type
== CT_STORE_STORE
)
2849 return prepare_initializers_chain_store_elim (loop
, chain
);
2851 /* Find the initializers for the variables, and check that they cannot
   trap ...  */
2853 chain
->inits
.create (n
);
2854 for (i
= 0; i
< n
; i
++)
2855 chain
->inits
.quick_push (NULL_TREE
);
2857 /* If we have replaced some looparound phi nodes, use their initializers
2858 instead of creating our own. */
2859 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
2861 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
2864 gcc_assert (laref
->distance
> 0);
2865 chain
->inits
[n
- laref
->distance
]
2866 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
/* Fill the remaining slots with loads from memory; bail out (stmts are
   discarded) when an initializer could trap and accesses are not
   unconditional.  */
2869 for (i
= 0; i
< n
; i
++)
2871 gimple_seq stmts
= NULL
;
2873 if (chain
->inits
[i
] != NULL_TREE
)
2876 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
2877 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
2879 gimple_seq_discard (stmts
);
2884 gimple_seq_add_seq_without_update (&chain
->init_seq
, stmts
);
2886 chain
->inits
[i
] = init
;
2892 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2893 be used because the initializers might trap. */
/* NOTE(review): truncated extraction -- return type, braces and the
   declaration/assignment of CHAIN and the `i++` path after a successful
   chain are missing; verify against upstream tree-predcom.c.  */
2896 prepare_initializers (struct loop
*loop
, vec
<chain_p
> chains
)
2901 for (i
= 0; i
< chains
.length (); )
2904 if (prepare_initializers_chain (loop
, chain
))
/* Failed chains are released and removed in place (unordered_remove
   keeps I pointing at the swapped-in element).  */
2908 release_chain (chain
);
2909 chains
.unordered_remove (i
);
2914 /* Generates finalizer memory references for CHAIN in LOOP. Returns true
2915 if finalizer code for CHAIN can be generated, otherwise false. */
/* NOTE(review): truncated extraction -- return type, braces and the early
   `return` statements are missing; verify against upstream
   tree-predcom.c.  */
2918 prepare_finalizers_chain (struct loop
*loop
, chain_p chain
)
2920 unsigned i
, n
= chain
->length
;
2921 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2922 tree fini
, niters
= number_of_latch_executions (loop
);
2924 /* For now we can't eliminate stores if some of them are conditional
   ...  */
2926 if (!chain
->all_always_accessed
)
2929 chain
->finis
.create (n
);
2930 for (i
= 0; i
< n
; i
++)
2931 chain
->finis
.quick_push (NULL_TREE
);
2933 /* We never use looparound phi node for store elimination chains. */
2935 /* Find the finalizers for the variables, and check that they cannot
   trap ...  */
2937 for (i
= 0; i
< n
; i
++)
2939 gimple_seq stmts
= NULL
;
2940 gcc_assert (chain
->finis
[i
] == NULL_TREE
);
/* Gimplify NITERS once, on first use, unless it is already a constant
   or an SSA name.  */
2942 if (TREE_CODE (niters
) != INTEGER_CST
&& TREE_CODE (niters
) != SSA_NAME
)
2944 niters
= unshare_expr (niters
);
2945 niters
= force_gimple_operand (niters
, &stmts
, true, NULL
);
2948 gimple_seq_add_seq_without_update (&chain
->fini_seq
, stmts
);
/* Materialize the reference at iteration -I relative to NITERS and queue
   the generated stmts on the chain's fini sequence.  */
2952 fini
= ref_at_iteration (dr
, (int) 0 - i
, &stmts
, niters
);
2954 gimple_seq_add_seq_without_update (&chain
->fini_seq
, stmts
);
2956 chain
->finis
[i
] = fini
;
2962 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
2963 if finalizer code generation for CHAINS breaks loop closed ssa form. */
/* NOTE(review): truncated extraction -- return type, braces, the
   declaration/assignment of CHAIN and the `i++`/`continue` paths are
   missing; verify against upstream tree-predcom.c.  */
2966 prepare_finalizers (struct loop
*loop
, vec
<chain_p
> chains
)
2970 bool loop_closed_ssa
= false;
2972 for (i
= 0; i
< chains
.length ();)
2976 /* Finalizer is only necessary for inter-iteration store elimination
   chains ...  */
2978 if (chain
->length
== 0 || chain
->type
!= CT_STORE_STORE
)
2984 if (prepare_finalizers_chain (loop
, chain
))
2987 /* Be conservative, assume loop closed ssa form is corrupted
2988 by store-store chain. Though it's not always the case if
2989 eliminated stores only store loop invariant values into
   memory ...  */
2991 loop_closed_ssa
= true;
/* Chains whose finalizers cannot be generated are dropped here.  */
2995 release_chain (chain
);
2996 chains
.unordered_remove (i
);
2999 return loop_closed_ssa
;
3002 /* Insert all initializing gimple stmts into loop's entry edge. */
/* NOTE(review): truncated extraction -- return type, braces and the
   declaration of I are missing; verify against upstream tree-predcom.c.  */
3005 insert_init_seqs (struct loop
*loop
, vec
<chain_p
> chains
)
3008 edge entry
= loop_preheader_edge (loop
);
/* Splice each chain's queued init sequence onto the preheader edge and
   clear the pointer so it is not inserted twice.  */
3010 for (i
= 0; i
< chains
.length (); ++i
)
3011 if (chains
[i
]->init_seq
)
3013 gsi_insert_seq_on_edge_immediate (entry
, chains
[i
]->init_seq
);
3014 chains
[i
]->init_seq
= NULL
;
3018 /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3019 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3020 form was corrupted. */
/* NOTE(review): truncated extraction -- the return type, braces, `goto
   end`-style cleanup jumps, the EXIT declaration and several early
   `return` lines are missing; verify against upstream tree-predcom.c.  */
3023 tree_predictive_commoning_loop (struct loop
*loop
)
3025 vec
<data_reference_p
> datarefs
;
3026 vec
<ddr_p
> dependences
;
3027 struct component
*components
;
3028 vec
<chain_p
> chains
= vNULL
;
3029 unsigned unroll_factor
;
3030 struct tree_niter_desc desc
;
3031 bool unroll
= false, loop_closed_ssa
= false;
3034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3035 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
3037 /* Nothing for predictive commoning if loop only iterates 1 time. */
3038 if (get_max_loop_iterations_int (loop
) == 0)
3040 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3041 fprintf (dump_file
, "Loop iterates only 1 time, nothing to do.\n");
3046 /* Find the data references and split them into components according to their
3047 dependence relations. */
3048 auto_vec
<loop_p
, 3> loop_nest
;
3049 dependences
.create (10);
3050 datarefs
.create (10);
3051 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
/* Dependence analysis failed: free what was allocated and bail out.  */
3054 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3055 fprintf (dump_file
, "Cannot analyze data dependencies\n");
3056 free_data_refs (datarefs
);
3057 free_dependence_relations (dependences
);
3061 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3062 dump_data_dependence_relations (dump_file
, dependences
);
3064 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
3065 loop_nest
.release ();
3066 free_dependence_relations (dependences
);
3069 free_data_refs (datarefs
);
3070 free_affine_expand_cache (&name_expansions
);
3074 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3076 fprintf (dump_file
, "Initial state:\n\n");
3077 dump_components (dump_file
, components
);
3080 /* Find the suitable components and split them into chains. */
3081 components
= filter_suitable_components (loop
, components
);
3083 auto_bitmap tmp_vars
;
3084 looparound_phis
= BITMAP_ALLOC (NULL
);
3085 determine_roots (loop
, components
, &chains
);
3086 release_components (components
);
3088 if (!chains
.exists ())
3090 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3092 "Predictive commoning failed: no suitable chains\n");
3095 prepare_initializers (loop
, chains
);
3096 loop_closed_ssa
= prepare_finalizers (loop
, chains
);
3098 /* Try to combine the chains that are always worked with together. */
3099 try_combine_chains (&chains
);
3101 insert_init_seqs (loop
, chains
);
3103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3105 fprintf (dump_file
, "Before commoning:\n\n");
3106 dump_chains (dump_file
, chains
);
3109 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3110 that its number of iterations is divisible by the factor. */
3111 unroll_factor
= determine_unroll_factor (chains
);
3113 unroll
= (unroll_factor
> 1
3114 && can_unroll_loop_p (loop
, unroll_factor
, &desc
));
3115 exit
= single_dom_exit (loop
);
3117 /* Execute the predictive commoning transformations, and possibly unroll the
   loop ...  */
3121 struct epcc_data dta
;
3123 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3124 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
3126 dta
.chains
= chains
;
3127 dta
.tmp_vars
= tmp_vars
;
3129 update_ssa (TODO_update_ssa_only_virtuals
);
3131 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3132 execute_pred_commoning_cbck is called may cause phi nodes to be
3133 reallocated, which is a problem since CHAINS may point to these
3134 statements. To fix this, we store the ssa names defined by the
3135 phi nodes here instead of the phi nodes themselves, and restore
3136 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3137 replace_phis_by_defined_names (chains
);
3139 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
3140 execute_pred_commoning_cbck
, &dta
);
3141 eliminate_temp_copies (loop
, tmp_vars
);
/* Non-unrolled path: apply the transformation in place.  */
3145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3147 "Executing predictive commoning without unrolling.\n");
3148 execute_pred_commoning (loop
, chains
, tmp_vars
);
/* Common cleanup for all exits from this function.  */
3152 release_chains (chains
);
3153 free_data_refs (datarefs
);
3154 BITMAP_FREE (looparound_phis
);
3156 free_affine_expand_cache (&name_expansions
);
/* Bit 0 = unrolled, bit 1 = loop-closed SSA corrupted (see header).  */
3158 return (unroll
? 1 : 0) | (loop_closed_ssa
? 2 : 0);
3161 /* Runs predictive commoning. */
/* NOTE(review): truncated extraction -- return type, braces, the LOOP
   declaration, the guards around the loop-closed-SSA rewrite and the
   final `return ret;` are missing; verify against upstream
   tree-predcom.c.  */
3164 tree_predictive_commoning (void)
3167 unsigned ret
= 0, changed
= 0;
/* Process every innermost loop that is optimized for speed, OR-ing the
   per-loop result bits into CHANGED.  */
3169 initialize_original_copy_tables ();
3170 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
3171 if (optimize_loop_for_speed_p (loop
))
3173 changed
|= tree_predictive_commoning_loop (loop
);
3175 free_original_copy_tables ();
/* Presumably guarded on CHANGED bits in the dropped lines: repair
   loop-closed SSA and request a CFG cleanup -- TODO confirm.  */
3182 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
3184 ret
= TODO_cleanup_cfg
;
3190 /* Predictive commoning Pass. */
/* Entry point wrapper: skip functions with no loops, otherwise run the
   pass.  NOTE(review): return type, braces and the early-return value are
   missing from this truncated extraction.  */
3193 run_tree_predictive_commoning (struct function
*fun
)
3195 if (number_of_loops (fun
) <= 1)
3198 return tree_predictive_commoning ();
/* Static pass descriptor consumed by the pass manager.  NOTE(review): the
   `name` field line (and the surrounding braces) are missing from this
   truncated extraction -- presumably "predcom"; verify upstream.  */
3203 const pass_data pass_data_predcom
=
3205 GIMPLE_PASS
, /* type */
3207 OPTGROUP_LOOP
, /* optinfo_flags */
3208 TV_PREDCOM
, /* tv_id */
3209 PROP_cfg
, /* properties_required */
3210 0, /* properties_provided */
3211 0, /* properties_destroyed */
3212 0, /* todo_flags_start */
3213 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
/* Pass wrapper class: gates on -fpredictive-commoning and delegates to
   run_tree_predictive_commoning.  NOTE(review): braces and access
   specifiers are missing from this truncated extraction.  */
3216 class pass_predcom
: public gimple_opt_pass
3219 pass_predcom (gcc::context
*ctxt
)
3220 : gimple_opt_pass (pass_data_predcom
, ctxt
)
3223 /* opt_pass methods: */
3224 virtual bool gate (function
*) { return flag_predictive_commoning
!= 0; }
3225 virtual unsigned int execute (function
*fun
)
3227 return run_tree_predictive_commoning (fun
);
3230 }; // class pass_predcom
/* Factory hook used by the pass manager to instantiate pass_predcom.
   NOTE(review): return type, braces and the closing brace are missing
   from this truncated extraction.  */
3235 make_pass_predcom (gcc::context
*ctxt
)
3237 return new pass_predcom (ctxt
);