1 /* Predictive commoning.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
170 for (i = 0; i < n; i++)
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
189 #include "coretypes.h"
195 #include "tree-pass.h"
197 #include "gimple-pretty-print.h"
199 #include "fold-const.h"
202 #include "gimplify.h"
203 #include "gimple-iterator.h"
204 #include "gimplify-me.h"
205 #include "tree-ssa-loop-ivopts.h"
206 #include "tree-ssa-loop-manip.h"
207 #include "tree-ssa-loop-niter.h"
208 #include "tree-ssa-loop.h"
209 #include "tree-into-ssa.h"
210 #include "tree-dfa.h"
211 #include "tree-ssa.h"
212 #include "tree-data-ref.h"
213 #include "tree-scalar-evolution.h"
215 #include "tree-affine.h"
217 /* The maximum number of iterations between the considered memory
220 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
222 /* Data references (or phi nodes that carry data reference values across
225 typedef struct dref_d
227 /* The reference itself. */
228 struct data_reference
*ref
;
230 /* The statement in that the reference appears. */
233 /* In case that STMT is a phi node, this field is set to the SSA name
234 defined by it in replace_phis_by_defined_names (in order to avoid
235 pointing to phi node that got reallocated in the meantime). */
236 tree name_defined_by_phi
;
238 /* Distance of the reference from the root of the chain (in number of
239 iterations of the loop). */
242 /* Number of iterations offset from the first reference in the component. */
245 /* Number of the reference in a component, in dominance ordering. */
248 /* True if the memory reference is always accessed when the loop is
250 unsigned always_accessed
: 1;
254 /* Type of the chain of the references. */
258 /* The addresses of the references in the chain are constant. */
261 /* There are only loads in the chain. */
264 /* Root of the chain is store, the rest are loads. */
267 /* A combination of two chains. */
271 /* Chains of data references. */
275 /* Type of the chain. */
276 enum chain_type type
;
278 /* For combination chains, the operator and the two chains that are
279 combined, and the type of the result. */
282 struct chain
*ch1
, *ch2
;
284 /* The references in the chain. */
287 /* The maximum distance of the reference in the chain from the root. */
290 /* The variables used to copy the value throughout iterations. */
293 /* Initializers for the variables. */
296 /* True if there is a use of a variable with the maximal distance
297 that comes after the root in the loop. */
298 unsigned has_max_use_after
: 1;
300 /* True if all the memory references in the chain are always accessed. */
301 unsigned all_always_accessed
: 1;
303 /* True if this chain was combined together with some other chain. */
304 unsigned combined
: 1;
308 /* Describes the knowledge about the step of the memory references in
313 /* The step is zero. */
316 /* The step is nonzero. */
319 /* The step may or may not be nonzero. */
323 /* Components of the data dependence graph. */
327 /* The references in the component. */
330 /* What we know about the step of the references in the component. */
331 enum ref_step_type comp_step
;
333 /* Next component in the list. */
334 struct component
*next
;
337 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
339 static bitmap looparound_phis
;
341 /* Cache used by tree_to_aff_combination_expand. */
343 static hash_map
<tree
, name_expansion
*> *name_expansions
;
345 /* Dumps data reference REF to FILE. */
347 extern void dump_dref (FILE *, dref
);
349 dump_dref (FILE *file
, dref ref
)
354 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
355 fprintf (file
, " (id %u%s)\n", ref
->pos
,
356 DR_IS_READ (ref
->ref
) ? "" : ", write");
358 fprintf (file
, " offset ");
359 print_decs (ref
->offset
, file
);
360 fprintf (file
, "\n");
362 fprintf (file
, " distance %u\n", ref
->distance
);
366 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
367 fprintf (file
, " looparound ref\n");
369 fprintf (file
, " combination ref\n");
370 fprintf (file
, " in statement ");
371 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
372 fprintf (file
, "\n");
373 fprintf (file
, " distance %u\n", ref
->distance
);
378 /* Dumps CHAIN to FILE. */
380 extern void dump_chain (FILE *, chain_p
);
382 dump_chain (FILE *file
, chain_p chain
)
385 const char *chain_type
;
392 chain_type
= "Load motion";
396 chain_type
= "Loads-only";
400 chain_type
= "Store-loads";
404 chain_type
= "Combination";
411 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
412 chain
->combined
? " (combined)" : "");
413 if (chain
->type
!= CT_INVARIANT
)
414 fprintf (file
, " max distance %u%s\n", chain
->length
,
415 chain
->has_max_use_after
? "" : ", may reuse first");
417 if (chain
->type
== CT_COMBINATION
)
419 fprintf (file
, " equal to %p %s %p in type ",
420 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
421 (void *) chain
->ch2
);
422 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
423 fprintf (file
, "\n");
426 if (chain
->vars
.exists ())
428 fprintf (file
, " vars");
429 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
432 print_generic_expr (file
, var
, TDF_SLIM
);
434 fprintf (file
, "\n");
437 if (chain
->inits
.exists ())
439 fprintf (file
, " inits");
440 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
443 print_generic_expr (file
, var
, TDF_SLIM
);
445 fprintf (file
, "\n");
448 fprintf (file
, " references:\n");
449 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
452 fprintf (file
, "\n");
455 /* Dumps CHAINS to FILE. */
457 extern void dump_chains (FILE *, vec
<chain_p
> );
459 dump_chains (FILE *file
, vec
<chain_p
> chains
)
464 FOR_EACH_VEC_ELT (chains
, i
, chain
)
465 dump_chain (file
, chain
);
468 /* Dumps COMP to FILE. */
470 extern void dump_component (FILE *, struct component
*);
472 dump_component (FILE *file
, struct component
*comp
)
477 fprintf (file
, "Component%s:\n",
478 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
479 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
481 fprintf (file
, "\n");
484 /* Dumps COMPS to FILE. */
486 extern void dump_components (FILE *, struct component
*);
488 dump_components (FILE *file
, struct component
*comps
)
490 struct component
*comp
;
492 for (comp
= comps
; comp
; comp
= comp
->next
)
493 dump_component (file
, comp
);
496 /* Frees a chain CHAIN. */
499 release_chain (chain_p chain
)
507 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
510 chain
->refs
.release ();
511 chain
->vars
.release ();
512 chain
->inits
.release ();
520 release_chains (vec
<chain_p
> chains
)
525 FOR_EACH_VEC_ELT (chains
, i
, chain
)
526 release_chain (chain
);
530 /* Frees a component COMP. */
533 release_component (struct component
*comp
)
535 comp
->refs
.release ();
539 /* Frees list of components COMPS. */
542 release_components (struct component
*comps
)
544 struct component
*act
, *next
;
546 for (act
= comps
; act
; act
= next
)
549 release_component (act
);
553 /* Finds a root of tree given by FATHERS containing A, and performs path
557 component_of (unsigned fathers
[], unsigned a
)
561 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
564 for (; a
!= root
; a
= n
)
573 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
574 components, A and B are components to merge. */
577 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
579 unsigned ca
= component_of (fathers
, a
);
580 unsigned cb
= component_of (fathers
, b
);
585 if (sizes
[ca
] < sizes
[cb
])
587 sizes
[cb
] += sizes
[ca
];
592 sizes
[ca
] += sizes
[cb
];
597 /* Returns true if A is a reference that is suitable for predictive commoning
598 in the innermost loop that contains it. REF_STEP is set according to the
599 step of the reference A. */
602 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
604 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
607 || TREE_THIS_VOLATILE (ref
)
608 || !is_gimple_reg_type (TREE_TYPE (ref
))
609 || tree_could_throw_p (ref
))
612 if (integer_zerop (step
))
613 *ref_step
= RS_INVARIANT
;
614 else if (integer_nonzerop (step
))
615 *ref_step
= RS_NONZERO
;
622 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
625 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
627 tree type
= TREE_TYPE (DR_OFFSET (dr
));
630 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
632 aff_combination_const (&delta
, type
, wi::to_widest (DR_INIT (dr
)));
633 aff_combination_add (offset
, &delta
);
636 /* Determines number of iterations of the innermost enclosing loop before B
637 refers to exactly the same location as A and stores it to OFF. If A and
638 B do not have the same step, they never meet, or anything else fails,
639 returns false, otherwise returns true. Both A and B are assumed to
640 satisfy suitable_reference_p. */
643 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
646 aff_tree diff
, baseb
, step
;
649 /* Check that both the references access the location in the same type. */
650 typea
= TREE_TYPE (DR_REF (a
));
651 typeb
= TREE_TYPE (DR_REF (b
));
652 if (!useless_type_conversion_p (typeb
, typea
))
655 /* Check whether the base address and the step of both references is the
657 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
658 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
661 if (integer_zerop (DR_STEP (a
)))
663 /* If the references have loop invariant address, check that they access
664 exactly the same location. */
666 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
667 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
670 /* Compare the offsets of the addresses, and check whether the difference
671 is a multiple of step. */
672 aff_combination_dr_offset (a
, &diff
);
673 aff_combination_dr_offset (b
, &baseb
);
674 aff_combination_scale (&baseb
, -1);
675 aff_combination_add (&diff
, &baseb
);
677 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
678 &step
, &name_expansions
);
679 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
682 /* Returns the last basic block in LOOP for that we are sure that
683 it is executed whenever the loop is entered. */
686 last_always_executed_block (struct loop
*loop
)
689 vec
<edge
> exits
= get_loop_exit_edges (loop
);
691 basic_block last
= loop
->latch
;
693 FOR_EACH_VEC_ELT (exits
, i
, ex
)
694 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
700 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
702 static struct component
*
703 split_data_refs_to_components (struct loop
*loop
,
704 vec
<data_reference_p
> datarefs
,
707 unsigned i
, n
= datarefs
.length ();
708 unsigned ca
, ia
, ib
, bad
;
709 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
710 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
711 struct component
**comps
;
712 struct data_reference
*dr
, *dra
, *drb
;
713 struct data_dependence_relation
*ddr
;
714 struct component
*comp_list
= NULL
, *comp
;
716 basic_block last_always_executed
= last_always_executed_block (loop
);
718 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
722 /* A fake reference for call or asm_expr that may clobber memory;
726 /* predcom pass isn't prepared to handle calls with data references. */
727 if (is_gimple_call (DR_STMT (dr
)))
729 dr
->aux
= (void *) (size_t) i
;
734 /* A component reserved for the "bad" data references. */
738 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
740 enum ref_step_type dummy
;
742 if (!suitable_reference_p (dr
, &dummy
))
744 ia
= (unsigned) (size_t) dr
->aux
;
745 merge_comps (comp_father
, comp_size
, n
, ia
);
749 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
751 widest_int dummy_off
;
753 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
758 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
759 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
763 bad
= component_of (comp_father
, n
);
765 /* If both A and B are reads, we may ignore unsuitable dependences. */
766 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
768 if (ia
== bad
|| ib
== bad
769 || !determine_offset (dra
, drb
, &dummy_off
))
772 /* If A is read and B write or vice versa and there is unsuitable
773 dependence, instead of merging both components into a component
774 that will certainly not pass suitable_component_p, just put the
775 read into bad component, perhaps at least the write together with
776 all the other data refs in it's component will be optimizable. */
777 else if (DR_IS_READ (dra
) && ib
!= bad
)
781 else if (!determine_offset (dra
, drb
, &dummy_off
))
783 merge_comps (comp_father
, comp_size
, bad
, ia
);
787 else if (DR_IS_READ (drb
) && ia
!= bad
)
791 else if (!determine_offset (dra
, drb
, &dummy_off
))
793 merge_comps (comp_father
, comp_size
, bad
, ib
);
798 merge_comps (comp_father
, comp_size
, ia
, ib
);
801 comps
= XCNEWVEC (struct component
*, n
);
802 bad
= component_of (comp_father
, n
);
803 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
805 ia
= (unsigned) (size_t) dr
->aux
;
806 ca
= component_of (comp_father
, ia
);
813 comp
= XCNEW (struct component
);
814 comp
->refs
.create (comp_size
[ca
]);
818 dataref
= XCNEW (struct dref_d
);
820 dataref
->stmt
= DR_STMT (dr
);
822 dataref
->distance
= 0;
824 dataref
->always_accessed
825 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
826 gimple_bb (dataref
->stmt
));
827 dataref
->pos
= comp
->refs
.length ();
828 comp
->refs
.quick_push (dataref
);
831 for (i
= 0; i
< n
; i
++)
836 comp
->next
= comp_list
;
848 /* Returns true if the component COMP satisfies the conditions
849 described in 2) at the beginning of this file. LOOP is the current
853 suitable_component_p (struct loop
*loop
, struct component
*comp
)
857 basic_block ba
, bp
= loop
->header
;
858 bool ok
, has_write
= false;
860 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
862 ba
= gimple_bb (a
->stmt
);
864 if (!just_once_each_iteration_p (loop
, ba
))
867 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
870 if (DR_IS_WRITE (a
->ref
))
874 first
= comp
->refs
[0];
875 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
879 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
881 if (!determine_offset (first
->ref
, a
->ref
, &a
->offset
))
884 enum ref_step_type a_step
;
885 gcc_checking_assert (suitable_reference_p (a
->ref
, &a_step
)
886 && a_step
== comp
->comp_step
);
889 /* If there is a write inside the component, we must know whether the
890 step is nonzero or not -- we would not otherwise be able to recognize
891 whether the value accessed by reads comes from the OFFSET-th iteration
892 or the previous one. */
893 if (has_write
&& comp
->comp_step
== RS_ANY
)
899 /* Check the conditions on references inside each of components COMPS,
900 and remove the unsuitable components from the list. The new list
901 of components is returned. The conditions are described in 2) at
902 the beginning of this file. LOOP is the current loop. */
904 static struct component
*
905 filter_suitable_components (struct loop
*loop
, struct component
*comps
)
907 struct component
**comp
, *act
;
909 for (comp
= &comps
; *comp
; )
912 if (suitable_component_p (loop
, act
))
920 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
922 release_component (act
);
929 /* Compares two drefs A and B by their offset and position. Callback for
933 order_drefs (const void *a
, const void *b
)
935 const dref
*const da
= (const dref
*) a
;
936 const dref
*const db
= (const dref
*) b
;
937 int offcmp
= wi::cmps ((*da
)->offset
, (*db
)->offset
);
942 return (*da
)->pos
- (*db
)->pos
;
945 /* Returns root of the CHAIN. */
948 get_chain_root (chain_p chain
)
950 return chain
->refs
[0];
953 /* Adds REF to the chain CHAIN. */
956 add_ref_to_chain (chain_p chain
, dref ref
)
958 dref root
= get_chain_root (chain
);
960 gcc_assert (wi::les_p (root
->offset
, ref
->offset
));
961 widest_int dist
= ref
->offset
- root
->offset
;
962 if (wi::leu_p (MAX_DISTANCE
, dist
))
967 gcc_assert (wi::fits_uhwi_p (dist
));
969 chain
->refs
.safe_push (ref
);
971 ref
->distance
= dist
.to_uhwi ();
973 if (ref
->distance
>= chain
->length
)
975 chain
->length
= ref
->distance
;
976 chain
->has_max_use_after
= false;
979 if (ref
->distance
== chain
->length
980 && ref
->pos
> root
->pos
)
981 chain
->has_max_use_after
= true;
983 chain
->all_always_accessed
&= ref
->always_accessed
;
986 /* Returns the chain for invariant component COMP. */
989 make_invariant_chain (struct component
*comp
)
991 chain_p chain
= XCNEW (struct chain
);
995 chain
->type
= CT_INVARIANT
;
997 chain
->all_always_accessed
= true;
999 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
1001 chain
->refs
.safe_push (ref
);
1002 chain
->all_always_accessed
&= ref
->always_accessed
;
1008 /* Make a new chain rooted at REF. */
1011 make_rooted_chain (dref ref
)
1013 chain_p chain
= XCNEW (struct chain
);
1015 chain
->type
= DR_IS_READ (ref
->ref
) ? CT_LOAD
: CT_STORE_LOAD
;
1017 chain
->refs
.safe_push (ref
);
1018 chain
->all_always_accessed
= ref
->always_accessed
;
1025 /* Returns true if CHAIN is not trivial. */
1028 nontrivial_chain_p (chain_p chain
)
1030 return chain
!= NULL
&& chain
->refs
.length () > 1;
1033 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1037 name_for_ref (dref ref
)
1041 if (is_gimple_assign (ref
->stmt
))
1043 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1044 name
= gimple_assign_lhs (ref
->stmt
);
1046 name
= gimple_assign_rhs1 (ref
->stmt
);
1049 name
= PHI_RESULT (ref
->stmt
);
1051 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1054 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1055 iterations of the innermost enclosing loop). */
1058 valid_initializer_p (struct data_reference
*ref
,
1059 unsigned distance
, struct data_reference
*root
)
1061 aff_tree diff
, base
, step
;
1064 /* Both REF and ROOT must be accessing the same object. */
1065 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1068 /* The initializer is defined outside of loop, hence its address must be
1069 invariant inside the loop. */
1070 gcc_assert (integer_zerop (DR_STEP (ref
)));
1072 /* If the address of the reference is invariant, initializer must access
1073 exactly the same location. */
1074 if (integer_zerop (DR_STEP (root
)))
1075 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1076 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1078 /* Verify that this index of REF is equal to the root's index at
1079 -DISTANCE-th iteration. */
1080 aff_combination_dr_offset (root
, &diff
);
1081 aff_combination_dr_offset (ref
, &base
);
1082 aff_combination_scale (&base
, -1);
1083 aff_combination_add (&diff
, &base
);
1085 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1086 &step
, &name_expansions
);
1087 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1090 if (off
!= distance
)
1096 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1097 initial value is correct (equal to initial value of REF shifted by one
1098 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1099 is the root of the current chain. */
1102 find_looparound_phi (struct loop
*loop
, dref ref
, dref root
)
1104 tree name
, init
, init_ref
;
1107 edge latch
= loop_latch_edge (loop
);
1108 struct data_reference init_dr
;
1111 if (is_gimple_assign (ref
->stmt
))
1113 if (DR_IS_READ (ref
->ref
))
1114 name
= gimple_assign_lhs (ref
->stmt
);
1116 name
= gimple_assign_rhs1 (ref
->stmt
);
1119 name
= PHI_RESULT (ref
->stmt
);
1123 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1126 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1130 if (gsi_end_p (psi
))
1133 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1134 if (TREE_CODE (init
) != SSA_NAME
)
1136 init_stmt
= SSA_NAME_DEF_STMT (init
);
1137 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1139 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1141 init_ref
= gimple_assign_rhs1 (init_stmt
);
1142 if (!REFERENCE_CLASS_P (init_ref
)
1143 && !DECL_P (init_ref
))
1146 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1147 loop enclosing PHI). */
1148 memset (&init_dr
, 0, sizeof (struct data_reference
));
1149 DR_REF (&init_dr
) = init_ref
;
1150 DR_STMT (&init_dr
) = phi
;
1151 if (!dr_analyze_innermost (&init_dr
, loop
))
1154 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1160 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1163 insert_looparound_copy (chain_p chain
, dref ref
, gphi
*phi
)
1165 dref nw
= XCNEW (struct dref_d
), aref
;
1169 nw
->distance
= ref
->distance
+ 1;
1170 nw
->always_accessed
= 1;
1172 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1173 if (aref
->distance
>= nw
->distance
)
1175 chain
->refs
.safe_insert (i
, nw
);
1177 if (nw
->distance
> chain
->length
)
1179 chain
->length
= nw
->distance
;
1180 chain
->has_max_use_after
= false;
1184 /* For references in CHAIN that are copied around the LOOP (created previously
1185 by PRE, or by user), add the results of such copies to the chain. This
1186 enables us to remove the copies by unrolling, and may need less registers
1187 (also, it may allow us to combine chains together). */
1190 add_looparound_copies (struct loop
*loop
, chain_p chain
)
1193 dref ref
, root
= get_chain_root (chain
);
1196 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1198 phi
= find_looparound_phi (loop
, ref
, root
);
1202 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1203 insert_looparound_copy (chain
, ref
, phi
);
1207 /* Find roots of the values and determine distances in the component COMP.
1208 The references are redistributed into CHAINS. LOOP is the current
1212 determine_roots_comp (struct loop
*loop
,
1213 struct component
*comp
,
1214 vec
<chain_p
> *chains
)
1218 chain_p chain
= NULL
;
1219 widest_int last_ofs
= 0;
1221 /* Invariants are handled specially. */
1222 if (comp
->comp_step
== RS_INVARIANT
)
1224 chain
= make_invariant_chain (comp
);
1225 chains
->safe_push (chain
);
1229 comp
->refs
.qsort (order_drefs
);
1231 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1233 if (!chain
|| DR_IS_WRITE (a
->ref
)
1234 || wi::leu_p (MAX_DISTANCE
, a
->offset
- last_ofs
))
1236 if (nontrivial_chain_p (chain
))
1238 add_looparound_copies (loop
, chain
);
1239 chains
->safe_push (chain
);
1242 release_chain (chain
);
1243 chain
= make_rooted_chain (a
);
1244 last_ofs
= a
->offset
;
1248 add_ref_to_chain (chain
, a
);
1251 if (nontrivial_chain_p (chain
))
1253 add_looparound_copies (loop
, chain
);
1254 chains
->safe_push (chain
);
1257 release_chain (chain
);
1260 /* Find roots of the values and determine distances in components COMPS, and
1261 separates the references to CHAINS. LOOP is the current loop. */
1264 determine_roots (struct loop
*loop
,
1265 struct component
*comps
, vec
<chain_p
> *chains
)
1267 struct component
*comp
;
1269 for (comp
= comps
; comp
; comp
= comp
->next
)
1270 determine_roots_comp (loop
, comp
, chains
);
1273 /* Replace the reference in statement STMT with temporary variable
1274 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1275 the reference in the statement. IN_LHS is true if the reference
1276 is in the lhs of STMT, false if it is in rhs. */
1279 replace_ref_with (gimple
*stmt
, tree new_tree
, bool set
, bool in_lhs
)
1283 gimple_stmt_iterator bsi
, psi
;
1285 if (gimple_code (stmt
) == GIMPLE_PHI
)
1287 gcc_assert (!in_lhs
&& !set
);
1289 val
= PHI_RESULT (stmt
);
1290 bsi
= gsi_after_labels (gimple_bb (stmt
));
1291 psi
= gsi_for_stmt (stmt
);
1292 remove_phi_node (&psi
, false);
1294 /* Turn the phi node into GIMPLE_ASSIGN. */
1295 new_stmt
= gimple_build_assign (val
, new_tree
);
1296 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1300 /* Since the reference is of gimple_reg type, it should only
1301 appear as lhs or rhs of modify statement. */
1302 gcc_assert (is_gimple_assign (stmt
));
1304 bsi
= gsi_for_stmt (stmt
);
1306 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1309 gcc_assert (!in_lhs
);
1310 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1311 stmt
= gsi_stmt (bsi
);
1318 /* We have statement
1322 If OLD is a memory reference, then VAL is gimple_val, and we transform
1328 Otherwise, we are replacing a combination chain,
1329 VAL is the expression that performs the combination, and OLD is an
1330 SSA name. In this case, we transform the assignment to
1337 val
= gimple_assign_lhs (stmt
);
1338 if (TREE_CODE (val
) != SSA_NAME
)
1340 val
= gimple_assign_rhs1 (stmt
);
1341 gcc_assert (gimple_assign_single_p (stmt
));
1342 if (TREE_CLOBBER_P (val
))
1343 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1345 gcc_assert (gimple_assign_copy_p (stmt
));
1357 val
= gimple_assign_lhs (stmt
);
1360 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1361 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1364 /* Returns a memory reference to DR in the ITER-th iteration of
1365 the loop it was analyzed in. Append init stmts to STMTS. */
1368 ref_at_iteration (data_reference_p dr
, int iter
, gimple_seq
*stmts
)
1370 tree off
= DR_OFFSET (dr
);
1371 tree coff
= DR_INIT (dr
);
1374 else if (TREE_CODE (DR_STEP (dr
)) == INTEGER_CST
)
1375 coff
= size_binop (PLUS_EXPR
, coff
,
1376 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1378 off
= size_binop (PLUS_EXPR
, off
,
1379 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1380 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1381 addr
= force_gimple_operand_1 (unshare_expr (addr
), stmts
,
1382 is_gimple_mem_ref_addr
, NULL_TREE
);
1383 tree alias_ptr
= fold_convert (reference_alias_ptr_type (DR_REF (dr
)), coff
);
1384 /* While data-ref analysis punts on bit offsets it still handles
1385 bitfield accesses at byte boundaries. Cope with that. Note that
1386 we cannot simply re-apply the outer COMPONENT_REF because the
1387 byte-granular portion of it is already applied via DR_INIT and
1388 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1389 start at offset zero. */
1390 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
1391 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
1393 tree field
= TREE_OPERAND (DR_REF (dr
), 1);
1394 return build3 (BIT_FIELD_REF
, TREE_TYPE (DR_REF (dr
)),
1395 build2 (MEM_REF
, DECL_BIT_FIELD_TYPE (field
),
1397 DECL_SIZE (field
), bitsize_zero_node
);
1400 return fold_build2 (MEM_REF
, TREE_TYPE (DR_REF (dr
)), addr
, alias_ptr
);
1403 /* Get the initialization expression for the INDEX-th temporary variable
1407 get_init_expr (chain_p chain
, unsigned index
)
1409 if (chain
->type
== CT_COMBINATION
)
1411 tree e1
= get_init_expr (chain
->ch1
, index
);
1412 tree e2
= get_init_expr (chain
->ch2
, index
);
1414 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1417 return chain
->inits
[index
];
1420 /* Returns a new temporary variable used for the I-th variable carrying
1421 value of REF. The variable's uid is marked in TMP_VARS. */
1424 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1426 tree type
= TREE_TYPE (ref
);
1427 /* We never access the components of the temporary variable in predictive
1429 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
1430 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1434 /* Creates the variables for CHAIN, as well as phi nodes for them and
1435 initialization on entry to LOOP. Uids of the newly created
1436 temporary variables are marked in TMP_VARS. */
1439 initialize_root_vars (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1442 unsigned n
= chain
->length
;
1443 dref root
= get_chain_root (chain
);
1444 bool reuse_first
= !chain
->has_max_use_after
;
1445 tree ref
, init
, var
, next
;
1448 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1450 /* If N == 0, then all the references are within the single iteration. And
1451 since this is an nonempty chain, reuse_first cannot be true. */
1452 gcc_assert (n
> 0 || !reuse_first
);
1454 chain
->vars
.create (n
+ 1);
1456 if (chain
->type
== CT_COMBINATION
)
1457 ref
= gimple_assign_lhs (root
->stmt
);
1459 ref
= DR_REF (root
->ref
);
1461 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1463 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1464 chain
->vars
.quick_push (var
);
1467 chain
->vars
.quick_push (chain
->vars
[0]);
1469 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1470 chain
->vars
[i
] = make_ssa_name (var
);
1472 for (i
= 0; i
< n
; i
++)
1474 var
= chain
->vars
[i
];
1475 next
= chain
->vars
[i
+ 1];
1476 init
= get_init_expr (chain
, i
);
1478 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1480 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1482 phi
= create_phi_node (var
, loop
->header
);
1483 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1484 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1488 /* Create the variables and initialization statement for root of chain
1489 CHAIN. Uids of the newly created temporary variables are marked
1493 initialize_root (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1495 dref root
= get_chain_root (chain
);
1496 bool in_lhs
= (chain
->type
== CT_STORE_LOAD
1497 || chain
->type
== CT_COMBINATION
);
1499 initialize_root_vars (loop
, chain
, tmp_vars
);
1500 replace_ref_with (root
->stmt
,
1501 chain
->vars
[chain
->length
],
1505 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1506 initialization on entry to LOOP if necessary. The ssa name for the variable
1507 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1508 around the loop is created. Uid of the newly created temporary variable
1509 is marked in TMP_VARS. INITS is the list containing the (single)
1513 initialize_root_vars_lm (struct loop
*loop
, dref root
, bool written
,
1514 vec
<tree
> *vars
, vec
<tree
> inits
,
1518 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1521 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1523 /* Find the initializer for the variable, and check that it cannot
1527 vars
->create (written
? 2 : 1);
1528 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1529 vars
->quick_push (var
);
1531 vars
->quick_push ((*vars
)[0]);
1533 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1534 (*vars
)[i
] = make_ssa_name (var
);
1538 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1540 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1545 phi
= create_phi_node (var
, loop
->header
);
1546 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1547 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1551 gassign
*init_stmt
= gimple_build_assign (var
, init
);
1552 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1557 /* Execute load motion for references in chain CHAIN. Uids of the newly
1558 created temporary variables are marked in TMP_VARS. */
1561 execute_load_motion (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1563 auto_vec
<tree
> vars
;
1565 unsigned n_writes
= 0, ridx
, i
;
1568 gcc_assert (chain
->type
== CT_INVARIANT
);
1569 gcc_assert (!chain
->combined
);
1570 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1571 if (DR_IS_WRITE (a
->ref
))
1574 /* If there are no reads in the loop, there is nothing to do. */
1575 if (n_writes
== chain
->refs
.length ())
1578 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
1579 &vars
, chain
->inits
, tmp_vars
);
1582 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1584 bool is_read
= DR_IS_READ (a
->ref
);
1586 if (DR_IS_WRITE (a
->ref
))
1592 var
= make_ssa_name (SSA_NAME_VAR (var
));
1599 replace_ref_with (a
->stmt
, vars
[ridx
],
1600 !is_read
, !is_read
);
1604 /* Returns the single statement in that NAME is used, excepting
1605 the looparound phi nodes contained in one of the chains. If there is no
1606 such statement, or more statements, NULL is returned. */
1609 single_nonlooparound_use (tree name
)
1612 imm_use_iterator it
;
1613 gimple
*stmt
, *ret
= NULL
;
1615 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
1617 stmt
= USE_STMT (use
);
1619 if (gimple_code (stmt
) == GIMPLE_PHI
)
1621 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1622 could not be processed anyway, so just fail for them. */
1623 if (bitmap_bit_p (looparound_phis
,
1624 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
1629 else if (is_gimple_debug (stmt
))
1631 else if (ret
!= NULL
)
1640 /* Remove statement STMT, as well as the chain of assignments in that it is
1644 remove_stmt (gimple
*stmt
)
1648 gimple_stmt_iterator psi
;
1650 if (gimple_code (stmt
) == GIMPLE_PHI
)
1652 name
= PHI_RESULT (stmt
);
1653 next
= single_nonlooparound_use (name
);
1654 reset_debug_uses (stmt
);
1655 psi
= gsi_for_stmt (stmt
);
1656 remove_phi_node (&psi
, true);
1659 || !gimple_assign_ssa_name_copy_p (next
)
1660 || gimple_assign_rhs1 (next
) != name
)
1668 gimple_stmt_iterator bsi
;
1670 bsi
= gsi_for_stmt (stmt
);
1672 name
= gimple_assign_lhs (stmt
);
1673 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
1675 next
= single_nonlooparound_use (name
);
1676 reset_debug_uses (stmt
);
1678 unlink_stmt_vdef (stmt
);
1679 gsi_remove (&bsi
, true);
1680 release_defs (stmt
);
1683 || !gimple_assign_ssa_name_copy_p (next
)
1684 || gimple_assign_rhs1 (next
) != name
)
1691 /* Perform the predictive commoning optimization for a chain CHAIN.
1692 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1695 execute_pred_commoning_chain (struct loop
*loop
, chain_p chain
,
1702 if (chain
->combined
)
1704 /* For combined chains, just remove the statements that are used to
1705 compute the values of the expression (except for the root one).
1706 We delay this until after all chains are processed. */
1710 /* For non-combined chains, set up the variables that hold its value,
1711 and replace the uses of the original references by these
1713 initialize_root (loop
, chain
, tmp_vars
);
1714 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
1716 var
= chain
->vars
[chain
->length
- a
->distance
];
1717 replace_ref_with (a
->stmt
, var
, false, false);
1722 /* Determines the unroll factor necessary to remove as many temporary variable
1723 copies as possible. CHAINS is the list of chains that will be
1727 determine_unroll_factor (vec
<chain_p
> chains
)
1730 unsigned factor
= 1, af
, nfactor
, i
;
1731 unsigned max
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
1733 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1735 if (chain
->type
== CT_INVARIANT
)
1738 if (chain
->combined
)
1740 /* For combined chains, we can't handle unrolling if we replace
1744 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
1745 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1750 /* The best unroll factor for this chain is equal to the number of
1751 temporary variables that we create for it. */
1753 if (chain
->has_max_use_after
)
1756 nfactor
= factor
* af
/ gcd (factor
, af
);
1764 /* Perform the predictive commoning optimization for CHAINS.
1765 Uids of the newly created temporary variables are marked in TMP_VARS. */
1768 execute_pred_commoning (struct loop
*loop
, vec
<chain_p
> chains
,
1774 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1776 if (chain
->type
== CT_INVARIANT
)
1777 execute_load_motion (loop
, chain
, tmp_vars
);
1779 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
1782 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1784 if (chain
->type
== CT_INVARIANT
)
1786 else if (chain
->combined
)
1788 /* For combined chains, just remove the statements that are used to
1789 compute the values of the expression (except for the root one). */
1792 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
1793 remove_stmt (a
->stmt
);
1797 update_ssa (TODO_update_ssa_only_virtuals
);
1800 /* For each reference in CHAINS, if its defining statement is
1801 phi node, record the ssa name that is defined by it. */
1804 replace_phis_by_defined_names (vec
<chain_p
> chains
)
1810 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1811 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1813 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1815 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
1821 /* For each reference in CHAINS, if name_defined_by_phi is not
1822 NULL, use it to set the stmt field. */
1825 replace_names_by_phis (vec
<chain_p
> chains
)
1831 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1832 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1833 if (a
->stmt
== NULL
)
1835 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
1836 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
1837 a
->name_defined_by_phi
= NULL_TREE
;
1841 /* Wrapper over execute_pred_commoning, to pass it as a callback
1842 to tree_transform_and_unroll_loop. */
1846 vec
<chain_p
> chains
;
1851 execute_pred_commoning_cbck (struct loop
*loop
, void *data
)
1853 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
1855 /* Restore phi nodes that were replaced by ssa names before
1856 tree_transform_and_unroll_loop (see detailed description in
1857 tree_predictive_commoning_loop). */
1858 replace_names_by_phis (dta
->chains
);
1859 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
1862 /* Base NAME and all the names in the chain of phi nodes that use it
1863 on variable VAR. The phi nodes are recognized by being in the copies of
1864 the header of the LOOP. */
1867 base_names_in_chain_on (struct loop
*loop
, tree name
, tree var
)
1870 imm_use_iterator iter
;
1872 replace_ssa_name_symbol (name
, var
);
1877 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
1879 if (gimple_code (stmt
) == GIMPLE_PHI
1880 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
1883 BREAK_FROM_IMM_USE_STMT (iter
);
1889 name
= PHI_RESULT (phi
);
1890 replace_ssa_name_symbol (name
, var
);
1894 /* Given an unrolled LOOP after predictive commoning, remove the
1895 register copies arising from phi nodes by changing the base
1896 variables of SSA names. TMP_VARS is the set of the temporary variables
1897 for those we want to perform this. */
1900 eliminate_temp_copies (struct loop
*loop
, bitmap tmp_vars
)
1905 tree name
, use
, var
;
1908 e
= loop_latch_edge (loop
);
1909 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1912 name
= PHI_RESULT (phi
);
1913 var
= SSA_NAME_VAR (name
);
1914 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
1916 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
1917 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
1919 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1920 stmt
= SSA_NAME_DEF_STMT (use
);
1921 while (gimple_code (stmt
) == GIMPLE_PHI
1922 /* In case we could not unroll the loop enough to eliminate
1923 all copies, we may reach the loop header before the defining
1924 statement (in that case, some register copies will be present
1925 in loop latch in the final code, corresponding to the newly
1926 created looparound phi nodes). */
1927 && gimple_bb (stmt
) != loop
->header
)
1929 gcc_assert (single_pred_p (gimple_bb (stmt
)));
1930 use
= PHI_ARG_DEF (stmt
, 0);
1931 stmt
= SSA_NAME_DEF_STMT (use
);
1934 base_names_in_chain_on (loop
, use
, var
);
1938 /* Returns true if CHAIN is suitable to be combined. */
1941 chain_can_be_combined_p (chain_p chain
)
1943 return (!chain
->combined
1944 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
1947 /* Returns the modify statement that uses NAME. Skips over assignment
1948 statements, NAME is replaced with the actual name used in the returned
1952 find_use_stmt (tree
*name
)
1957 /* Skip over assignments. */
1960 stmt
= single_nonlooparound_use (*name
);
1964 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1967 lhs
= gimple_assign_lhs (stmt
);
1968 if (TREE_CODE (lhs
) != SSA_NAME
)
1971 if (gimple_assign_copy_p (stmt
))
1973 rhs
= gimple_assign_rhs1 (stmt
);
1979 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
1980 == GIMPLE_BINARY_RHS
)
1987 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
1990 may_reassociate_p (tree type
, enum tree_code code
)
1992 if (FLOAT_TYPE_P (type
)
1993 && !flag_unsafe_math_optimizations
)
1996 return (commutative_tree_code (code
)
1997 && associative_tree_code (code
));
2000 /* If the operation used in STMT is associative and commutative, go through the
2001 tree of the same operations and returns its root. Distance to the root
2002 is stored in DISTANCE. */
2005 find_associative_operation_root (gimple
*stmt
, unsigned *distance
)
2009 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2010 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2013 if (!may_reassociate_p (type
, code
))
2018 lhs
= gimple_assign_lhs (stmt
);
2019 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
2021 next
= find_use_stmt (&lhs
);
2023 || gimple_assign_rhs_code (next
) != code
)
2035 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2036 is no such statement, returns NULL_TREE. In case the operation used on
2037 NAME1 and NAME2 is associative and commutative, returns the root of the
2038 tree formed by this operation instead of the statement that uses NAME1 or
2042 find_common_use_stmt (tree
*name1
, tree
*name2
)
2044 gimple
*stmt1
, *stmt2
;
2046 stmt1
= find_use_stmt (name1
);
2050 stmt2
= find_use_stmt (name2
);
2057 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2060 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2064 return (stmt1
== stmt2
? stmt1
: NULL
);
2067 /* Checks whether R1 and R2 are combined together using CODE, with the result
2068 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2069 if it is true. If CODE is ERROR_MARK, set these values instead. */
2072 combinable_refs_p (dref r1
, dref r2
,
2073 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2075 enum tree_code acode
;
2081 name1
= name_for_ref (r1
);
2082 name2
= name_for_ref (r2
);
2083 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2085 stmt
= find_common_use_stmt (&name1
, &name2
);
2088 /* A simple post-dominance check - make sure the combination
2089 is executed under the same condition as the references. */
2090 || (gimple_bb (stmt
) != gimple_bb (r1
->stmt
)
2091 && gimple_bb (stmt
) != gimple_bb (r2
->stmt
)))
2094 acode
= gimple_assign_rhs_code (stmt
);
2095 aswap
= (!commutative_tree_code (acode
)
2096 && gimple_assign_rhs1 (stmt
) != name1
);
2097 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
2099 if (*code
== ERROR_MARK
)
2107 return (*code
== acode
2109 && *rslt_type
== atype
);
2112 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2113 an assignment of the remaining operand. */
2116 remove_name_from_operation (gimple
*stmt
, tree op
)
2119 gimple_stmt_iterator si
;
2121 gcc_assert (is_gimple_assign (stmt
));
2123 if (gimple_assign_rhs1 (stmt
) == op
)
2124 other_op
= gimple_assign_rhs2 (stmt
);
2126 other_op
= gimple_assign_rhs1 (stmt
);
2128 si
= gsi_for_stmt (stmt
);
2129 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2131 /* We should not have reallocated STMT. */
2132 gcc_assert (gsi_stmt (si
) == stmt
);
2137 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2138 are combined in a single statement, and returns this statement. */
2141 reassociate_to_the_same_stmt (tree name1
, tree name2
)
2143 gimple
*stmt1
, *stmt2
, *root1
, *root2
, *s1
, *s2
;
2144 gassign
*new_stmt
, *tmp_stmt
;
2145 tree new_name
, tmp_name
, var
, r1
, r2
;
2146 unsigned dist1
, dist2
;
2147 enum tree_code code
;
2148 tree type
= TREE_TYPE (name1
);
2149 gimple_stmt_iterator bsi
;
2151 stmt1
= find_use_stmt (&name1
);
2152 stmt2
= find_use_stmt (&name2
);
2153 root1
= find_associative_operation_root (stmt1
, &dist1
);
2154 root2
= find_associative_operation_root (stmt2
, &dist2
);
2155 code
= gimple_assign_rhs_code (stmt1
);
2157 gcc_assert (root1
&& root2
&& root1
== root2
2158 && code
== gimple_assign_rhs_code (stmt2
));
2160 /* Find the root of the nearest expression in that both NAME1 and NAME2
2167 while (dist1
> dist2
)
2169 s1
= find_use_stmt (&r1
);
2170 r1
= gimple_assign_lhs (s1
);
2173 while (dist2
> dist1
)
2175 s2
= find_use_stmt (&r2
);
2176 r2
= gimple_assign_lhs (s2
);
2182 s1
= find_use_stmt (&r1
);
2183 r1
= gimple_assign_lhs (s1
);
2184 s2
= find_use_stmt (&r2
);
2185 r2
= gimple_assign_lhs (s2
);
2188 /* Remove NAME1 and NAME2 from the statements in that they are used
2190 remove_name_from_operation (stmt1
, name1
);
2191 remove_name_from_operation (stmt2
, name2
);
2193 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2194 combine it with the rhs of S1. */
2195 var
= create_tmp_reg (type
, "predreastmp");
2196 new_name
= make_ssa_name (var
);
2197 new_stmt
= gimple_build_assign (new_name
, code
, name1
, name2
);
2199 var
= create_tmp_reg (type
, "predreastmp");
2200 tmp_name
= make_ssa_name (var
);
2202 /* Rhs of S1 may now be either a binary expression with operation
2203 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2204 so that name1 or name2 was removed from it). */
2205 tmp_stmt
= gimple_build_assign (tmp_name
, gimple_assign_rhs_code (s1
),
2206 gimple_assign_rhs1 (s1
),
2207 gimple_assign_rhs2 (s1
));
2209 bsi
= gsi_for_stmt (s1
);
2210 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2211 s1
= gsi_stmt (bsi
);
2214 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2215 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2220 /* Returns the statement that combines references R1 and R2. In case R1
2221 and R2 are not used in the same statement, but they are used with an
2222 associative and commutative operation in the same expression, reassociate
2223 the expression so that they are used in the same statement. */
2226 stmt_combining_refs (dref r1
, dref r2
)
2228 gimple
*stmt1
, *stmt2
;
2229 tree name1
= name_for_ref (r1
);
2230 tree name2
= name_for_ref (r2
);
2232 stmt1
= find_use_stmt (&name1
);
2233 stmt2
= find_use_stmt (&name2
);
2237 return reassociate_to_the_same_stmt (name1
, name2
);
2240 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2241 description of the new chain is returned, otherwise we return NULL. */
2244 combine_chains (chain_p ch1
, chain_p ch2
)
2247 enum tree_code op
= ERROR_MARK
;
2252 tree rslt_type
= NULL_TREE
;
2256 if (ch1
->length
!= ch2
->length
)
2259 if (ch1
->refs
.length () != ch2
->refs
.length ())
2262 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2263 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2265 if (r1
->distance
!= r2
->distance
)
2268 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
2273 std::swap (ch1
, ch2
);
2275 new_chain
= XCNEW (struct chain
);
2276 new_chain
->type
= CT_COMBINATION
;
2278 new_chain
->ch1
= ch1
;
2279 new_chain
->ch2
= ch2
;
2280 new_chain
->rslt_type
= rslt_type
;
2281 new_chain
->length
= ch1
->length
;
2283 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2284 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2286 nw
= XCNEW (struct dref_d
);
2287 nw
->stmt
= stmt_combining_refs (r1
, r2
);
2288 nw
->distance
= r1
->distance
;
2290 new_chain
->refs
.safe_push (nw
);
2293 new_chain
->has_max_use_after
= false;
2294 root_stmt
= get_chain_root (new_chain
)->stmt
;
2295 for (i
= 1; new_chain
->refs
.iterate (i
, &nw
); i
++)
2297 if (nw
->distance
== new_chain
->length
2298 && !stmt_dominates_stmt_p (nw
->stmt
, root_stmt
))
2300 new_chain
->has_max_use_after
= true;
2305 ch1
->combined
= true;
2306 ch2
->combined
= true;
2310 /* Try to combine the CHAINS. */
2313 try_combine_chains (vec
<chain_p
> *chains
)
2316 chain_p ch1
, ch2
, cch
;
2317 auto_vec
<chain_p
> worklist
;
2319 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2320 if (chain_can_be_combined_p (ch1
))
2321 worklist
.safe_push (ch1
);
2323 while (!worklist
.is_empty ())
2325 ch1
= worklist
.pop ();
2326 if (!chain_can_be_combined_p (ch1
))
2329 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2331 if (!chain_can_be_combined_p (ch2
))
2334 cch
= combine_chains (ch1
, ch2
);
2337 worklist
.safe_push (cch
);
2338 chains
->safe_push (cch
);
2345 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2346 impossible because one of these initializers may trap, true otherwise. */
2349 prepare_initializers_chain (struct loop
*loop
, chain_p chain
)
2351 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
2352 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2355 edge entry
= loop_preheader_edge (loop
);
2357 /* Find the initializers for the variables, and check that they cannot
2359 chain
->inits
.create (n
);
2360 for (i
= 0; i
< n
; i
++)
2361 chain
->inits
.quick_push (NULL_TREE
);
2363 /* If we have replaced some looparound phi nodes, use their initializers
2364 instead of creating our own. */
2365 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
2367 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
2370 gcc_assert (laref
->distance
> 0);
2371 chain
->inits
[n
- laref
->distance
]
2372 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
2375 for (i
= 0; i
< n
; i
++)
2377 gimple_seq stmts
= NULL
;
2379 if (chain
->inits
[i
] != NULL_TREE
)
2382 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
2383 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
2385 gimple_seq_discard (stmts
);
2390 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
2392 chain
->inits
[i
] = init
;
2398 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2399 be used because the initializers might trap. */
2402 prepare_initializers (struct loop
*loop
, vec
<chain_p
> chains
)
2407 for (i
= 0; i
< chains
.length (); )
2410 if (prepare_initializers_chain (loop
, chain
))
2414 release_chain (chain
);
2415 chains
.unordered_remove (i
);
2420 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2424 tree_predictive_commoning_loop (struct loop
*loop
)
2426 vec
<data_reference_p
> datarefs
;
2427 vec
<ddr_p
> dependences
;
2428 struct component
*components
;
2429 vec
<chain_p
> chains
= vNULL
;
2430 unsigned unroll_factor
;
2431 struct tree_niter_desc desc
;
2432 bool unroll
= false;
2436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2437 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
2439 /* Find the data references and split them into components according to their
2440 dependence relations. */
2441 auto_vec
<loop_p
, 3> loop_nest
;
2442 dependences
.create (10);
2443 datarefs
.create (10);
2444 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
2447 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2448 fprintf (dump_file
, "Cannot analyze data dependencies\n");
2449 free_data_refs (datarefs
);
2450 free_dependence_relations (dependences
);
2454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2455 dump_data_dependence_relations (dump_file
, dependences
);
2457 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
2458 loop_nest
.release ();
2459 free_dependence_relations (dependences
);
2462 free_data_refs (datarefs
);
2463 free_affine_expand_cache (&name_expansions
);
2467 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2469 fprintf (dump_file
, "Initial state:\n\n");
2470 dump_components (dump_file
, components
);
2473 /* Find the suitable components and split them into chains. */
2474 components
= filter_suitable_components (loop
, components
);
2476 tmp_vars
= BITMAP_ALLOC (NULL
);
2477 looparound_phis
= BITMAP_ALLOC (NULL
);
2478 determine_roots (loop
, components
, &chains
);
2479 release_components (components
);
2481 if (!chains
.exists ())
2483 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2485 "Predictive commoning failed: no suitable chains\n");
2488 prepare_initializers (loop
, chains
);
2490 /* Try to combine the chains that are always worked with together. */
2491 try_combine_chains (&chains
);
2493 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2495 fprintf (dump_file
, "Before commoning:\n\n");
2496 dump_chains (dump_file
, chains
);
2499 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2500 that its number of iterations is divisible by the factor. */
2501 unroll_factor
= determine_unroll_factor (chains
);
2503 unroll
= (unroll_factor
> 1
2504 && can_unroll_loop_p (loop
, unroll_factor
, &desc
));
2505 exit
= single_dom_exit (loop
);
2507 /* Execute the predictive commoning transformations, and possibly unroll the
2511 struct epcc_data dta
;
2513 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2514 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
2516 dta
.chains
= chains
;
2517 dta
.tmp_vars
= tmp_vars
;
2519 update_ssa (TODO_update_ssa_only_virtuals
);
2521 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2522 execute_pred_commoning_cbck is called may cause phi nodes to be
2523 reallocated, which is a problem since CHAINS may point to these
2524 statements. To fix this, we store the ssa names defined by the
2525 phi nodes here instead of the phi nodes themselves, and restore
2526 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2527 replace_phis_by_defined_names (chains
);
2529 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
2530 execute_pred_commoning_cbck
, &dta
);
2531 eliminate_temp_copies (loop
, tmp_vars
);
2535 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2537 "Executing predictive commoning without unrolling.\n");
2538 execute_pred_commoning (loop
, chains
, tmp_vars
);
2542 release_chains (chains
);
2543 free_data_refs (datarefs
);
2544 BITMAP_FREE (tmp_vars
);
2545 BITMAP_FREE (looparound_phis
);
2547 free_affine_expand_cache (&name_expansions
);
2552 /* Runs predictive commoning. */
2555 tree_predictive_commoning (void)
2557 bool unrolled
= false;
2561 initialize_original_copy_tables ();
2562 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
2563 if (optimize_loop_for_speed_p (loop
))
2565 unrolled
|= tree_predictive_commoning_loop (loop
);
2571 ret
= TODO_cleanup_cfg
;
2573 free_original_copy_tables ();
2578 /* Predictive commoning Pass. */
2581 run_tree_predictive_commoning (struct function
*fun
)
2583 if (number_of_loops (fun
) <= 1)
2586 return tree_predictive_commoning ();
2591 const pass_data pass_data_predcom
=
2593 GIMPLE_PASS
, /* type */
2595 OPTGROUP_LOOP
, /* optinfo_flags */
2596 TV_PREDCOM
, /* tv_id */
2597 PROP_cfg
, /* properties_required */
2598 0, /* properties_provided */
2599 0, /* properties_destroyed */
2600 0, /* todo_flags_start */
2601 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
2604 class pass_predcom
: public gimple_opt_pass
2607 pass_predcom (gcc::context
*ctxt
)
2608 : gimple_opt_pass (pass_data_predcom
, ctxt
)
2611 /* opt_pass methods: */
2612 virtual bool gate (function
*) { return flag_predictive_commoning
!= 0; }
2613 virtual unsigned int execute (function
*fun
)
2615 return run_tree_predictive_commoning (fun
);
2618 }; // class pass_predcom
2623 make_pass_predcom (gcc::context
*ctxt
)
2625 return new pass_predcom (ctxt
);