1 /* Predictive commoning.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction -- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
170 for (i = 0; i < n; i++)
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
189 #include "coretypes.h"
194 #include "gimplify.h"
195 #include "gimple-iterator.h"
196 #include "gimple-ssa.h"
197 #include "tree-phinodes.h"
198 #include "ssa-iterators.h"
199 #include "tree-ssanames.h"
200 #include "tree-ssa-loop-ivopts.h"
201 #include "tree-ssa-loop-manip.h"
202 #include "tree-ssa-loop-niter.h"
203 #include "tree-ssa-loop.h"
204 #include "tree-into-ssa.h"
205 #include "tree-dfa.h"
206 #include "tree-ssa.h"
208 #include "tree-data-ref.h"
209 #include "tree-scalar-evolution.h"
210 #include "tree-chrec.h"
212 #include "gimple-pretty-print.h"
213 #include "tree-pass.h"
214 #include "tree-affine.h"
215 #include "tree-inline.h"
217 /* The maximum number of iterations between the considered memory
220 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
222 /* Data references (or phi nodes that carry data reference values across
225 typedef struct dref_d
227 /* The reference itself. */
228 struct data_reference
*ref
;
230 /* The statement in that the reference appears. */
233 /* In case that STMT is a phi node, this field is set to the SSA name
234 defined by it in replace_phis_by_defined_names (in order to avoid
235 pointing to phi node that got reallocated in the meantime). */
236 tree name_defined_by_phi
;
238 /* Distance of the reference from the root of the chain (in number of
239 iterations of the loop). */
242 /* Number of iterations offset from the first reference in the component. */
245 /* Number of the reference in a component, in dominance ordering. */
248 /* True if the memory reference is always accessed when the loop is
250 unsigned always_accessed
: 1;
254 /* Type of the chain of the references. */
258 /* The addresses of the references in the chain are constant. */
261 /* There are only loads in the chain. */
264 /* Root of the chain is store, the rest are loads. */
267 /* A combination of two chains. */
271 /* Chains of data references. */
275 /* Type of the chain. */
276 enum chain_type type
;
278 /* For combination chains, the operator and the two chains that are
279 combined, and the type of the result. */
282 struct chain
*ch1
, *ch2
;
284 /* The references in the chain. */
287 /* The maximum distance of the reference in the chain from the root. */
290 /* The variables used to copy the value throughout iterations. */
293 /* Initializers for the variables. */
296 /* True if there is a use of a variable with the maximal distance
297 that comes after the root in the loop. */
298 unsigned has_max_use_after
: 1;
300 /* True if all the memory references in the chain are always accessed. */
301 unsigned all_always_accessed
: 1;
303 /* True if this chain was combined together with some other chain. */
304 unsigned combined
: 1;
308 /* Describes the knowledge about the step of the memory references in
313 /* The step is zero. */
316 /* The step is nonzero. */
319 /* The step may or may not be nonzero. */
323 /* Components of the data dependence graph. */
327 /* The references in the component. */
330 /* What we know about the step of the references in the component. */
331 enum ref_step_type comp_step
;
333 /* Next component in the list. */
334 struct component
*next
;
337 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
339 static bitmap looparound_phis
;
341 /* Cache used by tree_to_aff_combination_expand. */
343 static struct pointer_map_t
*name_expansions
;
345 /* Dumps data reference REF to FILE. */
347 extern void dump_dref (FILE *, dref
);
349 dump_dref (FILE *file
, dref ref
)
354 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
355 fprintf (file
, " (id %u%s)\n", ref
->pos
,
356 DR_IS_READ (ref
->ref
) ? "" : ", write");
358 fprintf (file
, " offset ");
359 dump_double_int (file
, ref
->offset
, false);
360 fprintf (file
, "\n");
362 fprintf (file
, " distance %u\n", ref
->distance
);
366 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
367 fprintf (file
, " looparound ref\n");
369 fprintf (file
, " combination ref\n");
370 fprintf (file
, " in statement ");
371 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
372 fprintf (file
, "\n");
373 fprintf (file
, " distance %u\n", ref
->distance
);
378 /* Dumps CHAIN to FILE. */
380 extern void dump_chain (FILE *, chain_p
);
382 dump_chain (FILE *file
, chain_p chain
)
385 const char *chain_type
;
392 chain_type
= "Load motion";
396 chain_type
= "Loads-only";
400 chain_type
= "Store-loads";
404 chain_type
= "Combination";
411 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
412 chain
->combined
? " (combined)" : "");
413 if (chain
->type
!= CT_INVARIANT
)
414 fprintf (file
, " max distance %u%s\n", chain
->length
,
415 chain
->has_max_use_after
? "" : ", may reuse first");
417 if (chain
->type
== CT_COMBINATION
)
419 fprintf (file
, " equal to %p %s %p in type ",
420 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
421 (void *) chain
->ch2
);
422 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
423 fprintf (file
, "\n");
426 if (chain
->vars
.exists ())
428 fprintf (file
, " vars");
429 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
432 print_generic_expr (file
, var
, TDF_SLIM
);
434 fprintf (file
, "\n");
437 if (chain
->inits
.exists ())
439 fprintf (file
, " inits");
440 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
443 print_generic_expr (file
, var
, TDF_SLIM
);
445 fprintf (file
, "\n");
448 fprintf (file
, " references:\n");
449 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
452 fprintf (file
, "\n");
455 /* Dumps CHAINS to FILE. */
457 extern void dump_chains (FILE *, vec
<chain_p
> );
459 dump_chains (FILE *file
, vec
<chain_p
> chains
)
464 FOR_EACH_VEC_ELT (chains
, i
, chain
)
465 dump_chain (file
, chain
);
468 /* Dumps COMP to FILE. */
470 extern void dump_component (FILE *, struct component
*);
472 dump_component (FILE *file
, struct component
*comp
)
477 fprintf (file
, "Component%s:\n",
478 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
479 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
481 fprintf (file
, "\n");
484 /* Dumps COMPS to FILE. */
486 extern void dump_components (FILE *, struct component
*);
488 dump_components (FILE *file
, struct component
*comps
)
490 struct component
*comp
;
492 for (comp
= comps
; comp
; comp
= comp
->next
)
493 dump_component (file
, comp
);
496 /* Frees a chain CHAIN. */
499 release_chain (chain_p chain
)
507 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
510 chain
->refs
.release ();
511 chain
->vars
.release ();
512 chain
->inits
.release ();
520 release_chains (vec
<chain_p
> chains
)
525 FOR_EACH_VEC_ELT (chains
, i
, chain
)
526 release_chain (chain
);
530 /* Frees a component COMP. */
533 release_component (struct component
*comp
)
535 comp
->refs
.release ();
539 /* Frees list of components COMPS. */
542 release_components (struct component
*comps
)
544 struct component
*act
, *next
;
546 for (act
= comps
; act
; act
= next
)
549 release_component (act
);
553 /* Finds a root of tree given by FATHERS containing A, and performs path
557 component_of (unsigned fathers
[], unsigned a
)
561 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
564 for (; a
!= root
; a
= n
)
573 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
574 components, A and B are components to merge. */
577 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
579 unsigned ca
= component_of (fathers
, a
);
580 unsigned cb
= component_of (fathers
, b
);
585 if (sizes
[ca
] < sizes
[cb
])
587 sizes
[cb
] += sizes
[ca
];
592 sizes
[ca
] += sizes
[cb
];
597 /* Returns true if A is a reference that is suitable for predictive commoning
598 in the innermost loop that contains it. REF_STEP is set according to the
599 step of the reference A. */
602 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
604 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
607 || TREE_THIS_VOLATILE (ref
)
608 || !is_gimple_reg_type (TREE_TYPE (ref
))
609 || tree_could_throw_p (ref
))
612 if (integer_zerop (step
))
613 *ref_step
= RS_INVARIANT
;
614 else if (integer_nonzerop (step
))
615 *ref_step
= RS_NONZERO
;
622 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
625 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
627 tree type
= TREE_TYPE (DR_OFFSET (dr
));
630 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
632 aff_combination_const (&delta
, type
, tree_to_double_int (DR_INIT (dr
)));
633 aff_combination_add (offset
, &delta
);
636 /* Determines number of iterations of the innermost enclosing loop before B
637 refers to exactly the same location as A and stores it to OFF. If A and
638 B do not have the same step, they never meet, or anything else fails,
639 returns false, otherwise returns true. Both A and B are assumed to
640 satisfy suitable_reference_p. */
643 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
646 aff_tree diff
, baseb
, step
;
649 /* Check that both the references access the location in the same type. */
650 typea
= TREE_TYPE (DR_REF (a
));
651 typeb
= TREE_TYPE (DR_REF (b
));
652 if (!useless_type_conversion_p (typeb
, typea
))
655 /* Check whether the base address and the step of both references is the
657 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
658 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
661 if (integer_zerop (DR_STEP (a
)))
663 /* If the references have loop invariant address, check that they access
664 exactly the same location. */
665 *off
= double_int_zero
;
666 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
667 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
670 /* Compare the offsets of the addresses, and check whether the difference
671 is a multiple of step. */
672 aff_combination_dr_offset (a
, &diff
);
673 aff_combination_dr_offset (b
, &baseb
);
674 aff_combination_scale (&baseb
, double_int_minus_one
);
675 aff_combination_add (&diff
, &baseb
);
677 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
678 &step
, &name_expansions
);
679 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
682 /* Returns the last basic block in LOOP for that we are sure that
683 it is executed whenever the loop is entered. */
686 last_always_executed_block (struct loop
*loop
)
689 vec
<edge
> exits
= get_loop_exit_edges (loop
);
691 basic_block last
= loop
->latch
;
693 FOR_EACH_VEC_ELT (exits
, i
, ex
)
694 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
700 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
702 static struct component
*
703 split_data_refs_to_components (struct loop
*loop
,
704 vec
<data_reference_p
> datarefs
,
707 unsigned i
, n
= datarefs
.length ();
708 unsigned ca
, ia
, ib
, bad
;
709 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
710 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
711 struct component
**comps
;
712 struct data_reference
*dr
, *dra
, *drb
;
713 struct data_dependence_relation
*ddr
;
714 struct component
*comp_list
= NULL
, *comp
;
716 basic_block last_always_executed
= last_always_executed_block (loop
);
718 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
722 /* A fake reference for call or asm_expr that may clobber memory;
726 dr
->aux
= (void *) (size_t) i
;
731 /* A component reserved for the "bad" data references. */
735 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
737 enum ref_step_type dummy
;
739 if (!suitable_reference_p (dr
, &dummy
))
741 ia
= (unsigned) (size_t) dr
->aux
;
742 merge_comps (comp_father
, comp_size
, n
, ia
);
746 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
748 double_int dummy_off
;
750 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
755 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
756 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
760 bad
= component_of (comp_father
, n
);
762 /* If both A and B are reads, we may ignore unsuitable dependences. */
763 if (DR_IS_READ (dra
) && DR_IS_READ (drb
)
764 && (ia
== bad
|| ib
== bad
765 || !determine_offset (dra
, drb
, &dummy_off
)))
768 merge_comps (comp_father
, comp_size
, ia
, ib
);
771 comps
= XCNEWVEC (struct component
*, n
);
772 bad
= component_of (comp_father
, n
);
773 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
775 ia
= (unsigned) (size_t) dr
->aux
;
776 ca
= component_of (comp_father
, ia
);
783 comp
= XCNEW (struct component
);
784 comp
->refs
.create (comp_size
[ca
]);
788 dataref
= XCNEW (struct dref_d
);
790 dataref
->stmt
= DR_STMT (dr
);
791 dataref
->offset
= double_int_zero
;
792 dataref
->distance
= 0;
794 dataref
->always_accessed
795 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
796 gimple_bb (dataref
->stmt
));
797 dataref
->pos
= comp
->refs
.length ();
798 comp
->refs
.quick_push (dataref
);
801 for (i
= 0; i
< n
; i
++)
806 comp
->next
= comp_list
;
818 /* Returns true if the component COMP satisfies the conditions
819 described in 2) at the beginning of this file. LOOP is the current
823 suitable_component_p (struct loop
*loop
, struct component
*comp
)
827 basic_block ba
, bp
= loop
->header
;
828 bool ok
, has_write
= false;
830 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
832 ba
= gimple_bb (a
->stmt
);
834 if (!just_once_each_iteration_p (loop
, ba
))
837 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
840 if (DR_IS_WRITE (a
->ref
))
844 first
= comp
->refs
[0];
845 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
847 first
->offset
= double_int_zero
;
849 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
851 if (!determine_offset (first
->ref
, a
->ref
, &a
->offset
))
854 #ifdef ENABLE_CHECKING
856 enum ref_step_type a_step
;
857 ok
= suitable_reference_p (a
->ref
, &a_step
);
858 gcc_assert (ok
&& a_step
== comp
->comp_step
);
863 /* If there is a write inside the component, we must know whether the
864 step is nonzero or not -- we would not otherwise be able to recognize
865 whether the value accessed by reads comes from the OFFSET-th iteration
866 or the previous one. */
867 if (has_write
&& comp
->comp_step
== RS_ANY
)
873 /* Check the conditions on references inside each of components COMPS,
874 and remove the unsuitable components from the list. The new list
875 of components is returned. The conditions are described in 2) at
876 the beginning of this file. LOOP is the current loop. */
878 static struct component
*
879 filter_suitable_components (struct loop
*loop
, struct component
*comps
)
881 struct component
**comp
, *act
;
883 for (comp
= &comps
; *comp
; )
886 if (suitable_component_p (loop
, act
))
894 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
896 release_component (act
);
903 /* Compares two drefs A and B by their offset and position. Callback for
907 order_drefs (const void *a
, const void *b
)
909 const dref
*const da
= (const dref
*) a
;
910 const dref
*const db
= (const dref
*) b
;
911 int offcmp
= (*da
)->offset
.scmp ((*db
)->offset
);
916 return (*da
)->pos
- (*db
)->pos
;
919 /* Returns root of the CHAIN. */
922 get_chain_root (chain_p chain
)
924 return chain
->refs
[0];
927 /* Adds REF to the chain CHAIN. */
930 add_ref_to_chain (chain_p chain
, dref ref
)
932 dref root
= get_chain_root (chain
);
935 gcc_assert (root
->offset
.sle (ref
->offset
));
936 dist
= ref
->offset
- root
->offset
;
937 if (double_int::from_uhwi (MAX_DISTANCE
).ule (dist
))
942 gcc_assert (dist
.fits_uhwi ());
944 chain
->refs
.safe_push (ref
);
946 ref
->distance
= dist
.to_uhwi ();
948 if (ref
->distance
>= chain
->length
)
950 chain
->length
= ref
->distance
;
951 chain
->has_max_use_after
= false;
954 if (ref
->distance
== chain
->length
955 && ref
->pos
> root
->pos
)
956 chain
->has_max_use_after
= true;
958 chain
->all_always_accessed
&= ref
->always_accessed
;
961 /* Returns the chain for invariant component COMP. */
964 make_invariant_chain (struct component
*comp
)
966 chain_p chain
= XCNEW (struct chain
);
970 chain
->type
= CT_INVARIANT
;
972 chain
->all_always_accessed
= true;
974 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
976 chain
->refs
.safe_push (ref
);
977 chain
->all_always_accessed
&= ref
->always_accessed
;
983 /* Make a new chain rooted at REF. */
986 make_rooted_chain (dref ref
)
988 chain_p chain
= XCNEW (struct chain
);
990 chain
->type
= DR_IS_READ (ref
->ref
) ? CT_LOAD
: CT_STORE_LOAD
;
992 chain
->refs
.safe_push (ref
);
993 chain
->all_always_accessed
= ref
->always_accessed
;
1000 /* Returns true if CHAIN is not trivial. */
1003 nontrivial_chain_p (chain_p chain
)
1005 return chain
!= NULL
&& chain
->refs
.length () > 1;
1008 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1012 name_for_ref (dref ref
)
1016 if (is_gimple_assign (ref
->stmt
))
1018 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1019 name
= gimple_assign_lhs (ref
->stmt
);
1021 name
= gimple_assign_rhs1 (ref
->stmt
);
1024 name
= PHI_RESULT (ref
->stmt
);
1026 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1029 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1030 iterations of the innermost enclosing loop). */
1033 valid_initializer_p (struct data_reference
*ref
,
1034 unsigned distance
, struct data_reference
*root
)
1036 aff_tree diff
, base
, step
;
1039 /* Both REF and ROOT must be accessing the same object. */
1040 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1043 /* The initializer is defined outside of loop, hence its address must be
1044 invariant inside the loop. */
1045 gcc_assert (integer_zerop (DR_STEP (ref
)));
1047 /* If the address of the reference is invariant, initializer must access
1048 exactly the same location. */
1049 if (integer_zerop (DR_STEP (root
)))
1050 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1051 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1053 /* Verify that this index of REF is equal to the root's index at
1054 -DISTANCE-th iteration. */
1055 aff_combination_dr_offset (root
, &diff
);
1056 aff_combination_dr_offset (ref
, &base
);
1057 aff_combination_scale (&base
, double_int_minus_one
);
1058 aff_combination_add (&diff
, &base
);
1060 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1061 &step
, &name_expansions
);
1062 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1065 if (off
!= double_int::from_uhwi (distance
))
1071 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1072 initial value is correct (equal to initial value of REF shifted by one
1073 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1074 is the root of the current chain. */
1077 find_looparound_phi (struct loop
*loop
, dref ref
, dref root
)
1079 tree name
, init
, init_ref
;
1080 gimple phi
= NULL
, init_stmt
;
1081 edge latch
= loop_latch_edge (loop
);
1082 struct data_reference init_dr
;
1083 gimple_stmt_iterator psi
;
1085 if (is_gimple_assign (ref
->stmt
))
1087 if (DR_IS_READ (ref
->ref
))
1088 name
= gimple_assign_lhs (ref
->stmt
);
1090 name
= gimple_assign_rhs1 (ref
->stmt
);
1093 name
= PHI_RESULT (ref
->stmt
);
1097 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1099 phi
= gsi_stmt (psi
);
1100 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1104 if (gsi_end_p (psi
))
1107 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1108 if (TREE_CODE (init
) != SSA_NAME
)
1110 init_stmt
= SSA_NAME_DEF_STMT (init
);
1111 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1113 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1115 init_ref
= gimple_assign_rhs1 (init_stmt
);
1116 if (!REFERENCE_CLASS_P (init_ref
)
1117 && !DECL_P (init_ref
))
1120 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1121 loop enclosing PHI). */
1122 memset (&init_dr
, 0, sizeof (struct data_reference
));
1123 DR_REF (&init_dr
) = init_ref
;
1124 DR_STMT (&init_dr
) = phi
;
1125 if (!dr_analyze_innermost (&init_dr
, loop
))
1128 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1134 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1137 insert_looparound_copy (chain_p chain
, dref ref
, gimple phi
)
1139 dref nw
= XCNEW (struct dref_d
), aref
;
1143 nw
->distance
= ref
->distance
+ 1;
1144 nw
->always_accessed
= 1;
1146 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1147 if (aref
->distance
>= nw
->distance
)
1149 chain
->refs
.safe_insert (i
, nw
);
1151 if (nw
->distance
> chain
->length
)
1153 chain
->length
= nw
->distance
;
1154 chain
->has_max_use_after
= false;
1158 /* For references in CHAIN that are copied around the LOOP (created previously
1159 by PRE, or by user), add the results of such copies to the chain. This
1160 enables us to remove the copies by unrolling, and may need less registers
1161 (also, it may allow us to combine chains together). */
1164 add_looparound_copies (struct loop
*loop
, chain_p chain
)
1167 dref ref
, root
= get_chain_root (chain
);
1170 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1172 phi
= find_looparound_phi (loop
, ref
, root
);
1176 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1177 insert_looparound_copy (chain
, ref
, phi
);
1181 /* Find roots of the values and determine distances in the component COMP.
1182 The references are redistributed into CHAINS. LOOP is the current
1186 determine_roots_comp (struct loop
*loop
,
1187 struct component
*comp
,
1188 vec
<chain_p
> *chains
)
1192 chain_p chain
= NULL
;
1193 double_int last_ofs
= double_int_zero
;
1195 /* Invariants are handled specially. */
1196 if (comp
->comp_step
== RS_INVARIANT
)
1198 chain
= make_invariant_chain (comp
);
1199 chains
->safe_push (chain
);
1203 comp
->refs
.qsort (order_drefs
);
1205 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1207 if (!chain
|| DR_IS_WRITE (a
->ref
)
1208 || double_int::from_uhwi (MAX_DISTANCE
).ule (a
->offset
- last_ofs
))
1210 if (nontrivial_chain_p (chain
))
1212 add_looparound_copies (loop
, chain
);
1213 chains
->safe_push (chain
);
1216 release_chain (chain
);
1217 chain
= make_rooted_chain (a
);
1218 last_ofs
= a
->offset
;
1222 add_ref_to_chain (chain
, a
);
1225 if (nontrivial_chain_p (chain
))
1227 add_looparound_copies (loop
, chain
);
1228 chains
->safe_push (chain
);
1231 release_chain (chain
);
1234 /* Find roots of the values and determine distances in components COMPS, and
1235 separates the references to CHAINS. LOOP is the current loop. */
1238 determine_roots (struct loop
*loop
,
1239 struct component
*comps
, vec
<chain_p
> *chains
)
1241 struct component
*comp
;
1243 for (comp
= comps
; comp
; comp
= comp
->next
)
1244 determine_roots_comp (loop
, comp
, chains
);
1247 /* Replace the reference in statement STMT with temporary variable
1248 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1249 the reference in the statement. IN_LHS is true if the reference
1250 is in the lhs of STMT, false if it is in rhs. */
1253 replace_ref_with (gimple stmt
, tree new_tree
, bool set
, bool in_lhs
)
1257 gimple_stmt_iterator bsi
, psi
;
1259 if (gimple_code (stmt
) == GIMPLE_PHI
)
1261 gcc_assert (!in_lhs
&& !set
);
1263 val
= PHI_RESULT (stmt
);
1264 bsi
= gsi_after_labels (gimple_bb (stmt
));
1265 psi
= gsi_for_stmt (stmt
);
1266 remove_phi_node (&psi
, false);
1268 /* Turn the phi node into GIMPLE_ASSIGN. */
1269 new_stmt
= gimple_build_assign (val
, new_tree
);
1270 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1274 /* Since the reference is of gimple_reg type, it should only
1275 appear as lhs or rhs of modify statement. */
1276 gcc_assert (is_gimple_assign (stmt
));
1278 bsi
= gsi_for_stmt (stmt
);
1280 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1283 gcc_assert (!in_lhs
);
1284 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1285 stmt
= gsi_stmt (bsi
);
1292 /* We have statement
1296 If OLD is a memory reference, then VAL is gimple_val, and we transform
1302 Otherwise, we are replacing a combination chain,
1303 VAL is the expression that performs the combination, and OLD is an
1304 SSA name. In this case, we transform the assignment to
1311 val
= gimple_assign_lhs (stmt
);
1312 if (TREE_CODE (val
) != SSA_NAME
)
1314 val
= gimple_assign_rhs1 (stmt
);
1315 gcc_assert (gimple_assign_single_p (stmt
));
1316 if (TREE_CLOBBER_P (val
))
1317 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1319 gcc_assert (gimple_assign_copy_p (stmt
));
1331 val
= gimple_assign_lhs (stmt
);
1334 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1335 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1338 /* Returns a memory reference to DR in the ITER-th iteration of
1339 the loop it was analyzed in. Append init stmts to STMTS. */
1342 ref_at_iteration (data_reference_p dr
, int iter
, gimple_seq
*stmts
)
1344 tree off
= DR_OFFSET (dr
);
1345 tree coff
= DR_INIT (dr
);
1348 else if (TREE_CODE (DR_STEP (dr
)) == INTEGER_CST
)
1349 coff
= size_binop (PLUS_EXPR
, coff
,
1350 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1352 off
= size_binop (PLUS_EXPR
, off
,
1353 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1354 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1355 addr
= force_gimple_operand_1 (addr
, stmts
, is_gimple_mem_ref_addr
,
1357 tree alias_ptr
= fold_convert (reference_alias_ptr_type (DR_REF (dr
)), coff
);
1358 /* While data-ref analysis punts on bit offsets it still handles
1359 bitfield accesses at byte boundaries. Cope with that. Note that
1360 we cannot simply re-apply the outer COMPONENT_REF because the
1361 byte-granular portion of it is already applied via DR_INIT and
1362 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1363 start at offset zero. */
1364 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
1365 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
1367 tree field
= TREE_OPERAND (DR_REF (dr
), 1);
1368 return build3 (BIT_FIELD_REF
, TREE_TYPE (DR_REF (dr
)),
1369 build2 (MEM_REF
, DECL_BIT_FIELD_TYPE (field
),
1371 DECL_SIZE (field
), bitsize_zero_node
);
1374 return fold_build2 (MEM_REF
, TREE_TYPE (DR_REF (dr
)), addr
, alias_ptr
);
1377 /* Get the initialization expression for the INDEX-th temporary variable
1381 get_init_expr (chain_p chain
, unsigned index
)
1383 if (chain
->type
== CT_COMBINATION
)
1385 tree e1
= get_init_expr (chain
->ch1
, index
);
1386 tree e2
= get_init_expr (chain
->ch2
, index
);
1388 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1391 return chain
->inits
[index
];
1394 /* Returns a new temporary variable used for the I-th variable carrying
1395 value of REF. The variable's uid is marked in TMP_VARS. */
1398 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1400 tree type
= TREE_TYPE (ref
);
1401 /* We never access the components of the temporary variable in predictive
1403 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
1404 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1408 /* Creates the variables for CHAIN, as well as phi nodes for them and
1409 initialization on entry to LOOP. Uids of the newly created
1410 temporary variables are marked in TMP_VARS. */
1413 initialize_root_vars (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1416 unsigned n
= chain
->length
;
1417 dref root
= get_chain_root (chain
);
1418 bool reuse_first
= !chain
->has_max_use_after
;
1419 tree ref
, init
, var
, next
;
1422 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1424 /* If N == 0, then all the references are within the single iteration. And
1425 since this is an nonempty chain, reuse_first cannot be true. */
1426 gcc_assert (n
> 0 || !reuse_first
);
1428 chain
->vars
.create (n
+ 1);
1430 if (chain
->type
== CT_COMBINATION
)
1431 ref
= gimple_assign_lhs (root
->stmt
);
1433 ref
= DR_REF (root
->ref
);
1435 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1437 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1438 chain
->vars
.quick_push (var
);
1441 chain
->vars
.quick_push (chain
->vars
[0]);
1443 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1444 chain
->vars
[i
] = make_ssa_name (var
, NULL
);
1446 for (i
= 0; i
< n
; i
++)
1448 var
= chain
->vars
[i
];
1449 next
= chain
->vars
[i
+ 1];
1450 init
= get_init_expr (chain
, i
);
1452 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1454 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1456 phi
= create_phi_node (var
, loop
->header
);
1457 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1458 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1462 /* Create the variables and initialization statement for root of chain
1463 CHAIN. Uids of the newly created temporary variables are marked
1467 initialize_root (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1469 dref root
= get_chain_root (chain
);
1470 bool in_lhs
= (chain
->type
== CT_STORE_LOAD
1471 || chain
->type
== CT_COMBINATION
);
1473 initialize_root_vars (loop
, chain
, tmp_vars
);
1474 replace_ref_with (root
->stmt
,
1475 chain
->vars
[chain
->length
],
1479 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1480 initialization on entry to LOOP if necessary. The ssa name for the variable
1481 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1482 around the loop is created. Uid of the newly created temporary variable
1483 is marked in TMP_VARS. INITS is the list containing the (single)
1487 initialize_root_vars_lm (struct loop
*loop
, dref root
, bool written
,
1488 vec
<tree
> *vars
, vec
<tree
> inits
,
1492 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1495 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1497 /* Find the initializer for the variable, and check that it cannot
1501 vars
->create (written
? 2 : 1);
1502 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1503 vars
->quick_push (var
);
1505 vars
->quick_push ((*vars
)[0]);
1507 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1508 (*vars
)[i
] = make_ssa_name (var
, NULL
);
1512 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1514 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1519 phi
= create_phi_node (var
, loop
->header
);
1520 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1521 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1525 gimple init_stmt
= gimple_build_assign (var
, init
);
1526 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1531 /* Execute load motion for references in chain CHAIN. Uids of the newly
1532 created temporary variables are marked in TMP_VARS. */
1535 execute_load_motion (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1539 unsigned n_writes
= 0, ridx
, i
;
1542 gcc_assert (chain
->type
== CT_INVARIANT
);
1543 gcc_assert (!chain
->combined
);
1544 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1545 if (DR_IS_WRITE (a
->ref
))
1548 /* If there are no reads in the loop, there is nothing to do. */
1549 if (n_writes
== chain
->refs
.length ())
1552 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
1553 &vars
, chain
->inits
, tmp_vars
);
1556 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1558 bool is_read
= DR_IS_READ (a
->ref
);
1560 if (DR_IS_WRITE (a
->ref
))
1566 var
= make_ssa_name (SSA_NAME_VAR (var
), NULL
);
1573 replace_ref_with (a
->stmt
, vars
[ridx
],
1574 !is_read
, !is_read
);
1580 /* Returns the single statement in that NAME is used, excepting
1581 the looparound phi nodes contained in one of the chains. If there is no
1582 such statement, or more statements, NULL is returned. */
1585 single_nonlooparound_use (tree name
)
1588 imm_use_iterator it
;
1589 gimple stmt
, ret
= NULL
;
1591 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
1593 stmt
= USE_STMT (use
);
1595 if (gimple_code (stmt
) == GIMPLE_PHI
)
1597 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1598 could not be processed anyway, so just fail for them. */
1599 if (bitmap_bit_p (looparound_phis
,
1600 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
1605 else if (is_gimple_debug (stmt
))
1607 else if (ret
!= NULL
)
1616 /* Remove statement STMT, as well as the chain of assignments in that it is
1620 remove_stmt (gimple stmt
)
1624 gimple_stmt_iterator psi
;
1626 if (gimple_code (stmt
) == GIMPLE_PHI
)
1628 name
= PHI_RESULT (stmt
);
1629 next
= single_nonlooparound_use (name
);
1630 reset_debug_uses (stmt
);
1631 psi
= gsi_for_stmt (stmt
);
1632 remove_phi_node (&psi
, true);
1635 || !gimple_assign_ssa_name_copy_p (next
)
1636 || gimple_assign_rhs1 (next
) != name
)
1644 gimple_stmt_iterator bsi
;
1646 bsi
= gsi_for_stmt (stmt
);
1648 name
= gimple_assign_lhs (stmt
);
1649 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
1651 next
= single_nonlooparound_use (name
);
1652 reset_debug_uses (stmt
);
1654 unlink_stmt_vdef (stmt
);
1655 gsi_remove (&bsi
, true);
1656 release_defs (stmt
);
1659 || !gimple_assign_ssa_name_copy_p (next
)
1660 || gimple_assign_rhs1 (next
) != name
)
1667 /* Perform the predictive commoning optimization for a chain CHAIN.
1668 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1671 execute_pred_commoning_chain (struct loop
*loop
, chain_p chain
,
1678 if (chain
->combined
)
1680 /* For combined chains, just remove the statements that are used to
1681 compute the values of the expression (except for the root one). */
1682 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
1683 remove_stmt (a
->stmt
);
1687 /* For non-combined chains, set up the variables that hold its value,
1688 and replace the uses of the original references by these
1690 initialize_root (loop
, chain
, tmp_vars
);
1691 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
1693 var
= chain
->vars
[chain
->length
- a
->distance
];
1694 replace_ref_with (a
->stmt
, var
, false, false);
1699 /* Determines the unroll factor necessary to remove as many temporary variable
1700 copies as possible. CHAINS is the list of chains that will be
1704 determine_unroll_factor (vec
<chain_p
> chains
)
1707 unsigned factor
= 1, af
, nfactor
, i
;
1708 unsigned max
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
1710 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1712 if (chain
->type
== CT_INVARIANT
|| chain
->combined
)
1715 /* The best unroll factor for this chain is equal to the number of
1716 temporary variables that we create for it. */
1718 if (chain
->has_max_use_after
)
1721 nfactor
= factor
* af
/ gcd (factor
, af
);
1729 /* Perform the predictive commoning optimization for CHAINS.
1730 Uids of the newly created temporary variables are marked in TMP_VARS. */
1733 execute_pred_commoning (struct loop
*loop
, vec
<chain_p
> chains
,
1739 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1741 if (chain
->type
== CT_INVARIANT
)
1742 execute_load_motion (loop
, chain
, tmp_vars
);
1744 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
1747 update_ssa (TODO_update_ssa_only_virtuals
);
1750 /* For each reference in CHAINS, if its defining statement is
1751 phi node, record the ssa name that is defined by it. */
1754 replace_phis_by_defined_names (vec
<chain_p
> chains
)
1760 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1761 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1763 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1765 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
1771 /* For each reference in CHAINS, if name_defined_by_phi is not
1772 NULL, use it to set the stmt field. */
1775 replace_names_by_phis (vec
<chain_p
> chains
)
1781 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1782 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1783 if (a
->stmt
== NULL
)
1785 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
1786 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
1787 a
->name_defined_by_phi
= NULL_TREE
;
1791 /* Wrapper over execute_pred_commoning, to pass it as a callback
1792 to tree_transform_and_unroll_loop. */
1796 vec
<chain_p
> chains
;
1801 execute_pred_commoning_cbck (struct loop
*loop
, void *data
)
1803 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
1805 /* Restore phi nodes that were replaced by ssa names before
1806 tree_transform_and_unroll_loop (see detailed description in
1807 tree_predictive_commoning_loop). */
1808 replace_names_by_phis (dta
->chains
);
1809 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
1812 /* Base NAME and all the names in the chain of phi nodes that use it
1813 on variable VAR. The phi nodes are recognized by being in the copies of
1814 the header of the LOOP. */
1817 base_names_in_chain_on (struct loop
*loop
, tree name
, tree var
)
1820 imm_use_iterator iter
;
1822 replace_ssa_name_symbol (name
, var
);
1827 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
1829 if (gimple_code (stmt
) == GIMPLE_PHI
1830 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
1833 BREAK_FROM_IMM_USE_STMT (iter
);
1839 name
= PHI_RESULT (phi
);
1840 replace_ssa_name_symbol (name
, var
);
1844 /* Given an unrolled LOOP after predictive commoning, remove the
1845 register copies arising from phi nodes by changing the base
1846 variables of SSA names. TMP_VARS is the set of the temporary variables
1847 for those we want to perform this. */
1850 eliminate_temp_copies (struct loop
*loop
, bitmap tmp_vars
)
1854 tree name
, use
, var
;
1855 gimple_stmt_iterator psi
;
1857 e
= loop_latch_edge (loop
);
1858 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1860 phi
= gsi_stmt (psi
);
1861 name
= PHI_RESULT (phi
);
1862 var
= SSA_NAME_VAR (name
);
1863 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
1865 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
1866 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
1868 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1869 stmt
= SSA_NAME_DEF_STMT (use
);
1870 while (gimple_code (stmt
) == GIMPLE_PHI
1871 /* In case we could not unroll the loop enough to eliminate
1872 all copies, we may reach the loop header before the defining
1873 statement (in that case, some register copies will be present
1874 in loop latch in the final code, corresponding to the newly
1875 created looparound phi nodes). */
1876 && gimple_bb (stmt
) != loop
->header
)
1878 gcc_assert (single_pred_p (gimple_bb (stmt
)));
1879 use
= PHI_ARG_DEF (stmt
, 0);
1880 stmt
= SSA_NAME_DEF_STMT (use
);
1883 base_names_in_chain_on (loop
, use
, var
);
1887 /* Returns true if CHAIN is suitable to be combined. */
1890 chain_can_be_combined_p (chain_p chain
)
1892 return (!chain
->combined
1893 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
1896 /* Returns the modify statement that uses NAME. Skips over assignment
1897 statements, NAME is replaced with the actual name used in the returned
1901 find_use_stmt (tree
*name
)
1906 /* Skip over assignments. */
1909 stmt
= single_nonlooparound_use (*name
);
1913 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1916 lhs
= gimple_assign_lhs (stmt
);
1917 if (TREE_CODE (lhs
) != SSA_NAME
)
1920 if (gimple_assign_copy_p (stmt
))
1922 rhs
= gimple_assign_rhs1 (stmt
);
1928 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
1929 == GIMPLE_BINARY_RHS
)
1936 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
1939 may_reassociate_p (tree type
, enum tree_code code
)
1941 if (FLOAT_TYPE_P (type
)
1942 && !flag_unsafe_math_optimizations
)
1945 return (commutative_tree_code (code
)
1946 && associative_tree_code (code
));
1949 /* If the operation used in STMT is associative and commutative, go through the
1950 tree of the same operations and returns its root. Distance to the root
1951 is stored in DISTANCE. */
1954 find_associative_operation_root (gimple stmt
, unsigned *distance
)
1958 enum tree_code code
= gimple_assign_rhs_code (stmt
);
1959 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1962 if (!may_reassociate_p (type
, code
))
1967 lhs
= gimple_assign_lhs (stmt
);
1968 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
1970 next
= find_use_stmt (&lhs
);
1972 || gimple_assign_rhs_code (next
) != code
)
1984 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
1985 is no such statement, returns NULL_TREE. In case the operation used on
1986 NAME1 and NAME2 is associative and commutative, returns the root of the
1987 tree formed by this operation instead of the statement that uses NAME1 or
1991 find_common_use_stmt (tree
*name1
, tree
*name2
)
1993 gimple stmt1
, stmt2
;
1995 stmt1
= find_use_stmt (name1
);
1999 stmt2
= find_use_stmt (name2
);
2006 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2009 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2013 return (stmt1
== stmt2
? stmt1
: NULL
);
2016 /* Checks whether R1 and R2 are combined together using CODE, with the result
2017 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2018 if it is true. If CODE is ERROR_MARK, set these values instead. */
2021 combinable_refs_p (dref r1
, dref r2
,
2022 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2024 enum tree_code acode
;
2030 name1
= name_for_ref (r1
);
2031 name2
= name_for_ref (r2
);
2032 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2034 stmt
= find_common_use_stmt (&name1
, &name2
);
2039 acode
= gimple_assign_rhs_code (stmt
);
2040 aswap
= (!commutative_tree_code (acode
)
2041 && gimple_assign_rhs1 (stmt
) != name1
);
2042 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
2044 if (*code
== ERROR_MARK
)
2052 return (*code
== acode
2054 && *rslt_type
== atype
);
2057 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2058 an assignment of the remaining operand. */
2061 remove_name_from_operation (gimple stmt
, tree op
)
2064 gimple_stmt_iterator si
;
2066 gcc_assert (is_gimple_assign (stmt
));
2068 if (gimple_assign_rhs1 (stmt
) == op
)
2069 other_op
= gimple_assign_rhs2 (stmt
);
2071 other_op
= gimple_assign_rhs1 (stmt
);
2073 si
= gsi_for_stmt (stmt
);
2074 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2076 /* We should not have reallocated STMT. */
2077 gcc_assert (gsi_stmt (si
) == stmt
);
2082 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2083 are combined in a single statement, and returns this statement. */
2086 reassociate_to_the_same_stmt (tree name1
, tree name2
)
2088 gimple stmt1
, stmt2
, root1
, root2
, s1
, s2
;
2089 gimple new_stmt
, tmp_stmt
;
2090 tree new_name
, tmp_name
, var
, r1
, r2
;
2091 unsigned dist1
, dist2
;
2092 enum tree_code code
;
2093 tree type
= TREE_TYPE (name1
);
2094 gimple_stmt_iterator bsi
;
2096 stmt1
= find_use_stmt (&name1
);
2097 stmt2
= find_use_stmt (&name2
);
2098 root1
= find_associative_operation_root (stmt1
, &dist1
);
2099 root2
= find_associative_operation_root (stmt2
, &dist2
);
2100 code
= gimple_assign_rhs_code (stmt1
);
2102 gcc_assert (root1
&& root2
&& root1
== root2
2103 && code
== gimple_assign_rhs_code (stmt2
));
2105 /* Find the root of the nearest expression in that both NAME1 and NAME2
2112 while (dist1
> dist2
)
2114 s1
= find_use_stmt (&r1
);
2115 r1
= gimple_assign_lhs (s1
);
2118 while (dist2
> dist1
)
2120 s2
= find_use_stmt (&r2
);
2121 r2
= gimple_assign_lhs (s2
);
2127 s1
= find_use_stmt (&r1
);
2128 r1
= gimple_assign_lhs (s1
);
2129 s2
= find_use_stmt (&r2
);
2130 r2
= gimple_assign_lhs (s2
);
2133 /* Remove NAME1 and NAME2 from the statements in that they are used
2135 remove_name_from_operation (stmt1
, name1
);
2136 remove_name_from_operation (stmt2
, name2
);
2138 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2139 combine it with the rhs of S1. */
2140 var
= create_tmp_reg (type
, "predreastmp");
2141 new_name
= make_ssa_name (var
, NULL
);
2142 new_stmt
= gimple_build_assign_with_ops (code
, new_name
, name1
, name2
);
2144 var
= create_tmp_reg (type
, "predreastmp");
2145 tmp_name
= make_ssa_name (var
, NULL
);
2147 /* Rhs of S1 may now be either a binary expression with operation
2148 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2149 so that name1 or name2 was removed from it). */
2150 tmp_stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (s1
),
2152 gimple_assign_rhs1 (s1
),
2153 gimple_assign_rhs2 (s1
));
2155 bsi
= gsi_for_stmt (s1
);
2156 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2157 s1
= gsi_stmt (bsi
);
2160 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2161 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2166 /* Returns the statement that combines references R1 and R2. In case R1
2167 and R2 are not used in the same statement, but they are used with an
2168 associative and commutative operation in the same expression, reassociate
2169 the expression so that they are used in the same statement. */
2172 stmt_combining_refs (dref r1
, dref r2
)
2174 gimple stmt1
, stmt2
;
2175 tree name1
= name_for_ref (r1
);
2176 tree name2
= name_for_ref (r2
);
2178 stmt1
= find_use_stmt (&name1
);
2179 stmt2
= find_use_stmt (&name2
);
2183 return reassociate_to_the_same_stmt (name1
, name2
);
2186 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2187 description of the new chain is returned, otherwise we return NULL. */
2190 combine_chains (chain_p ch1
, chain_p ch2
)
2193 enum tree_code op
= ERROR_MARK
;
2198 tree rslt_type
= NULL_TREE
;
2202 if (ch1
->length
!= ch2
->length
)
2205 if (ch1
->refs
.length () != ch2
->refs
.length ())
2208 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2209 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2211 if (r1
->distance
!= r2
->distance
)
2214 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
2225 new_chain
= XCNEW (struct chain
);
2226 new_chain
->type
= CT_COMBINATION
;
2228 new_chain
->ch1
= ch1
;
2229 new_chain
->ch2
= ch2
;
2230 new_chain
->rslt_type
= rslt_type
;
2231 new_chain
->length
= ch1
->length
;
2233 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2234 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2236 nw
= XCNEW (struct dref_d
);
2237 nw
->stmt
= stmt_combining_refs (r1
, r2
);
2238 nw
->distance
= r1
->distance
;
2240 new_chain
->refs
.safe_push (nw
);
2243 new_chain
->has_max_use_after
= false;
2244 root_stmt
= get_chain_root (new_chain
)->stmt
;
2245 for (i
= 1; new_chain
->refs
.iterate (i
, &nw
); i
++)
2247 if (nw
->distance
== new_chain
->length
2248 && !stmt_dominates_stmt_p (nw
->stmt
, root_stmt
))
2250 new_chain
->has_max_use_after
= true;
2255 ch1
->combined
= true;
2256 ch2
->combined
= true;
2260 /* Try to combine the CHAINS. */
2263 try_combine_chains (vec
<chain_p
> *chains
)
2266 chain_p ch1
, ch2
, cch
;
2267 vec
<chain_p
> worklist
= vNULL
;
2269 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2270 if (chain_can_be_combined_p (ch1
))
2271 worklist
.safe_push (ch1
);
2273 while (!worklist
.is_empty ())
2275 ch1
= worklist
.pop ();
2276 if (!chain_can_be_combined_p (ch1
))
2279 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2281 if (!chain_can_be_combined_p (ch2
))
2284 cch
= combine_chains (ch1
, ch2
);
2287 worklist
.safe_push (cch
);
2288 chains
->safe_push (cch
);
2294 worklist
.release ();
2297 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2298 impossible because one of these initializers may trap, true otherwise. */
2301 prepare_initializers_chain (struct loop
*loop
, chain_p chain
)
2303 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
2304 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2308 edge entry
= loop_preheader_edge (loop
);
2310 /* Find the initializers for the variables, and check that they cannot
2312 chain
->inits
.create (n
);
2313 for (i
= 0; i
< n
; i
++)
2314 chain
->inits
.quick_push (NULL_TREE
);
2316 /* If we have replaced some looparound phi nodes, use their initializers
2317 instead of creating our own. */
2318 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
2320 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
2323 gcc_assert (laref
->distance
> 0);
2324 chain
->inits
[n
- laref
->distance
]
2325 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
2328 for (i
= 0; i
< n
; i
++)
2330 if (chain
->inits
[i
] != NULL_TREE
)
2333 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
2334 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
2338 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
2340 chain
->inits
[i
] = init
;
2346 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2347 be used because the initializers might trap. */
2350 prepare_initializers (struct loop
*loop
, vec
<chain_p
> chains
)
2355 for (i
= 0; i
< chains
.length (); )
2358 if (prepare_initializers_chain (loop
, chain
))
2362 release_chain (chain
);
2363 chains
.unordered_remove (i
);
2368 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2372 tree_predictive_commoning_loop (struct loop
*loop
)
2374 vec
<data_reference_p
> datarefs
;
2375 vec
<ddr_p
> dependences
;
2376 struct component
*components
;
2377 vec
<chain_p
> chains
= vNULL
;
2378 unsigned unroll_factor
;
2379 struct tree_niter_desc desc
;
2380 bool unroll
= false;
2384 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2385 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
2387 /* Find the data references and split them into components according to their
2388 dependence relations. */
2389 stack_vec
<loop_p
, 3> loop_nest
;
2390 dependences
.create (10);
2391 datarefs
.create (10);
2392 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
2395 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2396 fprintf (dump_file
, "Cannot analyze data dependencies\n");
2397 free_data_refs (datarefs
);
2398 free_dependence_relations (dependences
);
2402 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2403 dump_data_dependence_relations (dump_file
, dependences
);
2405 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
2406 loop_nest
.release ();
2407 free_dependence_relations (dependences
);
2410 free_data_refs (datarefs
);
2414 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2416 fprintf (dump_file
, "Initial state:\n\n");
2417 dump_components (dump_file
, components
);
2420 /* Find the suitable components and split them into chains. */
2421 components
= filter_suitable_components (loop
, components
);
2423 tmp_vars
= BITMAP_ALLOC (NULL
);
2424 looparound_phis
= BITMAP_ALLOC (NULL
);
2425 determine_roots (loop
, components
, &chains
);
2426 release_components (components
);
2428 if (!chains
.exists ())
2430 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2432 "Predictive commoning failed: no suitable chains\n");
2435 prepare_initializers (loop
, chains
);
2437 /* Try to combine the chains that are always worked with together. */
2438 try_combine_chains (&chains
);
2440 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2442 fprintf (dump_file
, "Before commoning:\n\n");
2443 dump_chains (dump_file
, chains
);
2446 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2447 that its number of iterations is divisible by the factor. */
2448 unroll_factor
= determine_unroll_factor (chains
);
2450 unroll
= (unroll_factor
> 1
2451 && can_unroll_loop_p (loop
, unroll_factor
, &desc
));
2452 exit
= single_dom_exit (loop
);
2454 /* Execute the predictive commoning transformations, and possibly unroll the
2458 struct epcc_data dta
;
2460 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2461 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
2463 dta
.chains
= chains
;
2464 dta
.tmp_vars
= tmp_vars
;
2466 update_ssa (TODO_update_ssa_only_virtuals
);
2468 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2469 execute_pred_commoning_cbck is called may cause phi nodes to be
2470 reallocated, which is a problem since CHAINS may point to these
2471 statements. To fix this, we store the ssa names defined by the
2472 phi nodes here instead of the phi nodes themselves, and restore
2473 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2474 replace_phis_by_defined_names (chains
);
2476 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
2477 execute_pred_commoning_cbck
, &dta
);
2478 eliminate_temp_copies (loop
, tmp_vars
);
2482 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2484 "Executing predictive commoning without unrolling.\n");
2485 execute_pred_commoning (loop
, chains
, tmp_vars
);
2489 release_chains (chains
);
2490 free_data_refs (datarefs
);
2491 BITMAP_FREE (tmp_vars
);
2492 BITMAP_FREE (looparound_phis
);
2494 free_affine_expand_cache (&name_expansions
);
2499 /* Runs predictive commoning. */
2502 tree_predictive_commoning (void)
2504 bool unrolled
= false;
2509 initialize_original_copy_tables ();
2510 FOR_EACH_LOOP (li
, loop
, LI_ONLY_INNERMOST
)
2511 if (optimize_loop_for_speed_p (loop
))
2513 unrolled
|= tree_predictive_commoning_loop (loop
);
2519 ret
= TODO_cleanup_cfg
;
2521 free_original_copy_tables ();
2526 /* Predictive commoning Pass. */
2529 run_tree_predictive_commoning (void)
2534 return tree_predictive_commoning ();
2538 gate_tree_predictive_commoning (void)
2540 return flag_predictive_commoning
!= 0;
2545 const pass_data pass_data_predcom
=
2547 GIMPLE_PASS
, /* type */
2549 OPTGROUP_LOOP
, /* optinfo_flags */
2550 true, /* has_gate */
2551 true, /* has_execute */
2552 TV_PREDCOM
, /* tv_id */
2553 PROP_cfg
, /* properties_required */
2554 0, /* properties_provided */
2555 0, /* properties_destroyed */
2556 0, /* todo_flags_start */
2557 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
2560 class pass_predcom
: public gimple_opt_pass
2563 pass_predcom (gcc::context
*ctxt
)
2564 : gimple_opt_pass (pass_data_predcom
, ctxt
)
2567 /* opt_pass methods: */
2568 bool gate () { return gate_tree_predictive_commoning (); }
2569 unsigned int execute () { return run_tree_predictive_commoning (); }
2571 }; // class pass_predcom
2576 make_pass_predcom (gcc::context
*ctxt
)
2578 return new pass_predcom (ctxt
);