1 /* Predictive commoning.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
170 for (i = 0; i < n; i++)
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
189 #include "coretypes.h"
195 #include "tree-pass.h"
197 #include "gimple-pretty-print.h"
199 #include "fold-const.h"
202 #include "gimplify.h"
203 #include "gimple-iterator.h"
204 #include "gimplify-me.h"
205 #include "tree-ssa-loop-ivopts.h"
206 #include "tree-ssa-loop-manip.h"
207 #include "tree-ssa-loop-niter.h"
208 #include "tree-ssa-loop.h"
209 #include "tree-into-ssa.h"
210 #include "tree-dfa.h"
211 #include "tree-ssa.h"
212 #include "tree-data-ref.h"
213 #include "tree-scalar-evolution.h"
215 #include "tree-affine.h"
216 #include "builtins.h"
218 /* The maximum number of iterations between the considered memory
221 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
223 /* Data references (or phi nodes that carry data reference values across
226 typedef struct dref_d
228 /* The reference itself. */
229 struct data_reference
*ref
;
231 /* The statement in that the reference appears. */
234 /* In case that STMT is a phi node, this field is set to the SSA name
235 defined by it in replace_phis_by_defined_names (in order to avoid
236 pointing to phi node that got reallocated in the meantime). */
237 tree name_defined_by_phi
;
239 /* Distance of the reference from the root of the chain (in number of
240 iterations of the loop). */
243 /* Number of iterations offset from the first reference in the component. */
246 /* Number of the reference in a component, in dominance ordering. */
249 /* True if the memory reference is always accessed when the loop is
251 unsigned always_accessed
: 1;
255 /* Type of the chain of the references. */
259 /* The addresses of the references in the chain are constant. */
262 /* There are only loads in the chain. */
265 /* Root of the chain is store, the rest are loads. */
268 /* A combination of two chains. */
272 /* Chains of data references. */
276 /* Type of the chain. */
277 enum chain_type type
;
279 /* For combination chains, the operator and the two chains that are
280 combined, and the type of the result. */
283 struct chain
*ch1
, *ch2
;
285 /* The references in the chain. */
288 /* The maximum distance of the reference in the chain from the root. */
291 /* The variables used to copy the value throughout iterations. */
294 /* Initializers for the variables. */
297 /* True if there is a use of a variable with the maximal distance
298 that comes after the root in the loop. */
299 unsigned has_max_use_after
: 1;
301 /* True if all the memory references in the chain are always accessed. */
302 unsigned all_always_accessed
: 1;
304 /* True if this chain was combined together with some other chain. */
305 unsigned combined
: 1;
309 /* Describes the knowledge about the step of the memory references in
314 /* The step is zero. */
317 /* The step is nonzero. */
320 /* The step may or may not be nonzero. */
324 /* Components of the data dependence graph. */
328 /* The references in the component. */
331 /* What we know about the step of the references in the component. */
332 enum ref_step_type comp_step
;
334 /* Next component in the list. */
335 struct component
*next
;
338 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
340 static bitmap looparound_phis
;
342 /* Cache used by tree_to_aff_combination_expand. */
344 static hash_map
<tree
, name_expansion
*> *name_expansions
;
346 /* Dumps data reference REF to FILE. */
348 extern void dump_dref (FILE *, dref
);
350 dump_dref (FILE *file
, dref ref
)
355 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
356 fprintf (file
, " (id %u%s)\n", ref
->pos
,
357 DR_IS_READ (ref
->ref
) ? "" : ", write");
359 fprintf (file
, " offset ");
360 print_decs (ref
->offset
, file
);
361 fprintf (file
, "\n");
363 fprintf (file
, " distance %u\n", ref
->distance
);
367 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
368 fprintf (file
, " looparound ref\n");
370 fprintf (file
, " combination ref\n");
371 fprintf (file
, " in statement ");
372 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
373 fprintf (file
, "\n");
374 fprintf (file
, " distance %u\n", ref
->distance
);
379 /* Dumps CHAIN to FILE. */
381 extern void dump_chain (FILE *, chain_p
);
383 dump_chain (FILE *file
, chain_p chain
)
386 const char *chain_type
;
393 chain_type
= "Load motion";
397 chain_type
= "Loads-only";
401 chain_type
= "Store-loads";
405 chain_type
= "Combination";
412 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
413 chain
->combined
? " (combined)" : "");
414 if (chain
->type
!= CT_INVARIANT
)
415 fprintf (file
, " max distance %u%s\n", chain
->length
,
416 chain
->has_max_use_after
? "" : ", may reuse first");
418 if (chain
->type
== CT_COMBINATION
)
420 fprintf (file
, " equal to %p %s %p in type ",
421 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
422 (void *) chain
->ch2
);
423 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
424 fprintf (file
, "\n");
427 if (chain
->vars
.exists ())
429 fprintf (file
, " vars");
430 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
433 print_generic_expr (file
, var
, TDF_SLIM
);
435 fprintf (file
, "\n");
438 if (chain
->inits
.exists ())
440 fprintf (file
, " inits");
441 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
444 print_generic_expr (file
, var
, TDF_SLIM
);
446 fprintf (file
, "\n");
449 fprintf (file
, " references:\n");
450 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
453 fprintf (file
, "\n");
456 /* Dumps CHAINS to FILE. */
458 extern void dump_chains (FILE *, vec
<chain_p
> );
460 dump_chains (FILE *file
, vec
<chain_p
> chains
)
465 FOR_EACH_VEC_ELT (chains
, i
, chain
)
466 dump_chain (file
, chain
);
469 /* Dumps COMP to FILE. */
471 extern void dump_component (FILE *, struct component
*);
473 dump_component (FILE *file
, struct component
*comp
)
478 fprintf (file
, "Component%s:\n",
479 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
480 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
482 fprintf (file
, "\n");
485 /* Dumps COMPS to FILE. */
487 extern void dump_components (FILE *, struct component
*);
489 dump_components (FILE *file
, struct component
*comps
)
491 struct component
*comp
;
493 for (comp
= comps
; comp
; comp
= comp
->next
)
494 dump_component (file
, comp
);
497 /* Frees a chain CHAIN. */
500 release_chain (chain_p chain
)
508 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
511 chain
->refs
.release ();
512 chain
->vars
.release ();
513 chain
->inits
.release ();
521 release_chains (vec
<chain_p
> chains
)
526 FOR_EACH_VEC_ELT (chains
, i
, chain
)
527 release_chain (chain
);
531 /* Frees a component COMP. */
534 release_component (struct component
*comp
)
536 comp
->refs
.release ();
540 /* Frees list of components COMPS. */
543 release_components (struct component
*comps
)
545 struct component
*act
, *next
;
547 for (act
= comps
; act
; act
= next
)
550 release_component (act
);
554 /* Finds a root of tree given by FATHERS containing A, and performs path
558 component_of (unsigned fathers
[], unsigned a
)
562 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
565 for (; a
!= root
; a
= n
)
574 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
575 components, A and B are components to merge. */
578 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
580 unsigned ca
= component_of (fathers
, a
);
581 unsigned cb
= component_of (fathers
, b
);
586 if (sizes
[ca
] < sizes
[cb
])
588 sizes
[cb
] += sizes
[ca
];
593 sizes
[ca
] += sizes
[cb
];
598 /* Returns true if A is a reference that is suitable for predictive commoning
599 in the innermost loop that contains it. REF_STEP is set according to the
600 step of the reference A. */
603 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
605 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
608 || TREE_THIS_VOLATILE (ref
)
609 || !is_gimple_reg_type (TREE_TYPE (ref
))
610 || tree_could_throw_p (ref
))
613 if (integer_zerop (step
))
614 *ref_step
= RS_INVARIANT
;
615 else if (integer_nonzerop (step
))
616 *ref_step
= RS_NONZERO
;
623 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
626 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
628 tree type
= TREE_TYPE (DR_OFFSET (dr
));
631 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
633 aff_combination_const (&delta
, type
, wi::to_widest (DR_INIT (dr
)));
634 aff_combination_add (offset
, &delta
);
637 /* Determines number of iterations of the innermost enclosing loop before B
638 refers to exactly the same location as A and stores it to OFF. If A and
639 B do not have the same step, they never meet, or anything else fails,
640 returns false, otherwise returns true. Both A and B are assumed to
641 satisfy suitable_reference_p. */
644 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
647 aff_tree diff
, baseb
, step
;
650 /* Check that both the references access the location in the same type. */
651 typea
= TREE_TYPE (DR_REF (a
));
652 typeb
= TREE_TYPE (DR_REF (b
));
653 if (!useless_type_conversion_p (typeb
, typea
))
656 /* Check whether the base address and the step of both references is the
658 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
659 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
662 if (integer_zerop (DR_STEP (a
)))
664 /* If the references have loop invariant address, check that they access
665 exactly the same location. */
667 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
668 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
671 /* Compare the offsets of the addresses, and check whether the difference
672 is a multiple of step. */
673 aff_combination_dr_offset (a
, &diff
);
674 aff_combination_dr_offset (b
, &baseb
);
675 aff_combination_scale (&baseb
, -1);
676 aff_combination_add (&diff
, &baseb
);
678 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
679 &step
, &name_expansions
);
680 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
683 /* Returns the last basic block in LOOP for that we are sure that
684 it is executed whenever the loop is entered. */
687 last_always_executed_block (struct loop
*loop
)
690 vec
<edge
> exits
= get_loop_exit_edges (loop
);
692 basic_block last
= loop
->latch
;
694 FOR_EACH_VEC_ELT (exits
, i
, ex
)
695 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
701 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
703 static struct component
*
704 split_data_refs_to_components (struct loop
*loop
,
705 vec
<data_reference_p
> datarefs
,
708 unsigned i
, n
= datarefs
.length ();
709 unsigned ca
, ia
, ib
, bad
;
710 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
711 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
712 struct component
**comps
;
713 struct data_reference
*dr
, *dra
, *drb
;
714 struct data_dependence_relation
*ddr
;
715 struct component
*comp_list
= NULL
, *comp
;
717 basic_block last_always_executed
= last_always_executed_block (loop
);
719 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
723 /* A fake reference for call or asm_expr that may clobber memory;
727 /* predcom pass isn't prepared to handle calls with data references. */
728 if (is_gimple_call (DR_STMT (dr
)))
730 dr
->aux
= (void *) (size_t) i
;
735 /* A component reserved for the "bad" data references. */
739 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
741 enum ref_step_type dummy
;
743 if (!suitable_reference_p (dr
, &dummy
))
745 ia
= (unsigned) (size_t) dr
->aux
;
746 merge_comps (comp_father
, comp_size
, n
, ia
);
750 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
752 widest_int dummy_off
;
754 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
759 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
760 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
764 bad
= component_of (comp_father
, n
);
766 /* If both A and B are reads, we may ignore unsuitable dependences. */
767 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
769 if (ia
== bad
|| ib
== bad
770 || !determine_offset (dra
, drb
, &dummy_off
))
773 /* If A is read and B write or vice versa and there is unsuitable
774 dependence, instead of merging both components into a component
775 that will certainly not pass suitable_component_p, just put the
776 read into bad component, perhaps at least the write together with
777 all the other data refs in it's component will be optimizable. */
778 else if (DR_IS_READ (dra
) && ib
!= bad
)
782 else if (!determine_offset (dra
, drb
, &dummy_off
))
784 merge_comps (comp_father
, comp_size
, bad
, ia
);
788 else if (DR_IS_READ (drb
) && ia
!= bad
)
792 else if (!determine_offset (dra
, drb
, &dummy_off
))
794 merge_comps (comp_father
, comp_size
, bad
, ib
);
799 merge_comps (comp_father
, comp_size
, ia
, ib
);
802 comps
= XCNEWVEC (struct component
*, n
);
803 bad
= component_of (comp_father
, n
);
804 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
806 ia
= (unsigned) (size_t) dr
->aux
;
807 ca
= component_of (comp_father
, ia
);
814 comp
= XCNEW (struct component
);
815 comp
->refs
.create (comp_size
[ca
]);
819 dataref
= XCNEW (struct dref_d
);
821 dataref
->stmt
= DR_STMT (dr
);
823 dataref
->distance
= 0;
825 dataref
->always_accessed
826 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
827 gimple_bb (dataref
->stmt
));
828 dataref
->pos
= comp
->refs
.length ();
829 comp
->refs
.quick_push (dataref
);
832 for (i
= 0; i
< n
; i
++)
837 comp
->next
= comp_list
;
849 /* Returns true if the component COMP satisfies the conditions
850 described in 2) at the beginning of this file. LOOP is the current
854 suitable_component_p (struct loop
*loop
, struct component
*comp
)
858 basic_block ba
, bp
= loop
->header
;
859 bool ok
, has_write
= false;
861 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
863 ba
= gimple_bb (a
->stmt
);
865 if (!just_once_each_iteration_p (loop
, ba
))
868 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
871 if (DR_IS_WRITE (a
->ref
))
875 first
= comp
->refs
[0];
876 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
880 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
882 if (!determine_offset (first
->ref
, a
->ref
, &a
->offset
))
885 enum ref_step_type a_step
;
886 gcc_checking_assert (suitable_reference_p (a
->ref
, &a_step
)
887 && a_step
== comp
->comp_step
);
890 /* If there is a write inside the component, we must know whether the
891 step is nonzero or not -- we would not otherwise be able to recognize
892 whether the value accessed by reads comes from the OFFSET-th iteration
893 or the previous one. */
894 if (has_write
&& comp
->comp_step
== RS_ANY
)
900 /* Check the conditions on references inside each of components COMPS,
901 and remove the unsuitable components from the list. The new list
902 of components is returned. The conditions are described in 2) at
903 the beginning of this file. LOOP is the current loop. */
905 static struct component
*
906 filter_suitable_components (struct loop
*loop
, struct component
*comps
)
908 struct component
**comp
, *act
;
910 for (comp
= &comps
; *comp
; )
913 if (suitable_component_p (loop
, act
))
921 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
923 release_component (act
);
930 /* Compares two drefs A and B by their offset and position. Callback for
934 order_drefs (const void *a
, const void *b
)
936 const dref
*const da
= (const dref
*) a
;
937 const dref
*const db
= (const dref
*) b
;
938 int offcmp
= wi::cmps ((*da
)->offset
, (*db
)->offset
);
943 return (*da
)->pos
- (*db
)->pos
;
946 /* Returns root of the CHAIN. */
949 get_chain_root (chain_p chain
)
951 return chain
->refs
[0];
954 /* Adds REF to the chain CHAIN. */
957 add_ref_to_chain (chain_p chain
, dref ref
)
959 dref root
= get_chain_root (chain
);
961 gcc_assert (wi::les_p (root
->offset
, ref
->offset
));
962 widest_int dist
= ref
->offset
- root
->offset
;
963 if (wi::leu_p (MAX_DISTANCE
, dist
))
968 gcc_assert (wi::fits_uhwi_p (dist
));
970 chain
->refs
.safe_push (ref
);
972 ref
->distance
= dist
.to_uhwi ();
974 if (ref
->distance
>= chain
->length
)
976 chain
->length
= ref
->distance
;
977 chain
->has_max_use_after
= false;
980 if (ref
->distance
== chain
->length
981 && ref
->pos
> root
->pos
)
982 chain
->has_max_use_after
= true;
984 chain
->all_always_accessed
&= ref
->always_accessed
;
987 /* Returns the chain for invariant component COMP. */
990 make_invariant_chain (struct component
*comp
)
992 chain_p chain
= XCNEW (struct chain
);
996 chain
->type
= CT_INVARIANT
;
998 chain
->all_always_accessed
= true;
1000 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
1002 chain
->refs
.safe_push (ref
);
1003 chain
->all_always_accessed
&= ref
->always_accessed
;
1009 /* Make a new chain rooted at REF. */
1012 make_rooted_chain (dref ref
)
1014 chain_p chain
= XCNEW (struct chain
);
1016 chain
->type
= DR_IS_READ (ref
->ref
) ? CT_LOAD
: CT_STORE_LOAD
;
1018 chain
->refs
.safe_push (ref
);
1019 chain
->all_always_accessed
= ref
->always_accessed
;
1026 /* Returns true if CHAIN is not trivial. */
1029 nontrivial_chain_p (chain_p chain
)
1031 return chain
!= NULL
&& chain
->refs
.length () > 1;
1034 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1038 name_for_ref (dref ref
)
1042 if (is_gimple_assign (ref
->stmt
))
1044 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1045 name
= gimple_assign_lhs (ref
->stmt
);
1047 name
= gimple_assign_rhs1 (ref
->stmt
);
1050 name
= PHI_RESULT (ref
->stmt
);
1052 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1055 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1056 iterations of the innermost enclosing loop). */
1059 valid_initializer_p (struct data_reference
*ref
,
1060 unsigned distance
, struct data_reference
*root
)
1062 aff_tree diff
, base
, step
;
1065 /* Both REF and ROOT must be accessing the same object. */
1066 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1069 /* The initializer is defined outside of loop, hence its address must be
1070 invariant inside the loop. */
1071 gcc_assert (integer_zerop (DR_STEP (ref
)));
1073 /* If the address of the reference is invariant, initializer must access
1074 exactly the same location. */
1075 if (integer_zerop (DR_STEP (root
)))
1076 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1077 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1079 /* Verify that this index of REF is equal to the root's index at
1080 -DISTANCE-th iteration. */
1081 aff_combination_dr_offset (root
, &diff
);
1082 aff_combination_dr_offset (ref
, &base
);
1083 aff_combination_scale (&base
, -1);
1084 aff_combination_add (&diff
, &base
);
1086 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1087 &step
, &name_expansions
);
1088 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1091 if (off
!= distance
)
1097 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1098 initial value is correct (equal to initial value of REF shifted by one
1099 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1100 is the root of the current chain. */
1103 find_looparound_phi (struct loop
*loop
, dref ref
, dref root
)
1105 tree name
, init
, init_ref
;
1108 edge latch
= loop_latch_edge (loop
);
1109 struct data_reference init_dr
;
1112 if (is_gimple_assign (ref
->stmt
))
1114 if (DR_IS_READ (ref
->ref
))
1115 name
= gimple_assign_lhs (ref
->stmt
);
1117 name
= gimple_assign_rhs1 (ref
->stmt
);
1120 name
= PHI_RESULT (ref
->stmt
);
1124 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1127 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1131 if (gsi_end_p (psi
))
1134 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1135 if (TREE_CODE (init
) != SSA_NAME
)
1137 init_stmt
= SSA_NAME_DEF_STMT (init
);
1138 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1140 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1142 init_ref
= gimple_assign_rhs1 (init_stmt
);
1143 if (!REFERENCE_CLASS_P (init_ref
)
1144 && !DECL_P (init_ref
))
1147 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1148 loop enclosing PHI). */
1149 memset (&init_dr
, 0, sizeof (struct data_reference
));
1150 DR_REF (&init_dr
) = init_ref
;
1151 DR_STMT (&init_dr
) = phi
;
1152 if (!dr_analyze_innermost (&init_dr
, loop
))
1155 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1161 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1164 insert_looparound_copy (chain_p chain
, dref ref
, gphi
*phi
)
1166 dref nw
= XCNEW (struct dref_d
), aref
;
1170 nw
->distance
= ref
->distance
+ 1;
1171 nw
->always_accessed
= 1;
1173 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1174 if (aref
->distance
>= nw
->distance
)
1176 chain
->refs
.safe_insert (i
, nw
);
1178 if (nw
->distance
> chain
->length
)
1180 chain
->length
= nw
->distance
;
1181 chain
->has_max_use_after
= false;
1185 /* For references in CHAIN that are copied around the LOOP (created previously
1186 by PRE, or by user), add the results of such copies to the chain. This
1187 enables us to remove the copies by unrolling, and may need less registers
1188 (also, it may allow us to combine chains together). */
1191 add_looparound_copies (struct loop
*loop
, chain_p chain
)
1194 dref ref
, root
= get_chain_root (chain
);
1197 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1199 phi
= find_looparound_phi (loop
, ref
, root
);
1203 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1204 insert_looparound_copy (chain
, ref
, phi
);
1208 /* Find roots of the values and determine distances in the component COMP.
1209 The references are redistributed into CHAINS. LOOP is the current
1213 determine_roots_comp (struct loop
*loop
,
1214 struct component
*comp
,
1215 vec
<chain_p
> *chains
)
1219 chain_p chain
= NULL
;
1220 widest_int last_ofs
= 0;
1222 /* Invariants are handled specially. */
1223 if (comp
->comp_step
== RS_INVARIANT
)
1225 chain
= make_invariant_chain (comp
);
1226 chains
->safe_push (chain
);
1230 comp
->refs
.qsort (order_drefs
);
1232 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1234 if (!chain
|| DR_IS_WRITE (a
->ref
)
1235 || wi::leu_p (MAX_DISTANCE
, a
->offset
- last_ofs
))
1237 if (nontrivial_chain_p (chain
))
1239 add_looparound_copies (loop
, chain
);
1240 chains
->safe_push (chain
);
1243 release_chain (chain
);
1244 chain
= make_rooted_chain (a
);
1245 last_ofs
= a
->offset
;
1249 add_ref_to_chain (chain
, a
);
1252 if (nontrivial_chain_p (chain
))
1254 add_looparound_copies (loop
, chain
);
1255 chains
->safe_push (chain
);
1258 release_chain (chain
);
1261 /* Find roots of the values and determine distances in components COMPS, and
1262 separates the references to CHAINS. LOOP is the current loop. */
1265 determine_roots (struct loop
*loop
,
1266 struct component
*comps
, vec
<chain_p
> *chains
)
1268 struct component
*comp
;
1270 for (comp
= comps
; comp
; comp
= comp
->next
)
1271 determine_roots_comp (loop
, comp
, chains
);
1274 /* Replace the reference in statement STMT with temporary variable
1275 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1276 the reference in the statement. IN_LHS is true if the reference
1277 is in the lhs of STMT, false if it is in rhs. */
1280 replace_ref_with (gimple
*stmt
, tree new_tree
, bool set
, bool in_lhs
)
1284 gimple_stmt_iterator bsi
, psi
;
1286 if (gimple_code (stmt
) == GIMPLE_PHI
)
1288 gcc_assert (!in_lhs
&& !set
);
1290 val
= PHI_RESULT (stmt
);
1291 bsi
= gsi_after_labels (gimple_bb (stmt
));
1292 psi
= gsi_for_stmt (stmt
);
1293 remove_phi_node (&psi
, false);
1295 /* Turn the phi node into GIMPLE_ASSIGN. */
1296 new_stmt
= gimple_build_assign (val
, new_tree
);
1297 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1301 /* Since the reference is of gimple_reg type, it should only
1302 appear as lhs or rhs of modify statement. */
1303 gcc_assert (is_gimple_assign (stmt
));
1305 bsi
= gsi_for_stmt (stmt
);
1307 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1310 gcc_assert (!in_lhs
);
1311 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1312 stmt
= gsi_stmt (bsi
);
1319 /* We have statement
1323 If OLD is a memory reference, then VAL is gimple_val, and we transform
1329 Otherwise, we are replacing a combination chain,
1330 VAL is the expression that performs the combination, and OLD is an
1331 SSA name. In this case, we transform the assignment to
1338 val
= gimple_assign_lhs (stmt
);
1339 if (TREE_CODE (val
) != SSA_NAME
)
1341 val
= gimple_assign_rhs1 (stmt
);
1342 gcc_assert (gimple_assign_single_p (stmt
));
1343 if (TREE_CLOBBER_P (val
))
1344 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1346 gcc_assert (gimple_assign_copy_p (stmt
));
1358 val
= gimple_assign_lhs (stmt
);
1361 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1362 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1365 /* Returns a memory reference to DR in the ITER-th iteration of
1366 the loop it was analyzed in. Append init stmts to STMTS. */
1369 ref_at_iteration (data_reference_p dr
, int iter
, gimple_seq
*stmts
)
1371 tree off
= DR_OFFSET (dr
);
1372 tree coff
= DR_INIT (dr
);
1373 tree ref
= DR_REF (dr
);
1374 enum tree_code ref_code
= ERROR_MARK
;
1375 tree ref_type
= NULL_TREE
;
1376 tree ref_op1
= NULL_TREE
;
1377 tree ref_op2
= NULL_TREE
;
1380 else if (TREE_CODE (DR_STEP (dr
)) == INTEGER_CST
)
1381 coff
= size_binop (PLUS_EXPR
, coff
,
1382 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1384 off
= size_binop (PLUS_EXPR
, off
,
1385 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1386 /* While data-ref analysis punts on bit offsets it still handles
1387 bitfield accesses at byte boundaries. Cope with that. Note that
1388 if the bitfield object also starts at a byte-boundary we can simply
1389 replicate the COMPONENT_REF, but we have to subtract the component's
1390 byte-offset from the MEM_REF address first.
1391 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1392 start at offset zero. */
1393 if (TREE_CODE (ref
) == COMPONENT_REF
1394 && DECL_BIT_FIELD (TREE_OPERAND (ref
, 1)))
1396 unsigned HOST_WIDE_INT boff
;
1397 tree field
= TREE_OPERAND (ref
, 1);
1398 tree offset
= component_ref_field_offset (ref
);
1399 ref_type
= TREE_TYPE (ref
);
1400 boff
= tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field
));
1401 /* This can occur in Ada. See the comment in get_bit_range. */
1402 if (boff
% BITS_PER_UNIT
!= 0
1403 || !tree_fits_uhwi_p (offset
))
1405 ref_code
= BIT_FIELD_REF
;
1406 ref_op1
= DECL_SIZE (field
);
1407 ref_op2
= bitsize_zero_node
;
1411 boff
>>= LOG2_BITS_PER_UNIT
;
1412 boff
+= tree_to_uhwi (offset
);
1413 coff
= size_binop (MINUS_EXPR
, coff
, ssize_int (boff
));
1414 ref_code
= COMPONENT_REF
;
1416 ref_op2
= TREE_OPERAND (ref
, 2);
1417 ref
= TREE_OPERAND (ref
, 0);
1420 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1421 addr
= force_gimple_operand_1 (unshare_expr (addr
), stmts
,
1422 is_gimple_mem_ref_addr
, NULL_TREE
);
1423 tree alias_ptr
= fold_convert (reference_alias_ptr_type (ref
), coff
);
1424 tree type
= build_aligned_type (TREE_TYPE (ref
),
1425 get_object_alignment (ref
));
1426 ref
= build2 (MEM_REF
, type
, addr
, alias_ptr
);
1428 ref
= build3 (ref_code
, ref_type
, ref
, ref_op1
, ref_op2
);
1432 /* Get the initialization expression for the INDEX-th temporary variable
1436 get_init_expr (chain_p chain
, unsigned index
)
1438 if (chain
->type
== CT_COMBINATION
)
1440 tree e1
= get_init_expr (chain
->ch1
, index
);
1441 tree e2
= get_init_expr (chain
->ch2
, index
);
1443 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1446 return chain
->inits
[index
];
1449 /* Returns a new temporary variable used for the I-th variable carrying
1450 value of REF. The variable's uid is marked in TMP_VARS. */
1453 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1455 tree type
= TREE_TYPE (ref
);
1456 /* We never access the components of the temporary variable in predictive
1458 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
1459 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1463 /* Creates the variables for CHAIN, as well as phi nodes for them and
1464 initialization on entry to LOOP. Uids of the newly created
1465 temporary variables are marked in TMP_VARS. */
1468 initialize_root_vars (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1471 unsigned n
= chain
->length
;
1472 dref root
= get_chain_root (chain
);
1473 bool reuse_first
= !chain
->has_max_use_after
;
1474 tree ref
, init
, var
, next
;
1477 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1479 /* If N == 0, then all the references are within the single iteration. And
1480 since this is an nonempty chain, reuse_first cannot be true. */
1481 gcc_assert (n
> 0 || !reuse_first
);
1483 chain
->vars
.create (n
+ 1);
1485 if (chain
->type
== CT_COMBINATION
)
1486 ref
= gimple_assign_lhs (root
->stmt
);
1488 ref
= DR_REF (root
->ref
);
1490 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1492 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1493 chain
->vars
.quick_push (var
);
1496 chain
->vars
.quick_push (chain
->vars
[0]);
1498 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1499 chain
->vars
[i
] = make_ssa_name (var
);
1501 for (i
= 0; i
< n
; i
++)
1503 var
= chain
->vars
[i
];
1504 next
= chain
->vars
[i
+ 1];
1505 init
= get_init_expr (chain
, i
);
1507 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1509 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1511 phi
= create_phi_node (var
, loop
->header
);
1512 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1513 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1517 /* Create the variables and initialization statement for root of chain
1518 CHAIN. Uids of the newly created temporary variables are marked
1522 initialize_root (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1524 dref root
= get_chain_root (chain
);
1525 bool in_lhs
= (chain
->type
== CT_STORE_LOAD
1526 || chain
->type
== CT_COMBINATION
);
1528 initialize_root_vars (loop
, chain
, tmp_vars
);
1529 replace_ref_with (root
->stmt
,
1530 chain
->vars
[chain
->length
],
1534 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1535 initialization on entry to LOOP if necessary. The ssa name for the variable
1536 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1537 around the loop is created. Uid of the newly created temporary variable
1538 is marked in TMP_VARS. INITS is the list containing the (single)
1542 initialize_root_vars_lm (struct loop
*loop
, dref root
, bool written
,
1543 vec
<tree
> *vars
, vec
<tree
> inits
,
1547 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1550 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1552 /* Find the initializer for the variable, and check that it cannot
1556 vars
->create (written
? 2 : 1);
1557 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1558 vars
->quick_push (var
);
1560 vars
->quick_push ((*vars
)[0]);
1562 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1563 (*vars
)[i
] = make_ssa_name (var
);
1567 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1569 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1574 phi
= create_phi_node (var
, loop
->header
);
1575 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1576 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1580 gassign
*init_stmt
= gimple_build_assign (var
, init
);
1581 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1586 /* Execute load motion for references in chain CHAIN. Uids of the newly
1587 created temporary variables are marked in TMP_VARS. */
1590 execute_load_motion (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1592 auto_vec
<tree
> vars
;
1594 unsigned n_writes
= 0, ridx
, i
;
1597 gcc_assert (chain
->type
== CT_INVARIANT
);
1598 gcc_assert (!chain
->combined
);
1599 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1600 if (DR_IS_WRITE (a
->ref
))
1603 /* If there are no reads in the loop, there is nothing to do. */
1604 if (n_writes
== chain
->refs
.length ())
1607 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
1608 &vars
, chain
->inits
, tmp_vars
);
1611 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1613 bool is_read
= DR_IS_READ (a
->ref
);
1615 if (DR_IS_WRITE (a
->ref
))
1621 var
= make_ssa_name (SSA_NAME_VAR (var
));
1628 replace_ref_with (a
->stmt
, vars
[ridx
],
1629 !is_read
, !is_read
);
1633 /* Returns the single statement in that NAME is used, excepting
1634 the looparound phi nodes contained in one of the chains. If there is no
1635 such statement, or more statements, NULL is returned. */
1638 single_nonlooparound_use (tree name
)
1641 imm_use_iterator it
;
1642 gimple
*stmt
, *ret
= NULL
;
1644 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
1646 stmt
= USE_STMT (use
);
1648 if (gimple_code (stmt
) == GIMPLE_PHI
)
1650 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1651 could not be processed anyway, so just fail for them. */
1652 if (bitmap_bit_p (looparound_phis
,
1653 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
1658 else if (is_gimple_debug (stmt
))
1660 else if (ret
!= NULL
)
1669 /* Remove statement STMT, as well as the chain of assignments in that it is
1673 remove_stmt (gimple
*stmt
)
1677 gimple_stmt_iterator psi
;
1679 if (gimple_code (stmt
) == GIMPLE_PHI
)
1681 name
= PHI_RESULT (stmt
);
1682 next
= single_nonlooparound_use (name
);
1683 reset_debug_uses (stmt
);
1684 psi
= gsi_for_stmt (stmt
);
1685 remove_phi_node (&psi
, true);
1688 || !gimple_assign_ssa_name_copy_p (next
)
1689 || gimple_assign_rhs1 (next
) != name
)
1697 gimple_stmt_iterator bsi
;
1699 bsi
= gsi_for_stmt (stmt
);
1701 name
= gimple_assign_lhs (stmt
);
1702 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
1704 next
= single_nonlooparound_use (name
);
1705 reset_debug_uses (stmt
);
1707 unlink_stmt_vdef (stmt
);
1708 gsi_remove (&bsi
, true);
1709 release_defs (stmt
);
1712 || !gimple_assign_ssa_name_copy_p (next
)
1713 || gimple_assign_rhs1 (next
) != name
)
1720 /* Perform the predictive commoning optimization for a chain CHAIN.
1721 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1724 execute_pred_commoning_chain (struct loop
*loop
, chain_p chain
,
1731 if (chain
->combined
)
1733 /* For combined chains, just remove the statements that are used to
1734 compute the values of the expression (except for the root one).
1735 We delay this until after all chains are processed. */
1739 /* For non-combined chains, set up the variables that hold its value,
1740 and replace the uses of the original references by these
1742 initialize_root (loop
, chain
, tmp_vars
);
1743 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
1745 var
= chain
->vars
[chain
->length
- a
->distance
];
1746 replace_ref_with (a
->stmt
, var
, false, false);
1751 /* Determines the unroll factor necessary to remove as many temporary variable
1752 copies as possible. CHAINS is the list of chains that will be
1756 determine_unroll_factor (vec
<chain_p
> chains
)
1759 unsigned factor
= 1, af
, nfactor
, i
;
1760 unsigned max
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
1762 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1764 if (chain
->type
== CT_INVARIANT
)
1767 if (chain
->combined
)
1769 /* For combined chains, we can't handle unrolling if we replace
1773 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
1774 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1779 /* The best unroll factor for this chain is equal to the number of
1780 temporary variables that we create for it. */
1782 if (chain
->has_max_use_after
)
1785 nfactor
= factor
* af
/ gcd (factor
, af
);
1793 /* Perform the predictive commoning optimization for CHAINS.
1794 Uids of the newly created temporary variables are marked in TMP_VARS. */
1797 execute_pred_commoning (struct loop
*loop
, vec
<chain_p
> chains
,
1803 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1805 if (chain
->type
== CT_INVARIANT
)
1806 execute_load_motion (loop
, chain
, tmp_vars
);
1808 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
1811 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1813 if (chain
->type
== CT_INVARIANT
)
1815 else if (chain
->combined
)
1817 /* For combined chains, just remove the statements that are used to
1818 compute the values of the expression (except for the root one). */
1821 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
1822 remove_stmt (a
->stmt
);
1826 update_ssa (TODO_update_ssa_only_virtuals
);
1829 /* For each reference in CHAINS, if its defining statement is
1830 phi node, record the ssa name that is defined by it. */
1833 replace_phis_by_defined_names (vec
<chain_p
> chains
)
1839 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1840 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1842 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1844 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
1850 /* For each reference in CHAINS, if name_defined_by_phi is not
1851 NULL, use it to set the stmt field. */
1854 replace_names_by_phis (vec
<chain_p
> chains
)
1860 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1861 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1862 if (a
->stmt
== NULL
)
1864 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
1865 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
1866 a
->name_defined_by_phi
= NULL_TREE
;
1870 /* Wrapper over execute_pred_commoning, to pass it as a callback
1871 to tree_transform_and_unroll_loop. */
1875 vec
<chain_p
> chains
;
1880 execute_pred_commoning_cbck (struct loop
*loop
, void *data
)
1882 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
1884 /* Restore phi nodes that were replaced by ssa names before
1885 tree_transform_and_unroll_loop (see detailed description in
1886 tree_predictive_commoning_loop). */
1887 replace_names_by_phis (dta
->chains
);
1888 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
1891 /* Base NAME and all the names in the chain of phi nodes that use it
1892 on variable VAR. The phi nodes are recognized by being in the copies of
1893 the header of the LOOP. */
1896 base_names_in_chain_on (struct loop
*loop
, tree name
, tree var
)
1899 imm_use_iterator iter
;
1901 replace_ssa_name_symbol (name
, var
);
1906 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
1908 if (gimple_code (stmt
) == GIMPLE_PHI
1909 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
1912 BREAK_FROM_IMM_USE_STMT (iter
);
1918 name
= PHI_RESULT (phi
);
1919 replace_ssa_name_symbol (name
, var
);
1923 /* Given an unrolled LOOP after predictive commoning, remove the
1924 register copies arising from phi nodes by changing the base
1925 variables of SSA names. TMP_VARS is the set of the temporary variables
1926 for those we want to perform this. */
1929 eliminate_temp_copies (struct loop
*loop
, bitmap tmp_vars
)
1934 tree name
, use
, var
;
1937 e
= loop_latch_edge (loop
);
1938 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1941 name
= PHI_RESULT (phi
);
1942 var
= SSA_NAME_VAR (name
);
1943 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
1945 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
1946 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
1948 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1949 stmt
= SSA_NAME_DEF_STMT (use
);
1950 while (gimple_code (stmt
) == GIMPLE_PHI
1951 /* In case we could not unroll the loop enough to eliminate
1952 all copies, we may reach the loop header before the defining
1953 statement (in that case, some register copies will be present
1954 in loop latch in the final code, corresponding to the newly
1955 created looparound phi nodes). */
1956 && gimple_bb (stmt
) != loop
->header
)
1958 gcc_assert (single_pred_p (gimple_bb (stmt
)));
1959 use
= PHI_ARG_DEF (stmt
, 0);
1960 stmt
= SSA_NAME_DEF_STMT (use
);
1963 base_names_in_chain_on (loop
, use
, var
);
1967 /* Returns true if CHAIN is suitable to be combined. */
1970 chain_can_be_combined_p (chain_p chain
)
1972 return (!chain
->combined
1973 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
1976 /* Returns the modify statement that uses NAME. Skips over assignment
1977 statements, NAME is replaced with the actual name used in the returned
1981 find_use_stmt (tree
*name
)
1986 /* Skip over assignments. */
1989 stmt
= single_nonlooparound_use (*name
);
1993 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1996 lhs
= gimple_assign_lhs (stmt
);
1997 if (TREE_CODE (lhs
) != SSA_NAME
)
2000 if (gimple_assign_copy_p (stmt
))
2002 rhs
= gimple_assign_rhs1 (stmt
);
2008 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
2009 == GIMPLE_BINARY_RHS
)
2016 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2019 may_reassociate_p (tree type
, enum tree_code code
)
2021 if (FLOAT_TYPE_P (type
)
2022 && !flag_unsafe_math_optimizations
)
2025 return (commutative_tree_code (code
)
2026 && associative_tree_code (code
));
2029 /* If the operation used in STMT is associative and commutative, go through the
2030 tree of the same operations and returns its root. Distance to the root
2031 is stored in DISTANCE. */
2034 find_associative_operation_root (gimple
*stmt
, unsigned *distance
)
2038 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2039 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2042 if (!may_reassociate_p (type
, code
))
2047 lhs
= gimple_assign_lhs (stmt
);
2048 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
2050 next
= find_use_stmt (&lhs
);
2052 || gimple_assign_rhs_code (next
) != code
)
2064 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2065 is no such statement, returns NULL_TREE. In case the operation used on
2066 NAME1 and NAME2 is associative and commutative, returns the root of the
2067 tree formed by this operation instead of the statement that uses NAME1 or
2071 find_common_use_stmt (tree
*name1
, tree
*name2
)
2073 gimple
*stmt1
, *stmt2
;
2075 stmt1
= find_use_stmt (name1
);
2079 stmt2
= find_use_stmt (name2
);
2086 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2089 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2093 return (stmt1
== stmt2
? stmt1
: NULL
);
2096 /* Checks whether R1 and R2 are combined together using CODE, with the result
2097 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2098 if it is true. If CODE is ERROR_MARK, set these values instead. */
2101 combinable_refs_p (dref r1
, dref r2
,
2102 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2104 enum tree_code acode
;
2110 name1
= name_for_ref (r1
);
2111 name2
= name_for_ref (r2
);
2112 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2114 stmt
= find_common_use_stmt (&name1
, &name2
);
2117 /* A simple post-dominance check - make sure the combination
2118 is executed under the same condition as the references. */
2119 || (gimple_bb (stmt
) != gimple_bb (r1
->stmt
)
2120 && gimple_bb (stmt
) != gimple_bb (r2
->stmt
)))
2123 acode
= gimple_assign_rhs_code (stmt
);
2124 aswap
= (!commutative_tree_code (acode
)
2125 && gimple_assign_rhs1 (stmt
) != name1
);
2126 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
2128 if (*code
== ERROR_MARK
)
2136 return (*code
== acode
2138 && *rslt_type
== atype
);
2141 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2142 an assignment of the remaining operand. */
2145 remove_name_from_operation (gimple
*stmt
, tree op
)
2148 gimple_stmt_iterator si
;
2150 gcc_assert (is_gimple_assign (stmt
));
2152 if (gimple_assign_rhs1 (stmt
) == op
)
2153 other_op
= gimple_assign_rhs2 (stmt
);
2155 other_op
= gimple_assign_rhs1 (stmt
);
2157 si
= gsi_for_stmt (stmt
);
2158 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2160 /* We should not have reallocated STMT. */
2161 gcc_assert (gsi_stmt (si
) == stmt
);
2166 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2167 are combined in a single statement, and returns this statement. */
2170 reassociate_to_the_same_stmt (tree name1
, tree name2
)
2172 gimple
*stmt1
, *stmt2
, *root1
, *root2
, *s1
, *s2
;
2173 gassign
*new_stmt
, *tmp_stmt
;
2174 tree new_name
, tmp_name
, var
, r1
, r2
;
2175 unsigned dist1
, dist2
;
2176 enum tree_code code
;
2177 tree type
= TREE_TYPE (name1
);
2178 gimple_stmt_iterator bsi
;
2180 stmt1
= find_use_stmt (&name1
);
2181 stmt2
= find_use_stmt (&name2
);
2182 root1
= find_associative_operation_root (stmt1
, &dist1
);
2183 root2
= find_associative_operation_root (stmt2
, &dist2
);
2184 code
= gimple_assign_rhs_code (stmt1
);
2186 gcc_assert (root1
&& root2
&& root1
== root2
2187 && code
== gimple_assign_rhs_code (stmt2
));
2189 /* Find the root of the nearest expression in that both NAME1 and NAME2
2196 while (dist1
> dist2
)
2198 s1
= find_use_stmt (&r1
);
2199 r1
= gimple_assign_lhs (s1
);
2202 while (dist2
> dist1
)
2204 s2
= find_use_stmt (&r2
);
2205 r2
= gimple_assign_lhs (s2
);
2211 s1
= find_use_stmt (&r1
);
2212 r1
= gimple_assign_lhs (s1
);
2213 s2
= find_use_stmt (&r2
);
2214 r2
= gimple_assign_lhs (s2
);
2217 /* Remove NAME1 and NAME2 from the statements in that they are used
2219 remove_name_from_operation (stmt1
, name1
);
2220 remove_name_from_operation (stmt2
, name2
);
2222 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2223 combine it with the rhs of S1. */
2224 var
= create_tmp_reg (type
, "predreastmp");
2225 new_name
= make_ssa_name (var
);
2226 new_stmt
= gimple_build_assign (new_name
, code
, name1
, name2
);
2228 var
= create_tmp_reg (type
, "predreastmp");
2229 tmp_name
= make_ssa_name (var
);
2231 /* Rhs of S1 may now be either a binary expression with operation
2232 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2233 so that name1 or name2 was removed from it). */
2234 tmp_stmt
= gimple_build_assign (tmp_name
, gimple_assign_rhs_code (s1
),
2235 gimple_assign_rhs1 (s1
),
2236 gimple_assign_rhs2 (s1
));
2238 bsi
= gsi_for_stmt (s1
);
2239 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2240 s1
= gsi_stmt (bsi
);
2243 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2244 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2249 /* Returns the statement that combines references R1 and R2. In case R1
2250 and R2 are not used in the same statement, but they are used with an
2251 associative and commutative operation in the same expression, reassociate
2252 the expression so that they are used in the same statement. */
2255 stmt_combining_refs (dref r1
, dref r2
)
2257 gimple
*stmt1
, *stmt2
;
2258 tree name1
= name_for_ref (r1
);
2259 tree name2
= name_for_ref (r2
);
2261 stmt1
= find_use_stmt (&name1
);
2262 stmt2
= find_use_stmt (&name2
);
2266 return reassociate_to_the_same_stmt (name1
, name2
);
2269 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2270 description of the new chain is returned, otherwise we return NULL. */
2273 combine_chains (chain_p ch1
, chain_p ch2
)
2276 enum tree_code op
= ERROR_MARK
;
2281 tree rslt_type
= NULL_TREE
;
2285 if (ch1
->length
!= ch2
->length
)
2288 if (ch1
->refs
.length () != ch2
->refs
.length ())
2291 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2292 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2294 if (r1
->distance
!= r2
->distance
)
2297 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
2302 std::swap (ch1
, ch2
);
2304 new_chain
= XCNEW (struct chain
);
2305 new_chain
->type
= CT_COMBINATION
;
2307 new_chain
->ch1
= ch1
;
2308 new_chain
->ch2
= ch2
;
2309 new_chain
->rslt_type
= rslt_type
;
2310 new_chain
->length
= ch1
->length
;
2312 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2313 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2315 nw
= XCNEW (struct dref_d
);
2316 nw
->stmt
= stmt_combining_refs (r1
, r2
);
2317 nw
->distance
= r1
->distance
;
2319 new_chain
->refs
.safe_push (nw
);
2322 new_chain
->has_max_use_after
= false;
2323 root_stmt
= get_chain_root (new_chain
)->stmt
;
2324 for (i
= 1; new_chain
->refs
.iterate (i
, &nw
); i
++)
2326 if (nw
->distance
== new_chain
->length
2327 && !stmt_dominates_stmt_p (nw
->stmt
, root_stmt
))
2329 new_chain
->has_max_use_after
= true;
2334 ch1
->combined
= true;
2335 ch2
->combined
= true;
2339 /* Try to combine the CHAINS. */
2342 try_combine_chains (vec
<chain_p
> *chains
)
2345 chain_p ch1
, ch2
, cch
;
2346 auto_vec
<chain_p
> worklist
;
2348 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2349 if (chain_can_be_combined_p (ch1
))
2350 worklist
.safe_push (ch1
);
2352 while (!worklist
.is_empty ())
2354 ch1
= worklist
.pop ();
2355 if (!chain_can_be_combined_p (ch1
))
2358 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2360 if (!chain_can_be_combined_p (ch2
))
2363 cch
= combine_chains (ch1
, ch2
);
2366 worklist
.safe_push (cch
);
2367 chains
->safe_push (cch
);
2374 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2375 impossible because one of these initializers may trap, true otherwise. */
2378 prepare_initializers_chain (struct loop
*loop
, chain_p chain
)
2380 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
2381 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2384 edge entry
= loop_preheader_edge (loop
);
2386 /* Find the initializers for the variables, and check that they cannot
2388 chain
->inits
.create (n
);
2389 for (i
= 0; i
< n
; i
++)
2390 chain
->inits
.quick_push (NULL_TREE
);
2392 /* If we have replaced some looparound phi nodes, use their initializers
2393 instead of creating our own. */
2394 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
2396 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
2399 gcc_assert (laref
->distance
> 0);
2400 chain
->inits
[n
- laref
->distance
]
2401 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
2404 for (i
= 0; i
< n
; i
++)
2406 gimple_seq stmts
= NULL
;
2408 if (chain
->inits
[i
] != NULL_TREE
)
2411 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
2412 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
2414 gimple_seq_discard (stmts
);
2419 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
2421 chain
->inits
[i
] = init
;
2427 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2428 be used because the initializers might trap. */
2431 prepare_initializers (struct loop
*loop
, vec
<chain_p
> chains
)
2436 for (i
= 0; i
< chains
.length (); )
2439 if (prepare_initializers_chain (loop
, chain
))
2443 release_chain (chain
);
2444 chains
.unordered_remove (i
);
2449 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2453 tree_predictive_commoning_loop (struct loop
*loop
)
2455 vec
<data_reference_p
> datarefs
;
2456 vec
<ddr_p
> dependences
;
2457 struct component
*components
;
2458 vec
<chain_p
> chains
= vNULL
;
2459 unsigned unroll_factor
;
2460 struct tree_niter_desc desc
;
2461 bool unroll
= false;
2465 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2466 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
2468 /* Nothing for predicitive commoning if loop only iterates 1 time. */
2469 if (get_max_loop_iterations_int (loop
) == 0)
2471 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2472 fprintf (dump_file
, "Loop iterates only 1 time, nothing to do.\n");
2477 /* Find the data references and split them into components according to their
2478 dependence relations. */
2479 auto_vec
<loop_p
, 3> loop_nest
;
2480 dependences
.create (10);
2481 datarefs
.create (10);
2482 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
2485 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2486 fprintf (dump_file
, "Cannot analyze data dependencies\n");
2487 free_data_refs (datarefs
);
2488 free_dependence_relations (dependences
);
2492 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2493 dump_data_dependence_relations (dump_file
, dependences
);
2495 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
2496 loop_nest
.release ();
2497 free_dependence_relations (dependences
);
2500 free_data_refs (datarefs
);
2501 free_affine_expand_cache (&name_expansions
);
2505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2507 fprintf (dump_file
, "Initial state:\n\n");
2508 dump_components (dump_file
, components
);
2511 /* Find the suitable components and split them into chains. */
2512 components
= filter_suitable_components (loop
, components
);
2514 tmp_vars
= BITMAP_ALLOC (NULL
);
2515 looparound_phis
= BITMAP_ALLOC (NULL
);
2516 determine_roots (loop
, components
, &chains
);
2517 release_components (components
);
2519 if (!chains
.exists ())
2521 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2523 "Predictive commoning failed: no suitable chains\n");
2526 prepare_initializers (loop
, chains
);
2528 /* Try to combine the chains that are always worked with together. */
2529 try_combine_chains (&chains
);
2531 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2533 fprintf (dump_file
, "Before commoning:\n\n");
2534 dump_chains (dump_file
, chains
);
2537 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2538 that its number of iterations is divisible by the factor. */
2539 unroll_factor
= determine_unroll_factor (chains
);
2541 unroll
= (unroll_factor
> 1
2542 && can_unroll_loop_p (loop
, unroll_factor
, &desc
));
2543 exit
= single_dom_exit (loop
);
2545 /* Execute the predictive commoning transformations, and possibly unroll the
2549 struct epcc_data dta
;
2551 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2552 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
2554 dta
.chains
= chains
;
2555 dta
.tmp_vars
= tmp_vars
;
2557 update_ssa (TODO_update_ssa_only_virtuals
);
2559 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2560 execute_pred_commoning_cbck is called may cause phi nodes to be
2561 reallocated, which is a problem since CHAINS may point to these
2562 statements. To fix this, we store the ssa names defined by the
2563 phi nodes here instead of the phi nodes themselves, and restore
2564 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2565 replace_phis_by_defined_names (chains
);
2567 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
2568 execute_pred_commoning_cbck
, &dta
);
2569 eliminate_temp_copies (loop
, tmp_vars
);
2573 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2575 "Executing predictive commoning without unrolling.\n");
2576 execute_pred_commoning (loop
, chains
, tmp_vars
);
2580 release_chains (chains
);
2581 free_data_refs (datarefs
);
2582 BITMAP_FREE (tmp_vars
);
2583 BITMAP_FREE (looparound_phis
);
2585 free_affine_expand_cache (&name_expansions
);
2590 /* Runs predictive commoning. */
2593 tree_predictive_commoning (void)
2595 bool unrolled
= false;
2599 initialize_original_copy_tables ();
2600 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
2601 if (optimize_loop_for_speed_p (loop
))
2603 unrolled
|= tree_predictive_commoning_loop (loop
);
2609 ret
= TODO_cleanup_cfg
;
2611 free_original_copy_tables ();
2616 /* Predictive commoning Pass. */
2619 run_tree_predictive_commoning (struct function
*fun
)
2621 if (number_of_loops (fun
) <= 1)
2624 return tree_predictive_commoning ();
2629 const pass_data pass_data_predcom
=
2631 GIMPLE_PASS
, /* type */
2633 OPTGROUP_LOOP
, /* optinfo_flags */
2634 TV_PREDCOM
, /* tv_id */
2635 PROP_cfg
, /* properties_required */
2636 0, /* properties_provided */
2637 0, /* properties_destroyed */
2638 0, /* todo_flags_start */
2639 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
2642 class pass_predcom
: public gimple_opt_pass
2645 pass_predcom (gcc::context
*ctxt
)
2646 : gimple_opt_pass (pass_data_predcom
, ctxt
)
2649 /* opt_pass methods: */
2650 virtual bool gate (function
*) { return flag_predictive_commoning
!= 0; }
2651 virtual unsigned int execute (function
*fun
)
2653 return run_tree_predictive_commoning (fun
);
2656 }; // class pass_predcom
2661 make_pass_predcom (gcc::context
*ctxt
)
2663 return new pass_predcom (ctxt
);