1 /* Predictive commoning.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
170 for (i = 0; i < n; i++)
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
189 #include "coretypes.h"
196 #include "fold-const.h"
199 #include "internal-fn.h"
201 #include "gimplify.h"
202 #include "gimple-iterator.h"
203 #include "gimplify-me.h"
204 #include "tree-ssa-loop-ivopts.h"
205 #include "tree-ssa-loop-manip.h"
206 #include "tree-ssa-loop-niter.h"
207 #include "tree-ssa-loop.h"
208 #include "tree-into-ssa.h"
210 #include "insn-config.h"
215 #include "emit-rtl.h"
219 #include "tree-dfa.h"
220 #include "tree-ssa.h"
221 #include "tree-data-ref.h"
222 #include "tree-scalar-evolution.h"
223 #include "tree-chrec.h"
225 #include "gimple-pretty-print.h"
226 #include "tree-pass.h"
227 #include "tree-affine.h"
228 #include "tree-inline.h"
229 #include "wide-int-print.h"
231 /* The maximum number of iterations between the considered memory
234 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
236 /* Data references (or phi nodes that carry data reference values across
239 typedef struct dref_d
241 /* The reference itself. */
242 struct data_reference
*ref
;
244 /* The statement in that the reference appears. */
247 /* In case that STMT is a phi node, this field is set to the SSA name
248 defined by it in replace_phis_by_defined_names (in order to avoid
249 pointing to phi node that got reallocated in the meantime). */
250 tree name_defined_by_phi
;
252 /* Distance of the reference from the root of the chain (in number of
253 iterations of the loop). */
256 /* Number of iterations offset from the first reference in the component. */
259 /* Number of the reference in a component, in dominance ordering. */
262 /* True if the memory reference is always accessed when the loop is
264 unsigned always_accessed
: 1;
268 /* Type of the chain of the references. */
272 /* The addresses of the references in the chain are constant. */
275 /* There are only loads in the chain. */
278 /* Root of the chain is store, the rest are loads. */
281 /* A combination of two chains. */
285 /* Chains of data references. */
289 /* Type of the chain. */
290 enum chain_type type
;
292 /* For combination chains, the operator and the two chains that are
293 combined, and the type of the result. */
296 struct chain
*ch1
, *ch2
;
298 /* The references in the chain. */
301 /* The maximum distance of the reference in the chain from the root. */
304 /* The variables used to copy the value throughout iterations. */
307 /* Initializers for the variables. */
310 /* True if there is a use of a variable with the maximal distance
311 that comes after the root in the loop. */
312 unsigned has_max_use_after
: 1;
314 /* True if all the memory references in the chain are always accessed. */
315 unsigned all_always_accessed
: 1;
317 /* True if this chain was combined together with some other chain. */
318 unsigned combined
: 1;
322 /* Describes the knowledge about the step of the memory references in
327 /* The step is zero. */
330 /* The step is nonzero. */
333 /* The step may or may not be nonzero. */
337 /* Components of the data dependence graph. */
341 /* The references in the component. */
344 /* What we know about the step of the references in the component. */
345 enum ref_step_type comp_step
;
347 /* Next component in the list. */
348 struct component
*next
;
351 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
353 static bitmap looparound_phis
;
355 /* Cache used by tree_to_aff_combination_expand. */
357 static hash_map
<tree
, name_expansion
*> *name_expansions
;
359 /* Dumps data reference REF to FILE. */
361 extern void dump_dref (FILE *, dref
);
363 dump_dref (FILE *file
, dref ref
)
368 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
369 fprintf (file
, " (id %u%s)\n", ref
->pos
,
370 DR_IS_READ (ref
->ref
) ? "" : ", write");
372 fprintf (file
, " offset ");
373 print_decs (ref
->offset
, file
);
374 fprintf (file
, "\n");
376 fprintf (file
, " distance %u\n", ref
->distance
);
380 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
381 fprintf (file
, " looparound ref\n");
383 fprintf (file
, " combination ref\n");
384 fprintf (file
, " in statement ");
385 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
386 fprintf (file
, "\n");
387 fprintf (file
, " distance %u\n", ref
->distance
);
392 /* Dumps CHAIN to FILE. */
394 extern void dump_chain (FILE *, chain_p
);
396 dump_chain (FILE *file
, chain_p chain
)
399 const char *chain_type
;
406 chain_type
= "Load motion";
410 chain_type
= "Loads-only";
414 chain_type
= "Store-loads";
418 chain_type
= "Combination";
425 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
426 chain
->combined
? " (combined)" : "");
427 if (chain
->type
!= CT_INVARIANT
)
428 fprintf (file
, " max distance %u%s\n", chain
->length
,
429 chain
->has_max_use_after
? "" : ", may reuse first");
431 if (chain
->type
== CT_COMBINATION
)
433 fprintf (file
, " equal to %p %s %p in type ",
434 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
435 (void *) chain
->ch2
);
436 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
437 fprintf (file
, "\n");
440 if (chain
->vars
.exists ())
442 fprintf (file
, " vars");
443 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
446 print_generic_expr (file
, var
, TDF_SLIM
);
448 fprintf (file
, "\n");
451 if (chain
->inits
.exists ())
453 fprintf (file
, " inits");
454 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
457 print_generic_expr (file
, var
, TDF_SLIM
);
459 fprintf (file
, "\n");
462 fprintf (file
, " references:\n");
463 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
466 fprintf (file
, "\n");
469 /* Dumps CHAINS to FILE. */
471 extern void dump_chains (FILE *, vec
<chain_p
> );
473 dump_chains (FILE *file
, vec
<chain_p
> chains
)
478 FOR_EACH_VEC_ELT (chains
, i
, chain
)
479 dump_chain (file
, chain
);
482 /* Dumps COMP to FILE. */
484 extern void dump_component (FILE *, struct component
*);
486 dump_component (FILE *file
, struct component
*comp
)
491 fprintf (file
, "Component%s:\n",
492 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
493 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
495 fprintf (file
, "\n");
498 /* Dumps COMPS to FILE. */
500 extern void dump_components (FILE *, struct component
*);
502 dump_components (FILE *file
, struct component
*comps
)
504 struct component
*comp
;
506 for (comp
= comps
; comp
; comp
= comp
->next
)
507 dump_component (file
, comp
);
510 /* Frees a chain CHAIN. */
513 release_chain (chain_p chain
)
521 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
524 chain
->refs
.release ();
525 chain
->vars
.release ();
526 chain
->inits
.release ();
534 release_chains (vec
<chain_p
> chains
)
539 FOR_EACH_VEC_ELT (chains
, i
, chain
)
540 release_chain (chain
);
544 /* Frees a component COMP. */
547 release_component (struct component
*comp
)
549 comp
->refs
.release ();
553 /* Frees list of components COMPS. */
556 release_components (struct component
*comps
)
558 struct component
*act
, *next
;
560 for (act
= comps
; act
; act
= next
)
563 release_component (act
);
567 /* Finds a root of tree given by FATHERS containing A, and performs path
571 component_of (unsigned fathers
[], unsigned a
)
575 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
578 for (; a
!= root
; a
= n
)
587 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
588 components, A and B are components to merge. */
591 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
593 unsigned ca
= component_of (fathers
, a
);
594 unsigned cb
= component_of (fathers
, b
);
599 if (sizes
[ca
] < sizes
[cb
])
601 sizes
[cb
] += sizes
[ca
];
606 sizes
[ca
] += sizes
[cb
];
611 /* Returns true if A is a reference that is suitable for predictive commoning
612 in the innermost loop that contains it. REF_STEP is set according to the
613 step of the reference A. */
616 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
618 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
621 || TREE_THIS_VOLATILE (ref
)
622 || !is_gimple_reg_type (TREE_TYPE (ref
))
623 || tree_could_throw_p (ref
))
626 if (integer_zerop (step
))
627 *ref_step
= RS_INVARIANT
;
628 else if (integer_nonzerop (step
))
629 *ref_step
= RS_NONZERO
;
636 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
639 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
641 tree type
= TREE_TYPE (DR_OFFSET (dr
));
644 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
646 aff_combination_const (&delta
, type
, wi::to_widest (DR_INIT (dr
)));
647 aff_combination_add (offset
, &delta
);
650 /* Determines number of iterations of the innermost enclosing loop before B
651 refers to exactly the same location as A and stores it to OFF. If A and
652 B do not have the same step, they never meet, or anything else fails,
653 returns false, otherwise returns true. Both A and B are assumed to
654 satisfy suitable_reference_p. */
657 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
660 aff_tree diff
, baseb
, step
;
663 /* Check that both the references access the location in the same type. */
664 typea
= TREE_TYPE (DR_REF (a
));
665 typeb
= TREE_TYPE (DR_REF (b
));
666 if (!useless_type_conversion_p (typeb
, typea
))
669 /* Check whether the base address and the step of both references is the
671 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
672 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
675 if (integer_zerop (DR_STEP (a
)))
677 /* If the references have loop invariant address, check that they access
678 exactly the same location. */
680 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
681 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
684 /* Compare the offsets of the addresses, and check whether the difference
685 is a multiple of step. */
686 aff_combination_dr_offset (a
, &diff
);
687 aff_combination_dr_offset (b
, &baseb
);
688 aff_combination_scale (&baseb
, -1);
689 aff_combination_add (&diff
, &baseb
);
691 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
692 &step
, &name_expansions
);
693 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
696 /* Returns the last basic block in LOOP for that we are sure that
697 it is executed whenever the loop is entered. */
700 last_always_executed_block (struct loop
*loop
)
703 vec
<edge
> exits
= get_loop_exit_edges (loop
);
705 basic_block last
= loop
->latch
;
707 FOR_EACH_VEC_ELT (exits
, i
, ex
)
708 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
714 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
716 static struct component
*
717 split_data_refs_to_components (struct loop
*loop
,
718 vec
<data_reference_p
> datarefs
,
721 unsigned i
, n
= datarefs
.length ();
722 unsigned ca
, ia
, ib
, bad
;
723 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
724 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
725 struct component
**comps
;
726 struct data_reference
*dr
, *dra
, *drb
;
727 struct data_dependence_relation
*ddr
;
728 struct component
*comp_list
= NULL
, *comp
;
730 basic_block last_always_executed
= last_always_executed_block (loop
);
732 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
736 /* A fake reference for call or asm_expr that may clobber memory;
740 /* predcom pass isn't prepared to handle calls with data references. */
741 if (is_gimple_call (DR_STMT (dr
)))
743 dr
->aux
= (void *) (size_t) i
;
748 /* A component reserved for the "bad" data references. */
752 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
754 enum ref_step_type dummy
;
756 if (!suitable_reference_p (dr
, &dummy
))
758 ia
= (unsigned) (size_t) dr
->aux
;
759 merge_comps (comp_father
, comp_size
, n
, ia
);
763 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
765 widest_int dummy_off
;
767 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
772 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
773 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
777 bad
= component_of (comp_father
, n
);
779 /* If both A and B are reads, we may ignore unsuitable dependences. */
780 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
782 if (ia
== bad
|| ib
== bad
783 || !determine_offset (dra
, drb
, &dummy_off
))
786 /* If A is read and B write or vice versa and there is unsuitable
787 dependence, instead of merging both components into a component
788 that will certainly not pass suitable_component_p, just put the
789 read into bad component, perhaps at least the write together with
790 all the other data refs in it's component will be optimizable. */
791 else if (DR_IS_READ (dra
) && ib
!= bad
)
795 else if (!determine_offset (dra
, drb
, &dummy_off
))
797 merge_comps (comp_father
, comp_size
, bad
, ia
);
801 else if (DR_IS_READ (drb
) && ia
!= bad
)
805 else if (!determine_offset (dra
, drb
, &dummy_off
))
807 merge_comps (comp_father
, comp_size
, bad
, ib
);
812 merge_comps (comp_father
, comp_size
, ia
, ib
);
815 comps
= XCNEWVEC (struct component
*, n
);
816 bad
= component_of (comp_father
, n
);
817 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
819 ia
= (unsigned) (size_t) dr
->aux
;
820 ca
= component_of (comp_father
, ia
);
827 comp
= XCNEW (struct component
);
828 comp
->refs
.create (comp_size
[ca
]);
832 dataref
= XCNEW (struct dref_d
);
834 dataref
->stmt
= DR_STMT (dr
);
836 dataref
->distance
= 0;
838 dataref
->always_accessed
839 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
840 gimple_bb (dataref
->stmt
));
841 dataref
->pos
= comp
->refs
.length ();
842 comp
->refs
.quick_push (dataref
);
845 for (i
= 0; i
< n
; i
++)
850 comp
->next
= comp_list
;
862 /* Returns true if the component COMP satisfies the conditions
863 described in 2) at the beginning of this file. LOOP is the current
867 suitable_component_p (struct loop
*loop
, struct component
*comp
)
871 basic_block ba
, bp
= loop
->header
;
872 bool ok
, has_write
= false;
874 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
876 ba
= gimple_bb (a
->stmt
);
878 if (!just_once_each_iteration_p (loop
, ba
))
881 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
884 if (DR_IS_WRITE (a
->ref
))
888 first
= comp
->refs
[0];
889 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
893 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
895 if (!determine_offset (first
->ref
, a
->ref
, &a
->offset
))
898 #ifdef ENABLE_CHECKING
900 enum ref_step_type a_step
;
901 ok
= suitable_reference_p (a
->ref
, &a_step
);
902 gcc_assert (ok
&& a_step
== comp
->comp_step
);
907 /* If there is a write inside the component, we must know whether the
908 step is nonzero or not -- we would not otherwise be able to recognize
909 whether the value accessed by reads comes from the OFFSET-th iteration
910 or the previous one. */
911 if (has_write
&& comp
->comp_step
== RS_ANY
)
917 /* Check the conditions on references inside each of components COMPS,
918 and remove the unsuitable components from the list. The new list
919 of components is returned. The conditions are described in 2) at
920 the beginning of this file. LOOP is the current loop. */
922 static struct component
*
923 filter_suitable_components (struct loop
*loop
, struct component
*comps
)
925 struct component
**comp
, *act
;
927 for (comp
= &comps
; *comp
; )
930 if (suitable_component_p (loop
, act
))
938 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
940 release_component (act
);
947 /* Compares two drefs A and B by their offset and position. Callback for
951 order_drefs (const void *a
, const void *b
)
953 const dref
*const da
= (const dref
*) a
;
954 const dref
*const db
= (const dref
*) b
;
955 int offcmp
= wi::cmps ((*da
)->offset
, (*db
)->offset
);
960 return (*da
)->pos
- (*db
)->pos
;
963 /* Returns root of the CHAIN. */
966 get_chain_root (chain_p chain
)
968 return chain
->refs
[0];
971 /* Adds REF to the chain CHAIN. */
974 add_ref_to_chain (chain_p chain
, dref ref
)
976 dref root
= get_chain_root (chain
);
978 gcc_assert (wi::les_p (root
->offset
, ref
->offset
));
979 widest_int dist
= ref
->offset
- root
->offset
;
980 if (wi::leu_p (MAX_DISTANCE
, dist
))
985 gcc_assert (wi::fits_uhwi_p (dist
));
987 chain
->refs
.safe_push (ref
);
989 ref
->distance
= dist
.to_uhwi ();
991 if (ref
->distance
>= chain
->length
)
993 chain
->length
= ref
->distance
;
994 chain
->has_max_use_after
= false;
997 if (ref
->distance
== chain
->length
998 && ref
->pos
> root
->pos
)
999 chain
->has_max_use_after
= true;
1001 chain
->all_always_accessed
&= ref
->always_accessed
;
1004 /* Returns the chain for invariant component COMP. */
1007 make_invariant_chain (struct component
*comp
)
1009 chain_p chain
= XCNEW (struct chain
);
1013 chain
->type
= CT_INVARIANT
;
1015 chain
->all_always_accessed
= true;
1017 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
1019 chain
->refs
.safe_push (ref
);
1020 chain
->all_always_accessed
&= ref
->always_accessed
;
1026 /* Make a new chain rooted at REF. */
1029 make_rooted_chain (dref ref
)
1031 chain_p chain
= XCNEW (struct chain
);
1033 chain
->type
= DR_IS_READ (ref
->ref
) ? CT_LOAD
: CT_STORE_LOAD
;
1035 chain
->refs
.safe_push (ref
);
1036 chain
->all_always_accessed
= ref
->always_accessed
;
1043 /* Returns true if CHAIN is not trivial. */
1046 nontrivial_chain_p (chain_p chain
)
1048 return chain
!= NULL
&& chain
->refs
.length () > 1;
1051 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1055 name_for_ref (dref ref
)
1059 if (is_gimple_assign (ref
->stmt
))
1061 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1062 name
= gimple_assign_lhs (ref
->stmt
);
1064 name
= gimple_assign_rhs1 (ref
->stmt
);
1067 name
= PHI_RESULT (ref
->stmt
);
1069 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1072 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1073 iterations of the innermost enclosing loop). */
1076 valid_initializer_p (struct data_reference
*ref
,
1077 unsigned distance
, struct data_reference
*root
)
1079 aff_tree diff
, base
, step
;
1082 /* Both REF and ROOT must be accessing the same object. */
1083 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1086 /* The initializer is defined outside of loop, hence its address must be
1087 invariant inside the loop. */
1088 gcc_assert (integer_zerop (DR_STEP (ref
)));
1090 /* If the address of the reference is invariant, initializer must access
1091 exactly the same location. */
1092 if (integer_zerop (DR_STEP (root
)))
1093 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1094 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1096 /* Verify that this index of REF is equal to the root's index at
1097 -DISTANCE-th iteration. */
1098 aff_combination_dr_offset (root
, &diff
);
1099 aff_combination_dr_offset (ref
, &base
);
1100 aff_combination_scale (&base
, -1);
1101 aff_combination_add (&diff
, &base
);
1103 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1104 &step
, &name_expansions
);
1105 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1108 if (off
!= distance
)
1114 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1115 initial value is correct (equal to initial value of REF shifted by one
1116 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1117 is the root of the current chain. */
1120 find_looparound_phi (struct loop
*loop
, dref ref
, dref root
)
1122 tree name
, init
, init_ref
;
1125 edge latch
= loop_latch_edge (loop
);
1126 struct data_reference init_dr
;
1129 if (is_gimple_assign (ref
->stmt
))
1131 if (DR_IS_READ (ref
->ref
))
1132 name
= gimple_assign_lhs (ref
->stmt
);
1134 name
= gimple_assign_rhs1 (ref
->stmt
);
1137 name
= PHI_RESULT (ref
->stmt
);
1141 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1144 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1148 if (gsi_end_p (psi
))
1151 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1152 if (TREE_CODE (init
) != SSA_NAME
)
1154 init_stmt
= SSA_NAME_DEF_STMT (init
);
1155 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1157 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1159 init_ref
= gimple_assign_rhs1 (init_stmt
);
1160 if (!REFERENCE_CLASS_P (init_ref
)
1161 && !DECL_P (init_ref
))
1164 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1165 loop enclosing PHI). */
1166 memset (&init_dr
, 0, sizeof (struct data_reference
));
1167 DR_REF (&init_dr
) = init_ref
;
1168 DR_STMT (&init_dr
) = phi
;
1169 if (!dr_analyze_innermost (&init_dr
, loop
))
1172 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1178 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1181 insert_looparound_copy (chain_p chain
, dref ref
, gphi
*phi
)
1183 dref nw
= XCNEW (struct dref_d
), aref
;
1187 nw
->distance
= ref
->distance
+ 1;
1188 nw
->always_accessed
= 1;
1190 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1191 if (aref
->distance
>= nw
->distance
)
1193 chain
->refs
.safe_insert (i
, nw
);
1195 if (nw
->distance
> chain
->length
)
1197 chain
->length
= nw
->distance
;
1198 chain
->has_max_use_after
= false;
1202 /* For references in CHAIN that are copied around the LOOP (created previously
1203 by PRE, or by user), add the results of such copies to the chain. This
1204 enables us to remove the copies by unrolling, and may need less registers
1205 (also, it may allow us to combine chains together). */
1208 add_looparound_copies (struct loop
*loop
, chain_p chain
)
1211 dref ref
, root
= get_chain_root (chain
);
1214 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1216 phi
= find_looparound_phi (loop
, ref
, root
);
1220 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1221 insert_looparound_copy (chain
, ref
, phi
);
1225 /* Find roots of the values and determine distances in the component COMP.
1226 The references are redistributed into CHAINS. LOOP is the current
1230 determine_roots_comp (struct loop
*loop
,
1231 struct component
*comp
,
1232 vec
<chain_p
> *chains
)
1236 chain_p chain
= NULL
;
1237 widest_int last_ofs
= 0;
1239 /* Invariants are handled specially. */
1240 if (comp
->comp_step
== RS_INVARIANT
)
1242 chain
= make_invariant_chain (comp
);
1243 chains
->safe_push (chain
);
1247 comp
->refs
.qsort (order_drefs
);
1249 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1251 if (!chain
|| DR_IS_WRITE (a
->ref
)
1252 || wi::leu_p (MAX_DISTANCE
, a
->offset
- last_ofs
))
1254 if (nontrivial_chain_p (chain
))
1256 add_looparound_copies (loop
, chain
);
1257 chains
->safe_push (chain
);
1260 release_chain (chain
);
1261 chain
= make_rooted_chain (a
);
1262 last_ofs
= a
->offset
;
1266 add_ref_to_chain (chain
, a
);
1269 if (nontrivial_chain_p (chain
))
1271 add_looparound_copies (loop
, chain
);
1272 chains
->safe_push (chain
);
1275 release_chain (chain
);
1278 /* Find roots of the values and determine distances in components COMPS, and
1279 separates the references to CHAINS. LOOP is the current loop. */
1282 determine_roots (struct loop
*loop
,
1283 struct component
*comps
, vec
<chain_p
> *chains
)
1285 struct component
*comp
;
1287 for (comp
= comps
; comp
; comp
= comp
->next
)
1288 determine_roots_comp (loop
, comp
, chains
);
1291 /* Replace the reference in statement STMT with temporary variable
1292 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1293 the reference in the statement. IN_LHS is true if the reference
1294 is in the lhs of STMT, false if it is in rhs. */
1297 replace_ref_with (gimple stmt
, tree new_tree
, bool set
, bool in_lhs
)
1301 gimple_stmt_iterator bsi
, psi
;
1303 if (gimple_code (stmt
) == GIMPLE_PHI
)
1305 gcc_assert (!in_lhs
&& !set
);
1307 val
= PHI_RESULT (stmt
);
1308 bsi
= gsi_after_labels (gimple_bb (stmt
));
1309 psi
= gsi_for_stmt (stmt
);
1310 remove_phi_node (&psi
, false);
1312 /* Turn the phi node into GIMPLE_ASSIGN. */
1313 new_stmt
= gimple_build_assign (val
, new_tree
);
1314 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1318 /* Since the reference is of gimple_reg type, it should only
1319 appear as lhs or rhs of modify statement. */
1320 gcc_assert (is_gimple_assign (stmt
));
1322 bsi
= gsi_for_stmt (stmt
);
1324 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1327 gcc_assert (!in_lhs
);
1328 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1329 stmt
= gsi_stmt (bsi
);
1336 /* We have statement
1340 If OLD is a memory reference, then VAL is gimple_val, and we transform
1346 Otherwise, we are replacing a combination chain,
1347 VAL is the expression that performs the combination, and OLD is an
1348 SSA name. In this case, we transform the assignment to
1355 val
= gimple_assign_lhs (stmt
);
1356 if (TREE_CODE (val
) != SSA_NAME
)
1358 val
= gimple_assign_rhs1 (stmt
);
1359 gcc_assert (gimple_assign_single_p (stmt
));
1360 if (TREE_CLOBBER_P (val
))
1361 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1363 gcc_assert (gimple_assign_copy_p (stmt
));
1375 val
= gimple_assign_lhs (stmt
);
1378 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1379 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1382 /* Returns a memory reference to DR in the ITER-th iteration of
1383 the loop it was analyzed in. Append init stmts to STMTS. */
1386 ref_at_iteration (data_reference_p dr
, int iter
, gimple_seq
*stmts
)
1388 tree off
= DR_OFFSET (dr
);
1389 tree coff
= DR_INIT (dr
);
1392 else if (TREE_CODE (DR_STEP (dr
)) == INTEGER_CST
)
1393 coff
= size_binop (PLUS_EXPR
, coff
,
1394 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1396 off
= size_binop (PLUS_EXPR
, off
,
1397 size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
)));
1398 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1399 addr
= force_gimple_operand_1 (unshare_expr (addr
), stmts
,
1400 is_gimple_mem_ref_addr
, NULL_TREE
);
1401 tree alias_ptr
= fold_convert (reference_alias_ptr_type (DR_REF (dr
)), coff
);
1402 /* While data-ref analysis punts on bit offsets it still handles
1403 bitfield accesses at byte boundaries. Cope with that. Note that
1404 we cannot simply re-apply the outer COMPONENT_REF because the
1405 byte-granular portion of it is already applied via DR_INIT and
1406 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1407 start at offset zero. */
1408 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
1409 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
1411 tree field
= TREE_OPERAND (DR_REF (dr
), 1);
1412 return build3 (BIT_FIELD_REF
, TREE_TYPE (DR_REF (dr
)),
1413 build2 (MEM_REF
, DECL_BIT_FIELD_TYPE (field
),
1415 DECL_SIZE (field
), bitsize_zero_node
);
1418 return fold_build2 (MEM_REF
, TREE_TYPE (DR_REF (dr
)), addr
, alias_ptr
);
1421 /* Get the initialization expression for the INDEX-th temporary variable
1425 get_init_expr (chain_p chain
, unsigned index
)
1427 if (chain
->type
== CT_COMBINATION
)
1429 tree e1
= get_init_expr (chain
->ch1
, index
);
1430 tree e2
= get_init_expr (chain
->ch2
, index
);
1432 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1435 return chain
->inits
[index
];
1438 /* Returns a new temporary variable used for the I-th variable carrying
1439 value of REF. The variable's uid is marked in TMP_VARS. */
1442 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1444 tree type
= TREE_TYPE (ref
);
1445 /* We never access the components of the temporary variable in predictive
1447 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
1448 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1452 /* Creates the variables for CHAIN, as well as phi nodes for them and
1453 initialization on entry to LOOP. Uids of the newly created
1454 temporary variables are marked in TMP_VARS. */
1457 initialize_root_vars (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1460 unsigned n
= chain
->length
;
1461 dref root
= get_chain_root (chain
);
1462 bool reuse_first
= !chain
->has_max_use_after
;
1463 tree ref
, init
, var
, next
;
1466 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1468 /* If N == 0, then all the references are within the single iteration. And
1469 since this is an nonempty chain, reuse_first cannot be true. */
1470 gcc_assert (n
> 0 || !reuse_first
);
1472 chain
->vars
.create (n
+ 1);
1474 if (chain
->type
== CT_COMBINATION
)
1475 ref
= gimple_assign_lhs (root
->stmt
);
1477 ref
= DR_REF (root
->ref
);
1479 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1481 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1482 chain
->vars
.quick_push (var
);
1485 chain
->vars
.quick_push (chain
->vars
[0]);
1487 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1488 chain
->vars
[i
] = make_ssa_name (var
);
1490 for (i
= 0; i
< n
; i
++)
1492 var
= chain
->vars
[i
];
1493 next
= chain
->vars
[i
+ 1];
1494 init
= get_init_expr (chain
, i
);
1496 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1498 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1500 phi
= create_phi_node (var
, loop
->header
);
1501 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1502 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1506 /* Create the variables and initialization statement for root of chain
1507 CHAIN. Uids of the newly created temporary variables are marked
1511 initialize_root (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1513 dref root
= get_chain_root (chain
);
1514 bool in_lhs
= (chain
->type
== CT_STORE_LOAD
1515 || chain
->type
== CT_COMBINATION
);
1517 initialize_root_vars (loop
, chain
, tmp_vars
);
1518 replace_ref_with (root
->stmt
,
1519 chain
->vars
[chain
->length
],
1523 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1524 initialization on entry to LOOP if necessary. The ssa name for the variable
1525 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1526 around the loop is created. Uid of the newly created temporary variable
1527 is marked in TMP_VARS. INITS is the list containing the (single)
1531 initialize_root_vars_lm (struct loop
*loop
, dref root
, bool written
,
1532 vec
<tree
> *vars
, vec
<tree
> inits
,
1536 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1539 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1541 /* Find the initializer for the variable, and check that it cannot
1545 vars
->create (written
? 2 : 1);
1546 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1547 vars
->quick_push (var
);
1549 vars
->quick_push ((*vars
)[0]);
1551 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1552 (*vars
)[i
] = make_ssa_name (var
);
1556 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1558 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1563 phi
= create_phi_node (var
, loop
->header
);
1564 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1565 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1569 gassign
*init_stmt
= gimple_build_assign (var
, init
);
1570 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1575 /* Execute load motion for references in chain CHAIN. Uids of the newly
1576 created temporary variables are marked in TMP_VARS. */
1579 execute_load_motion (struct loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1581 auto_vec
<tree
> vars
;
1583 unsigned n_writes
= 0, ridx
, i
;
1586 gcc_assert (chain
->type
== CT_INVARIANT
);
1587 gcc_assert (!chain
->combined
);
1588 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1589 if (DR_IS_WRITE (a
->ref
))
1592 /* If there are no reads in the loop, there is nothing to do. */
1593 if (n_writes
== chain
->refs
.length ())
1596 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
1597 &vars
, chain
->inits
, tmp_vars
);
1600 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1602 bool is_read
= DR_IS_READ (a
->ref
);
1604 if (DR_IS_WRITE (a
->ref
))
1610 var
= make_ssa_name (SSA_NAME_VAR (var
));
1617 replace_ref_with (a
->stmt
, vars
[ridx
],
1618 !is_read
, !is_read
);
1622 /* Returns the single statement in that NAME is used, excepting
1623 the looparound phi nodes contained in one of the chains. If there is no
1624 such statement, or more statements, NULL is returned. */
1627 single_nonlooparound_use (tree name
)
1630 imm_use_iterator it
;
1631 gimple stmt
, ret
= NULL
;
1633 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
1635 stmt
= USE_STMT (use
);
1637 if (gimple_code (stmt
) == GIMPLE_PHI
)
1639 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1640 could not be processed anyway, so just fail for them. */
1641 if (bitmap_bit_p (looparound_phis
,
1642 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
1647 else if (is_gimple_debug (stmt
))
1649 else if (ret
!= NULL
)
1658 /* Remove statement STMT, as well as the chain of assignments in that it is
1662 remove_stmt (gimple stmt
)
1666 gimple_stmt_iterator psi
;
1668 if (gimple_code (stmt
) == GIMPLE_PHI
)
1670 name
= PHI_RESULT (stmt
);
1671 next
= single_nonlooparound_use (name
);
1672 reset_debug_uses (stmt
);
1673 psi
= gsi_for_stmt (stmt
);
1674 remove_phi_node (&psi
, true);
1677 || !gimple_assign_ssa_name_copy_p (next
)
1678 || gimple_assign_rhs1 (next
) != name
)
1686 gimple_stmt_iterator bsi
;
1688 bsi
= gsi_for_stmt (stmt
);
1690 name
= gimple_assign_lhs (stmt
);
1691 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
1693 next
= single_nonlooparound_use (name
);
1694 reset_debug_uses (stmt
);
1696 unlink_stmt_vdef (stmt
);
1697 gsi_remove (&bsi
, true);
1698 release_defs (stmt
);
1701 || !gimple_assign_ssa_name_copy_p (next
)
1702 || gimple_assign_rhs1 (next
) != name
)
1709 /* Perform the predictive commoning optimization for a chain CHAIN.
1710 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1713 execute_pred_commoning_chain (struct loop
*loop
, chain_p chain
,
1720 if (chain
->combined
)
1722 /* For combined chains, just remove the statements that are used to
1723 compute the values of the expression (except for the root one).
1724 We delay this until after all chains are processed. */
1728 /* For non-combined chains, set up the variables that hold its value,
1729 and replace the uses of the original references by these
1731 initialize_root (loop
, chain
, tmp_vars
);
1732 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
1734 var
= chain
->vars
[chain
->length
- a
->distance
];
1735 replace_ref_with (a
->stmt
, var
, false, false);
1740 /* Determines the unroll factor necessary to remove as many temporary variable
1741 copies as possible. CHAINS is the list of chains that will be
1745 determine_unroll_factor (vec
<chain_p
> chains
)
1748 unsigned factor
= 1, af
, nfactor
, i
;
1749 unsigned max
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
1751 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1753 if (chain
->type
== CT_INVARIANT
)
1756 if (chain
->combined
)
1758 /* For combined chains, we can't handle unrolling if we replace
1762 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
1763 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1768 /* The best unroll factor for this chain is equal to the number of
1769 temporary variables that we create for it. */
1771 if (chain
->has_max_use_after
)
1774 nfactor
= factor
* af
/ gcd (factor
, af
);
1782 /* Perform the predictive commoning optimization for CHAINS.
1783 Uids of the newly created temporary variables are marked in TMP_VARS. */
1786 execute_pred_commoning (struct loop
*loop
, vec
<chain_p
> chains
,
1792 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1794 if (chain
->type
== CT_INVARIANT
)
1795 execute_load_motion (loop
, chain
, tmp_vars
);
1797 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
1800 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1802 if (chain
->type
== CT_INVARIANT
)
1804 else if (chain
->combined
)
1806 /* For combined chains, just remove the statements that are used to
1807 compute the values of the expression (except for the root one). */
1810 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
1811 remove_stmt (a
->stmt
);
1815 update_ssa (TODO_update_ssa_only_virtuals
);
1818 /* For each reference in CHAINS, if its defining statement is
1819 phi node, record the ssa name that is defined by it. */
1822 replace_phis_by_defined_names (vec
<chain_p
> chains
)
1828 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1829 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1831 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
1833 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
1839 /* For each reference in CHAINS, if name_defined_by_phi is not
1840 NULL, use it to set the stmt field. */
1843 replace_names_by_phis (vec
<chain_p
> chains
)
1849 FOR_EACH_VEC_ELT (chains
, i
, chain
)
1850 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
1851 if (a
->stmt
== NULL
)
1853 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
1854 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
1855 a
->name_defined_by_phi
= NULL_TREE
;
1859 /* Wrapper over execute_pred_commoning, to pass it as a callback
1860 to tree_transform_and_unroll_loop. */
1864 vec
<chain_p
> chains
;
1869 execute_pred_commoning_cbck (struct loop
*loop
, void *data
)
1871 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
1873 /* Restore phi nodes that were replaced by ssa names before
1874 tree_transform_and_unroll_loop (see detailed description in
1875 tree_predictive_commoning_loop). */
1876 replace_names_by_phis (dta
->chains
);
1877 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
1880 /* Base NAME and all the names in the chain of phi nodes that use it
1881 on variable VAR. The phi nodes are recognized by being in the copies of
1882 the header of the LOOP. */
1885 base_names_in_chain_on (struct loop
*loop
, tree name
, tree var
)
1888 imm_use_iterator iter
;
1890 replace_ssa_name_symbol (name
, var
);
1895 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
1897 if (gimple_code (stmt
) == GIMPLE_PHI
1898 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
1901 BREAK_FROM_IMM_USE_STMT (iter
);
1907 name
= PHI_RESULT (phi
);
1908 replace_ssa_name_symbol (name
, var
);
1912 /* Given an unrolled LOOP after predictive commoning, remove the
1913 register copies arising from phi nodes by changing the base
1914 variables of SSA names. TMP_VARS is the set of the temporary variables
1915 for those we want to perform this. */
1918 eliminate_temp_copies (struct loop
*loop
, bitmap tmp_vars
)
1923 tree name
, use
, var
;
1926 e
= loop_latch_edge (loop
);
1927 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1930 name
= PHI_RESULT (phi
);
1931 var
= SSA_NAME_VAR (name
);
1932 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
1934 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
1935 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
1937 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1938 stmt
= SSA_NAME_DEF_STMT (use
);
1939 while (gimple_code (stmt
) == GIMPLE_PHI
1940 /* In case we could not unroll the loop enough to eliminate
1941 all copies, we may reach the loop header before the defining
1942 statement (in that case, some register copies will be present
1943 in loop latch in the final code, corresponding to the newly
1944 created looparound phi nodes). */
1945 && gimple_bb (stmt
) != loop
->header
)
1947 gcc_assert (single_pred_p (gimple_bb (stmt
)));
1948 use
= PHI_ARG_DEF (stmt
, 0);
1949 stmt
= SSA_NAME_DEF_STMT (use
);
1952 base_names_in_chain_on (loop
, use
, var
);
1956 /* Returns true if CHAIN is suitable to be combined. */
1959 chain_can_be_combined_p (chain_p chain
)
1961 return (!chain
->combined
1962 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
1965 /* Returns the modify statement that uses NAME. Skips over assignment
1966 statements, NAME is replaced with the actual name used in the returned
1970 find_use_stmt (tree
*name
)
1975 /* Skip over assignments. */
1978 stmt
= single_nonlooparound_use (*name
);
1982 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1985 lhs
= gimple_assign_lhs (stmt
);
1986 if (TREE_CODE (lhs
) != SSA_NAME
)
1989 if (gimple_assign_copy_p (stmt
))
1991 rhs
= gimple_assign_rhs1 (stmt
);
1997 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
1998 == GIMPLE_BINARY_RHS
)
2005 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2008 may_reassociate_p (tree type
, enum tree_code code
)
2010 if (FLOAT_TYPE_P (type
)
2011 && !flag_unsafe_math_optimizations
)
2014 return (commutative_tree_code (code
)
2015 && associative_tree_code (code
));
2018 /* If the operation used in STMT is associative and commutative, go through the
2019 tree of the same operations and returns its root. Distance to the root
2020 is stored in DISTANCE. */
2023 find_associative_operation_root (gimple stmt
, unsigned *distance
)
2027 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2028 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2031 if (!may_reassociate_p (type
, code
))
2036 lhs
= gimple_assign_lhs (stmt
);
2037 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
2039 next
= find_use_stmt (&lhs
);
2041 || gimple_assign_rhs_code (next
) != code
)
2053 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2054 is no such statement, returns NULL_TREE. In case the operation used on
2055 NAME1 and NAME2 is associative and commutative, returns the root of the
2056 tree formed by this operation instead of the statement that uses NAME1 or
2060 find_common_use_stmt (tree
*name1
, tree
*name2
)
2062 gimple stmt1
, stmt2
;
2064 stmt1
= find_use_stmt (name1
);
2068 stmt2
= find_use_stmt (name2
);
2075 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2078 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2082 return (stmt1
== stmt2
? stmt1
: NULL
);
2085 /* Checks whether R1 and R2 are combined together using CODE, with the result
2086 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2087 if it is true. If CODE is ERROR_MARK, set these values instead. */
2090 combinable_refs_p (dref r1
, dref r2
,
2091 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2093 enum tree_code acode
;
2099 name1
= name_for_ref (r1
);
2100 name2
= name_for_ref (r2
);
2101 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2103 stmt
= find_common_use_stmt (&name1
, &name2
);
2106 /* A simple post-dominance check - make sure the combination
2107 is executed under the same condition as the references. */
2108 || (gimple_bb (stmt
) != gimple_bb (r1
->stmt
)
2109 && gimple_bb (stmt
) != gimple_bb (r2
->stmt
)))
2112 acode
= gimple_assign_rhs_code (stmt
);
2113 aswap
= (!commutative_tree_code (acode
)
2114 && gimple_assign_rhs1 (stmt
) != name1
);
2115 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
2117 if (*code
== ERROR_MARK
)
2125 return (*code
== acode
2127 && *rslt_type
== atype
);
2130 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2131 an assignment of the remaining operand. */
2134 remove_name_from_operation (gimple stmt
, tree op
)
2137 gimple_stmt_iterator si
;
2139 gcc_assert (is_gimple_assign (stmt
));
2141 if (gimple_assign_rhs1 (stmt
) == op
)
2142 other_op
= gimple_assign_rhs2 (stmt
);
2144 other_op
= gimple_assign_rhs1 (stmt
);
2146 si
= gsi_for_stmt (stmt
);
2147 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2149 /* We should not have reallocated STMT. */
2150 gcc_assert (gsi_stmt (si
) == stmt
);
2155 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2156 are combined in a single statement, and returns this statement. */
2159 reassociate_to_the_same_stmt (tree name1
, tree name2
)
2161 gimple stmt1
, stmt2
, root1
, root2
, s1
, s2
;
2162 gassign
*new_stmt
, *tmp_stmt
;
2163 tree new_name
, tmp_name
, var
, r1
, r2
;
2164 unsigned dist1
, dist2
;
2165 enum tree_code code
;
2166 tree type
= TREE_TYPE (name1
);
2167 gimple_stmt_iterator bsi
;
2169 stmt1
= find_use_stmt (&name1
);
2170 stmt2
= find_use_stmt (&name2
);
2171 root1
= find_associative_operation_root (stmt1
, &dist1
);
2172 root2
= find_associative_operation_root (stmt2
, &dist2
);
2173 code
= gimple_assign_rhs_code (stmt1
);
2175 gcc_assert (root1
&& root2
&& root1
== root2
2176 && code
== gimple_assign_rhs_code (stmt2
));
2178 /* Find the root of the nearest expression in that both NAME1 and NAME2
2185 while (dist1
> dist2
)
2187 s1
= find_use_stmt (&r1
);
2188 r1
= gimple_assign_lhs (s1
);
2191 while (dist2
> dist1
)
2193 s2
= find_use_stmt (&r2
);
2194 r2
= gimple_assign_lhs (s2
);
2200 s1
= find_use_stmt (&r1
);
2201 r1
= gimple_assign_lhs (s1
);
2202 s2
= find_use_stmt (&r2
);
2203 r2
= gimple_assign_lhs (s2
);
2206 /* Remove NAME1 and NAME2 from the statements in that they are used
2208 remove_name_from_operation (stmt1
, name1
);
2209 remove_name_from_operation (stmt2
, name2
);
2211 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2212 combine it with the rhs of S1. */
2213 var
= create_tmp_reg (type
, "predreastmp");
2214 new_name
= make_ssa_name (var
);
2215 new_stmt
= gimple_build_assign (new_name
, code
, name1
, name2
);
2217 var
= create_tmp_reg (type
, "predreastmp");
2218 tmp_name
= make_ssa_name (var
);
2220 /* Rhs of S1 may now be either a binary expression with operation
2221 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2222 so that name1 or name2 was removed from it). */
2223 tmp_stmt
= gimple_build_assign (tmp_name
, gimple_assign_rhs_code (s1
),
2224 gimple_assign_rhs1 (s1
),
2225 gimple_assign_rhs2 (s1
));
2227 bsi
= gsi_for_stmt (s1
);
2228 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2229 s1
= gsi_stmt (bsi
);
2232 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2233 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2238 /* Returns the statement that combines references R1 and R2. In case R1
2239 and R2 are not used in the same statement, but they are used with an
2240 associative and commutative operation in the same expression, reassociate
2241 the expression so that they are used in the same statement. */
2244 stmt_combining_refs (dref r1
, dref r2
)
2246 gimple stmt1
, stmt2
;
2247 tree name1
= name_for_ref (r1
);
2248 tree name2
= name_for_ref (r2
);
2250 stmt1
= find_use_stmt (&name1
);
2251 stmt2
= find_use_stmt (&name2
);
2255 return reassociate_to_the_same_stmt (name1
, name2
);
2258 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2259 description of the new chain is returned, otherwise we return NULL. */
2262 combine_chains (chain_p ch1
, chain_p ch2
)
2265 enum tree_code op
= ERROR_MARK
;
2270 tree rslt_type
= NULL_TREE
;
2274 if (ch1
->length
!= ch2
->length
)
2277 if (ch1
->refs
.length () != ch2
->refs
.length ())
2280 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2281 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2283 if (r1
->distance
!= r2
->distance
)
2286 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
2291 std::swap (ch1
, ch2
);
2293 new_chain
= XCNEW (struct chain
);
2294 new_chain
->type
= CT_COMBINATION
;
2296 new_chain
->ch1
= ch1
;
2297 new_chain
->ch2
= ch2
;
2298 new_chain
->rslt_type
= rslt_type
;
2299 new_chain
->length
= ch1
->length
;
2301 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2302 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2304 nw
= XCNEW (struct dref_d
);
2305 nw
->stmt
= stmt_combining_refs (r1
, r2
);
2306 nw
->distance
= r1
->distance
;
2308 new_chain
->refs
.safe_push (nw
);
2311 new_chain
->has_max_use_after
= false;
2312 root_stmt
= get_chain_root (new_chain
)->stmt
;
2313 for (i
= 1; new_chain
->refs
.iterate (i
, &nw
); i
++)
2315 if (nw
->distance
== new_chain
->length
2316 && !stmt_dominates_stmt_p (nw
->stmt
, root_stmt
))
2318 new_chain
->has_max_use_after
= true;
2323 ch1
->combined
= true;
2324 ch2
->combined
= true;
2328 /* Try to combine the CHAINS. */
2331 try_combine_chains (vec
<chain_p
> *chains
)
2334 chain_p ch1
, ch2
, cch
;
2335 auto_vec
<chain_p
> worklist
;
2337 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2338 if (chain_can_be_combined_p (ch1
))
2339 worklist
.safe_push (ch1
);
2341 while (!worklist
.is_empty ())
2343 ch1
= worklist
.pop ();
2344 if (!chain_can_be_combined_p (ch1
))
2347 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2349 if (!chain_can_be_combined_p (ch2
))
2352 cch
= combine_chains (ch1
, ch2
);
2355 worklist
.safe_push (cch
);
2356 chains
->safe_push (cch
);
2363 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2364 impossible because one of these initializers may trap, true otherwise. */
2367 prepare_initializers_chain (struct loop
*loop
, chain_p chain
)
2369 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
2370 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2373 edge entry
= loop_preheader_edge (loop
);
2375 /* Find the initializers for the variables, and check that they cannot
2377 chain
->inits
.create (n
);
2378 for (i
= 0; i
< n
; i
++)
2379 chain
->inits
.quick_push (NULL_TREE
);
2381 /* If we have replaced some looparound phi nodes, use their initializers
2382 instead of creating our own. */
2383 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
2385 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
2388 gcc_assert (laref
->distance
> 0);
2389 chain
->inits
[n
- laref
->distance
]
2390 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
2393 for (i
= 0; i
< n
; i
++)
2395 gimple_seq stmts
= NULL
;
2397 if (chain
->inits
[i
] != NULL_TREE
)
2400 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
2401 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
2403 gimple_seq_discard (stmts
);
2408 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
2410 chain
->inits
[i
] = init
;
2416 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2417 be used because the initializers might trap. */
2420 prepare_initializers (struct loop
*loop
, vec
<chain_p
> chains
)
2425 for (i
= 0; i
< chains
.length (); )
2428 if (prepare_initializers_chain (loop
, chain
))
2432 release_chain (chain
);
2433 chains
.unordered_remove (i
);
2438 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2442 tree_predictive_commoning_loop (struct loop
*loop
)
2444 vec
<data_reference_p
> datarefs
;
2445 vec
<ddr_p
> dependences
;
2446 struct component
*components
;
2447 vec
<chain_p
> chains
= vNULL
;
2448 unsigned unroll_factor
;
2449 struct tree_niter_desc desc
;
2450 bool unroll
= false;
2454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2455 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
2457 /* Find the data references and split them into components according to their
2458 dependence relations. */
2459 auto_vec
<loop_p
, 3> loop_nest
;
2460 dependences
.create (10);
2461 datarefs
.create (10);
2462 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
2465 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2466 fprintf (dump_file
, "Cannot analyze data dependencies\n");
2467 free_data_refs (datarefs
);
2468 free_dependence_relations (dependences
);
2472 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2473 dump_data_dependence_relations (dump_file
, dependences
);
2475 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
2476 loop_nest
.release ();
2477 free_dependence_relations (dependences
);
2480 free_data_refs (datarefs
);
2481 free_affine_expand_cache (&name_expansions
);
2485 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2487 fprintf (dump_file
, "Initial state:\n\n");
2488 dump_components (dump_file
, components
);
2491 /* Find the suitable components and split them into chains. */
2492 components
= filter_suitable_components (loop
, components
);
2494 tmp_vars
= BITMAP_ALLOC (NULL
);
2495 looparound_phis
= BITMAP_ALLOC (NULL
);
2496 determine_roots (loop
, components
, &chains
);
2497 release_components (components
);
2499 if (!chains
.exists ())
2501 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2503 "Predictive commoning failed: no suitable chains\n");
2506 prepare_initializers (loop
, chains
);
2508 /* Try to combine the chains that are always worked with together. */
2509 try_combine_chains (&chains
);
2511 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2513 fprintf (dump_file
, "Before commoning:\n\n");
2514 dump_chains (dump_file
, chains
);
2517 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2518 that its number of iterations is divisible by the factor. */
2519 unroll_factor
= determine_unroll_factor (chains
);
2521 unroll
= (unroll_factor
> 1
2522 && can_unroll_loop_p (loop
, unroll_factor
, &desc
));
2523 exit
= single_dom_exit (loop
);
2525 /* Execute the predictive commoning transformations, and possibly unroll the
2529 struct epcc_data dta
;
2531 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2532 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
2534 dta
.chains
= chains
;
2535 dta
.tmp_vars
= tmp_vars
;
2537 update_ssa (TODO_update_ssa_only_virtuals
);
2539 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2540 execute_pred_commoning_cbck is called may cause phi nodes to be
2541 reallocated, which is a problem since CHAINS may point to these
2542 statements. To fix this, we store the ssa names defined by the
2543 phi nodes here instead of the phi nodes themselves, and restore
2544 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2545 replace_phis_by_defined_names (chains
);
2547 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
2548 execute_pred_commoning_cbck
, &dta
);
2549 eliminate_temp_copies (loop
, tmp_vars
);
2553 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2555 "Executing predictive commoning without unrolling.\n");
2556 execute_pred_commoning (loop
, chains
, tmp_vars
);
2560 release_chains (chains
);
2561 free_data_refs (datarefs
);
2562 BITMAP_FREE (tmp_vars
);
2563 BITMAP_FREE (looparound_phis
);
2565 free_affine_expand_cache (&name_expansions
);
2570 /* Runs predictive commoning. */
2573 tree_predictive_commoning (void)
2575 bool unrolled
= false;
2579 initialize_original_copy_tables ();
2580 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
2581 if (optimize_loop_for_speed_p (loop
))
2583 unrolled
|= tree_predictive_commoning_loop (loop
);
2589 ret
= TODO_cleanup_cfg
;
2591 free_original_copy_tables ();
2596 /* Predictive commoning Pass. */
2599 run_tree_predictive_commoning (struct function
*fun
)
2601 if (number_of_loops (fun
) <= 1)
2604 return tree_predictive_commoning ();
2609 const pass_data pass_data_predcom
=
2611 GIMPLE_PASS
, /* type */
2613 OPTGROUP_LOOP
, /* optinfo_flags */
2614 TV_PREDCOM
, /* tv_id */
2615 PROP_cfg
, /* properties_required */
2616 0, /* properties_provided */
2617 0, /* properties_destroyed */
2618 0, /* todo_flags_start */
2619 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
2622 class pass_predcom
: public gimple_opt_pass
2625 pass_predcom (gcc::context
*ctxt
)
2626 : gimple_opt_pass (pass_data_predcom
, ctxt
)
2629 /* opt_pass methods: */
2630 virtual bool gate (function
*) { return flag_predictive_commoning
!= 0; }
2631 virtual unsigned int execute (function
*fun
)
2633 return run_tree_predictive_commoning (fun
);
2636 }; // class pass_predcom
2641 make_pass_predcom (gcc::context
*ctxt
)
2643 return new pass_predcom (ctxt
);