1 /* Predictive commoning.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
31 Let us demonstrate what is done on an example:
33 for (i = 0; i < 100; i++)
35 a[i+2] = a[i] + a[i+1];
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
67 In our example, we get the following chains (the chain for c is invalid).
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
93 e[i] + f[i+1] + e[i+1] + f[i]
95 can be reassociated as
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 and we can combine the chains for e and f into one chain.
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
140 In our case, F = 2 and the (main loop of the) result is
142 for (i = 0; i < ...; i += 2)
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
163 for (i = 0; i < n; i++)
169 It can be replaced with:
173 for (i = 0; i < n; i++)
183 If the loop runs more than 1 iterations, it can be further simplified into:
185 for (i = 0; i < n; i++)
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
209 #include "coretypes.h"
215 #include "tree-pass.h"
217 #include "gimple-pretty-print.h"
219 #include "fold-const.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "tree-affine.h"
235 #include "builtins.h"
237 /* The maximum number of iterations between the considered memory
240 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242 /* Data references (or phi nodes that carry data reference values across
248 /* The reference itself. */
249 struct data_reference
*ref
;
251 /* The statement in that the reference appears. */
254 /* In case that STMT is a phi node, this field is set to the SSA name
255 defined by it in replace_phis_by_defined_names (in order to avoid
256 pointing to phi node that got reallocated in the meantime). */
257 tree name_defined_by_phi
;
259 /* Distance of the reference from the root of the chain (in number of
260 iterations of the loop). */
263 /* Number of iterations offset from the first reference in the component. */
266 /* Number of the reference in a component, in dominance ordering. */
269 /* True if the memory reference is always accessed when the loop is
271 unsigned always_accessed
: 1;
275 /* Type of the chain of the references. */
279 /* The addresses of the references in the chain are constant. */
282 /* There are only loads in the chain. */
285 /* Root of the chain is store, the rest are loads. */
288 /* There are only stores in the chain. */
291 /* A combination of two chains. */
295 /* Chains of data references. */
299 /* Type of the chain. */
300 enum chain_type type
;
302 /* For combination chains, the operator and the two chains that are
303 combined, and the type of the result. */
306 struct chain
*ch1
, *ch2
;
308 /* The references in the chain. */
311 /* The maximum distance of the reference in the chain from the root. */
314 /* The variables used to copy the value throughout iterations. */
317 /* Initializers for the variables. */
320 /* Finalizers for the eliminated stores. */
323 /* gimple stmts intializing the initial variables of the chain. */
326 /* gimple stmts finalizing the eliminated stores of the chain. */
329 /* True if there is a use of a variable with the maximal distance
330 that comes after the root in the loop. */
331 unsigned has_max_use_after
: 1;
333 /* True if all the memory references in the chain are always accessed. */
334 unsigned all_always_accessed
: 1;
336 /* True if this chain was combined together with some other chain. */
337 unsigned combined
: 1;
339 /* True if this is store elimination chain and eliminated stores store
340 loop invariant value into memory. */
341 unsigned inv_store_elimination
: 1;
345 /* Describes the knowledge about the step of the memory references in
350 /* The step is zero. */
353 /* The step is nonzero. */
356 /* The step may or may not be nonzero. */
360 /* Components of the data dependence graph. */
364 /* The references in the component. */
367 /* What we know about the step of the references in the component. */
368 enum ref_step_type comp_step
;
370 /* True if all references in component are stores and we try to do
371 intra/inter loop iteration dead store elimination. */
372 bool eliminate_store_p
;
374 /* Next component in the list. */
375 struct component
*next
;
378 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
380 static bitmap looparound_phis
;
382 /* Cache used by tree_to_aff_combination_expand. */
384 static hash_map
<tree
, name_expansion
*> *name_expansions
;
386 /* Dumps data reference REF to FILE. */
388 extern void dump_dref (FILE *, dref
);
390 dump_dref (FILE *file
, dref ref
)
395 print_generic_expr (file
, DR_REF (ref
->ref
), TDF_SLIM
);
396 fprintf (file
, " (id %u%s)\n", ref
->pos
,
397 DR_IS_READ (ref
->ref
) ? "" : ", write");
399 fprintf (file
, " offset ");
400 print_decs (ref
->offset
, file
);
401 fprintf (file
, "\n");
403 fprintf (file
, " distance %u\n", ref
->distance
);
407 if (gimple_code (ref
->stmt
) == GIMPLE_PHI
)
408 fprintf (file
, " looparound ref\n");
410 fprintf (file
, " combination ref\n");
411 fprintf (file
, " in statement ");
412 print_gimple_stmt (file
, ref
->stmt
, 0, TDF_SLIM
);
413 fprintf (file
, "\n");
414 fprintf (file
, " distance %u\n", ref
->distance
);
419 /* Dumps CHAIN to FILE. */
421 extern void dump_chain (FILE *, chain_p
);
423 dump_chain (FILE *file
, chain_p chain
)
426 const char *chain_type
;
433 chain_type
= "Load motion";
437 chain_type
= "Loads-only";
441 chain_type
= "Store-loads";
445 chain_type
= "Store-stores";
449 chain_type
= "Combination";
456 fprintf (file
, "%s chain %p%s\n", chain_type
, (void *) chain
,
457 chain
->combined
? " (combined)" : "");
458 if (chain
->type
!= CT_INVARIANT
)
459 fprintf (file
, " max distance %u%s\n", chain
->length
,
460 chain
->has_max_use_after
? "" : ", may reuse first");
462 if (chain
->type
== CT_COMBINATION
)
464 fprintf (file
, " equal to %p %s %p in type ",
465 (void *) chain
->ch1
, op_symbol_code (chain
->op
),
466 (void *) chain
->ch2
);
467 print_generic_expr (file
, chain
->rslt_type
, TDF_SLIM
);
468 fprintf (file
, "\n");
471 if (chain
->vars
.exists ())
473 fprintf (file
, " vars");
474 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
477 print_generic_expr (file
, var
, TDF_SLIM
);
479 fprintf (file
, "\n");
482 if (chain
->inits
.exists ())
484 fprintf (file
, " inits");
485 FOR_EACH_VEC_ELT (chain
->inits
, i
, var
)
488 print_generic_expr (file
, var
, TDF_SLIM
);
490 fprintf (file
, "\n");
493 fprintf (file
, " references:\n");
494 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
497 fprintf (file
, "\n");
500 /* Dumps CHAINS to FILE. */
502 extern void dump_chains (FILE *, vec
<chain_p
> );
504 dump_chains (FILE *file
, vec
<chain_p
> chains
)
509 FOR_EACH_VEC_ELT (chains
, i
, chain
)
510 dump_chain (file
, chain
);
513 /* Dumps COMP to FILE. */
515 extern void dump_component (FILE *, struct component
*);
517 dump_component (FILE *file
, struct component
*comp
)
522 fprintf (file
, "Component%s:\n",
523 comp
->comp_step
== RS_INVARIANT
? " (invariant)" : "");
524 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
526 fprintf (file
, "\n");
529 /* Dumps COMPS to FILE. */
531 extern void dump_components (FILE *, struct component
*);
533 dump_components (FILE *file
, struct component
*comps
)
535 struct component
*comp
;
537 for (comp
= comps
; comp
; comp
= comp
->next
)
538 dump_component (file
, comp
);
541 /* Frees a chain CHAIN. */
544 release_chain (chain_p chain
)
552 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
555 chain
->refs
.release ();
556 chain
->vars
.release ();
557 chain
->inits
.release ();
559 gimple_seq_discard (chain
->init_seq
);
561 chain
->finis
.release ();
563 gimple_seq_discard (chain
->fini_seq
);
571 release_chains (vec
<chain_p
> chains
)
576 FOR_EACH_VEC_ELT (chains
, i
, chain
)
577 release_chain (chain
);
581 /* Frees a component COMP. */
584 release_component (struct component
*comp
)
586 comp
->refs
.release ();
590 /* Frees list of components COMPS. */
593 release_components (struct component
*comps
)
595 struct component
*act
, *next
;
597 for (act
= comps
; act
; act
= next
)
600 release_component (act
);
604 /* Finds a root of tree given by FATHERS containing A, and performs path
608 component_of (unsigned fathers
[], unsigned a
)
612 for (root
= a
; root
!= fathers
[root
]; root
= fathers
[root
])
615 for (; a
!= root
; a
= n
)
624 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
625 components, A and B are components to merge. */
628 merge_comps (unsigned fathers
[], unsigned sizes
[], unsigned a
, unsigned b
)
630 unsigned ca
= component_of (fathers
, a
);
631 unsigned cb
= component_of (fathers
, b
);
636 if (sizes
[ca
] < sizes
[cb
])
638 sizes
[cb
] += sizes
[ca
];
643 sizes
[ca
] += sizes
[cb
];
648 /* Returns true if A is a reference that is suitable for predictive commoning
649 in the innermost loop that contains it. REF_STEP is set according to the
650 step of the reference A. */
653 suitable_reference_p (struct data_reference
*a
, enum ref_step_type
*ref_step
)
655 tree ref
= DR_REF (a
), step
= DR_STEP (a
);
658 || TREE_THIS_VOLATILE (ref
)
659 || !is_gimple_reg_type (TREE_TYPE (ref
))
660 || tree_could_throw_p (ref
))
663 if (integer_zerop (step
))
664 *ref_step
= RS_INVARIANT
;
665 else if (integer_nonzerop (step
))
666 *ref_step
= RS_NONZERO
;
673 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
676 aff_combination_dr_offset (struct data_reference
*dr
, aff_tree
*offset
)
678 tree type
= TREE_TYPE (DR_OFFSET (dr
));
681 tree_to_aff_combination_expand (DR_OFFSET (dr
), type
, offset
,
683 aff_combination_const (&delta
, type
, wi::to_poly_widest (DR_INIT (dr
)));
684 aff_combination_add (offset
, &delta
);
687 /* Determines number of iterations of the innermost enclosing loop before B
688 refers to exactly the same location as A and stores it to OFF. If A and
689 B do not have the same step, they never meet, or anything else fails,
690 returns false, otherwise returns true. Both A and B are assumed to
691 satisfy suitable_reference_p. */
694 determine_offset (struct data_reference
*a
, struct data_reference
*b
,
695 poly_widest_int
*off
)
697 aff_tree diff
, baseb
, step
;
700 /* Check that both the references access the location in the same type. */
701 typea
= TREE_TYPE (DR_REF (a
));
702 typeb
= TREE_TYPE (DR_REF (b
));
703 if (!useless_type_conversion_p (typeb
, typea
))
706 /* Check whether the base address and the step of both references is the
708 if (!operand_equal_p (DR_STEP (a
), DR_STEP (b
), 0)
709 || !operand_equal_p (DR_BASE_ADDRESS (a
), DR_BASE_ADDRESS (b
), 0))
712 if (integer_zerop (DR_STEP (a
)))
714 /* If the references have loop invariant address, check that they access
715 exactly the same location. */
717 return (operand_equal_p (DR_OFFSET (a
), DR_OFFSET (b
), 0)
718 && operand_equal_p (DR_INIT (a
), DR_INIT (b
), 0));
721 /* Compare the offsets of the addresses, and check whether the difference
722 is a multiple of step. */
723 aff_combination_dr_offset (a
, &diff
);
724 aff_combination_dr_offset (b
, &baseb
);
725 aff_combination_scale (&baseb
, -1);
726 aff_combination_add (&diff
, &baseb
);
728 tree_to_aff_combination_expand (DR_STEP (a
), TREE_TYPE (DR_STEP (a
)),
729 &step
, &name_expansions
);
730 return aff_combination_constant_multiple_p (&diff
, &step
, off
);
733 /* Returns the last basic block in LOOP for that we are sure that
734 it is executed whenever the loop is entered. */
737 last_always_executed_block (class loop
*loop
)
740 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
742 basic_block last
= loop
->latch
;
744 FOR_EACH_VEC_ELT (exits
, i
, ex
)
745 last
= nearest_common_dominator (CDI_DOMINATORS
, last
, ex
->src
);
750 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
752 static struct component
*
753 split_data_refs_to_components (class loop
*loop
,
754 vec
<data_reference_p
> datarefs
,
757 unsigned i
, n
= datarefs
.length ();
758 unsigned ca
, ia
, ib
, bad
;
759 unsigned *comp_father
= XNEWVEC (unsigned, n
+ 1);
760 unsigned *comp_size
= XNEWVEC (unsigned, n
+ 1);
761 struct component
**comps
;
762 struct data_reference
*dr
, *dra
, *drb
;
763 struct data_dependence_relation
*ddr
;
764 struct component
*comp_list
= NULL
, *comp
;
766 /* Don't do store elimination if loop has multiple exit edges. */
767 bool eliminate_store_p
= single_exit (loop
) != NULL
;
768 basic_block last_always_executed
= last_always_executed_block (loop
);
769 auto_bitmap no_store_store_comps
;
771 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
775 /* A fake reference for call or asm_expr that may clobber memory;
779 /* predcom pass isn't prepared to handle calls with data references. */
780 if (is_gimple_call (DR_STMT (dr
)))
782 dr
->aux
= (void *) (size_t) i
;
787 /* A component reserved for the "bad" data references. */
791 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
793 enum ref_step_type dummy
;
795 if (!suitable_reference_p (dr
, &dummy
))
797 ia
= (unsigned) (size_t) dr
->aux
;
798 merge_comps (comp_father
, comp_size
, n
, ia
);
802 FOR_EACH_VEC_ELT (depends
, i
, ddr
)
804 poly_widest_int dummy_off
;
806 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
812 /* Don't do store elimination if there is any unknown dependence for
813 any store data reference. */
814 if ((DR_IS_WRITE (dra
) || DR_IS_WRITE (drb
))
815 && (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
816 || DDR_NUM_DIST_VECTS (ddr
) == 0))
817 eliminate_store_p
= false;
819 ia
= component_of (comp_father
, (unsigned) (size_t) dra
->aux
);
820 ib
= component_of (comp_father
, (unsigned) (size_t) drb
->aux
);
824 bad
= component_of (comp_father
, n
);
826 /* If both A and B are reads, we may ignore unsuitable dependences. */
827 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
829 if (ia
== bad
|| ib
== bad
830 || !determine_offset (dra
, drb
, &dummy_off
))
833 /* If A is read and B write or vice versa and there is unsuitable
834 dependence, instead of merging both components into a component
835 that will certainly not pass suitable_component_p, just put the
836 read into bad component, perhaps at least the write together with
837 all the other data refs in it's component will be optimizable. */
838 else if (DR_IS_READ (dra
) && ib
!= bad
)
842 bitmap_set_bit (no_store_store_comps
, ib
);
845 else if (!determine_offset (dra
, drb
, &dummy_off
))
847 bitmap_set_bit (no_store_store_comps
, ib
);
848 merge_comps (comp_father
, comp_size
, bad
, ia
);
852 else if (DR_IS_READ (drb
) && ia
!= bad
)
856 bitmap_set_bit (no_store_store_comps
, ia
);
859 else if (!determine_offset (dra
, drb
, &dummy_off
))
861 bitmap_set_bit (no_store_store_comps
, ia
);
862 merge_comps (comp_father
, comp_size
, bad
, ib
);
866 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
)
867 && ia
!= bad
&& ib
!= bad
868 && !determine_offset (dra
, drb
, &dummy_off
))
870 merge_comps (comp_father
, comp_size
, bad
, ia
);
871 merge_comps (comp_father
, comp_size
, bad
, ib
);
875 merge_comps (comp_father
, comp_size
, ia
, ib
);
878 if (eliminate_store_p
)
880 tree niters
= number_of_latch_executions (loop
);
882 /* Don't do store elimination if niters info is unknown because stores
883 in the last iteration can't be eliminated and we need to recover it
885 eliminate_store_p
= (niters
!= NULL_TREE
&& niters
!= chrec_dont_know
);
888 comps
= XCNEWVEC (struct component
*, n
);
889 bad
= component_of (comp_father
, n
);
890 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
892 ia
= (unsigned) (size_t) dr
->aux
;
893 ca
= component_of (comp_father
, ia
);
900 comp
= XCNEW (struct component
);
901 comp
->refs
.create (comp_size
[ca
]);
902 comp
->eliminate_store_p
= eliminate_store_p
;
906 dataref
= XCNEW (class dref_d
);
908 dataref
->stmt
= DR_STMT (dr
);
910 dataref
->distance
= 0;
912 dataref
->always_accessed
913 = dominated_by_p (CDI_DOMINATORS
, last_always_executed
,
914 gimple_bb (dataref
->stmt
));
915 dataref
->pos
= comp
->refs
.length ();
916 comp
->refs
.quick_push (dataref
);
919 if (eliminate_store_p
)
922 EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps
, 0, ia
, bi
)
924 ca
= component_of (comp_father
, ia
);
926 comps
[ca
]->eliminate_store_p
= false;
930 for (i
= 0; i
< n
; i
++)
935 comp
->next
= comp_list
;
947 /* Returns true if the component COMP satisfies the conditions
948 described in 2) at the beginning of this file. LOOP is the current
952 suitable_component_p (class loop
*loop
, struct component
*comp
)
956 basic_block ba
, bp
= loop
->header
;
957 bool ok
, has_write
= false;
959 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
961 ba
= gimple_bb (a
->stmt
);
963 if (!just_once_each_iteration_p (loop
, ba
))
966 gcc_assert (dominated_by_p (CDI_DOMINATORS
, ba
, bp
));
969 if (DR_IS_WRITE (a
->ref
))
973 first
= comp
->refs
[0];
974 ok
= suitable_reference_p (first
->ref
, &comp
->comp_step
);
978 for (i
= 1; comp
->refs
.iterate (i
, &a
); i
++)
980 /* Polynomial offsets are no use, since we need to know the
981 gap between iteration numbers at compile time. */
982 poly_widest_int offset
;
983 if (!determine_offset (first
->ref
, a
->ref
, &offset
)
984 || !offset
.is_constant (&a
->offset
))
987 enum ref_step_type a_step
;
988 gcc_checking_assert (suitable_reference_p (a
->ref
, &a_step
)
989 && a_step
== comp
->comp_step
);
992 /* If there is a write inside the component, we must know whether the
993 step is nonzero or not -- we would not otherwise be able to recognize
994 whether the value accessed by reads comes from the OFFSET-th iteration
995 or the previous one. */
996 if (has_write
&& comp
->comp_step
== RS_ANY
)
1002 /* Check the conditions on references inside each of components COMPS,
1003 and remove the unsuitable components from the list. The new list
1004 of components is returned. The conditions are described in 2) at
1005 the beginning of this file. LOOP is the current loop. */
1007 static struct component
*
1008 filter_suitable_components (class loop
*loop
, struct component
*comps
)
1010 struct component
**comp
, *act
;
1012 for (comp
= &comps
; *comp
; )
1015 if (suitable_component_p (loop
, act
))
1023 FOR_EACH_VEC_ELT (act
->refs
, i
, ref
)
1025 release_component (act
);
1032 /* Compares two drefs A and B by their offset and position. Callback for
1036 order_drefs (const void *a
, const void *b
)
1038 const dref
*const da
= (const dref
*) a
;
1039 const dref
*const db
= (const dref
*) b
;
1040 int offcmp
= wi::cmps ((*da
)->offset
, (*db
)->offset
);
1045 return (*da
)->pos
- (*db
)->pos
;
1048 /* Compares two drefs A and B by their position. Callback for qsort. */
1051 order_drefs_by_pos (const void *a
, const void *b
)
1053 const dref
*const da
= (const dref
*) a
;
1054 const dref
*const db
= (const dref
*) b
;
1056 return (*da
)->pos
- (*db
)->pos
;
1059 /* Returns root of the CHAIN. */
1062 get_chain_root (chain_p chain
)
1064 return chain
->refs
[0];
1067 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1071 get_chain_last_write_at (chain_p chain
, unsigned distance
)
1073 for (unsigned i
= chain
->refs
.length (); i
> 0; i
--)
1074 if (DR_IS_WRITE (chain
->refs
[i
- 1]->ref
)
1075 && distance
== chain
->refs
[i
- 1]->distance
)
1076 return chain
->refs
[i
- 1];
1081 /* Given CHAIN, returns the last write ref with the same distance before load
1082 at index LOAD_IDX, or NULL if it doesn't exist. */
1085 get_chain_last_write_before_load (chain_p chain
, unsigned load_idx
)
1087 gcc_assert (load_idx
< chain
->refs
.length ());
1089 unsigned distance
= chain
->refs
[load_idx
]->distance
;
1091 for (unsigned i
= load_idx
; i
> 0; i
--)
1092 if (DR_IS_WRITE (chain
->refs
[i
- 1]->ref
)
1093 && distance
== chain
->refs
[i
- 1]->distance
)
1094 return chain
->refs
[i
- 1];
1099 /* Adds REF to the chain CHAIN. */
1102 add_ref_to_chain (chain_p chain
, dref ref
)
1104 dref root
= get_chain_root (chain
);
1106 gcc_assert (wi::les_p (root
->offset
, ref
->offset
));
1107 widest_int dist
= ref
->offset
- root
->offset
;
1108 gcc_assert (wi::fits_uhwi_p (dist
));
1110 chain
->refs
.safe_push (ref
);
1112 ref
->distance
= dist
.to_uhwi ();
1114 if (ref
->distance
>= chain
->length
)
1116 chain
->length
= ref
->distance
;
1117 chain
->has_max_use_after
= false;
1120 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1121 if (DR_IS_WRITE (ref
->ref
))
1122 chain
->type
= CT_STORE_STORE
;
1124 /* Don't set the flag for store-store chain since there is no use. */
1125 if (chain
->type
!= CT_STORE_STORE
1126 && ref
->distance
== chain
->length
1127 && ref
->pos
> root
->pos
)
1128 chain
->has_max_use_after
= true;
1130 chain
->all_always_accessed
&= ref
->always_accessed
;
1133 /* Returns the chain for invariant component COMP. */
1136 make_invariant_chain (struct component
*comp
)
1138 chain_p chain
= XCNEW (struct chain
);
1142 chain
->type
= CT_INVARIANT
;
1144 chain
->all_always_accessed
= true;
1146 FOR_EACH_VEC_ELT (comp
->refs
, i
, ref
)
1148 chain
->refs
.safe_push (ref
);
1149 chain
->all_always_accessed
&= ref
->always_accessed
;
1152 chain
->inits
= vNULL
;
1153 chain
->finis
= vNULL
;
1158 /* Make a new chain of type TYPE rooted at REF. */
1161 make_rooted_chain (dref ref
, enum chain_type type
)
1163 chain_p chain
= XCNEW (struct chain
);
1166 chain
->refs
.safe_push (ref
);
1167 chain
->all_always_accessed
= ref
->always_accessed
;
1170 chain
->inits
= vNULL
;
1171 chain
->finis
= vNULL
;
1176 /* Returns true if CHAIN is not trivial. */
1179 nontrivial_chain_p (chain_p chain
)
1181 return chain
!= NULL
&& chain
->refs
.length () > 1;
1184 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1188 name_for_ref (dref ref
)
1192 if (is_gimple_assign (ref
->stmt
))
1194 if (!ref
->ref
|| DR_IS_READ (ref
->ref
))
1195 name
= gimple_assign_lhs (ref
->stmt
);
1197 name
= gimple_assign_rhs1 (ref
->stmt
);
1200 name
= PHI_RESULT (ref
->stmt
);
1202 return (TREE_CODE (name
) == SSA_NAME
? name
: NULL_TREE
);
1205 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1206 iterations of the innermost enclosing loop). */
1209 valid_initializer_p (struct data_reference
*ref
,
1210 unsigned distance
, struct data_reference
*root
)
1212 aff_tree diff
, base
, step
;
1213 poly_widest_int off
;
1215 /* Both REF and ROOT must be accessing the same object. */
1216 if (!operand_equal_p (DR_BASE_ADDRESS (ref
), DR_BASE_ADDRESS (root
), 0))
1219 /* The initializer is defined outside of loop, hence its address must be
1220 invariant inside the loop. */
1221 gcc_assert (integer_zerop (DR_STEP (ref
)));
1223 /* If the address of the reference is invariant, initializer must access
1224 exactly the same location. */
1225 if (integer_zerop (DR_STEP (root
)))
1226 return (operand_equal_p (DR_OFFSET (ref
), DR_OFFSET (root
), 0)
1227 && operand_equal_p (DR_INIT (ref
), DR_INIT (root
), 0));
1229 /* Verify that this index of REF is equal to the root's index at
1230 -DISTANCE-th iteration. */
1231 aff_combination_dr_offset (root
, &diff
);
1232 aff_combination_dr_offset (ref
, &base
);
1233 aff_combination_scale (&base
, -1);
1234 aff_combination_add (&diff
, &base
);
1236 tree_to_aff_combination_expand (DR_STEP (root
), TREE_TYPE (DR_STEP (root
)),
1237 &step
, &name_expansions
);
1238 if (!aff_combination_constant_multiple_p (&diff
, &step
, &off
))
1241 if (maybe_ne (off
, distance
))
1247 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1248 initial value is correct (equal to initial value of REF shifted by one
1249 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1250 is the root of the current chain. */
1253 find_looparound_phi (class loop
*loop
, dref ref
, dref root
)
1255 tree name
, init
, init_ref
;
1258 edge latch
= loop_latch_edge (loop
);
1259 struct data_reference init_dr
;
1262 if (is_gimple_assign (ref
->stmt
))
1264 if (DR_IS_READ (ref
->ref
))
1265 name
= gimple_assign_lhs (ref
->stmt
);
1267 name
= gimple_assign_rhs1 (ref
->stmt
);
1270 name
= PHI_RESULT (ref
->stmt
);
1274 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1277 if (PHI_ARG_DEF_FROM_EDGE (phi
, latch
) == name
)
1281 if (gsi_end_p (psi
))
1284 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1285 if (TREE_CODE (init
) != SSA_NAME
)
1287 init_stmt
= SSA_NAME_DEF_STMT (init
);
1288 if (gimple_code (init_stmt
) != GIMPLE_ASSIGN
)
1290 gcc_assert (gimple_assign_lhs (init_stmt
) == init
);
1292 init_ref
= gimple_assign_rhs1 (init_stmt
);
1293 if (!REFERENCE_CLASS_P (init_ref
)
1294 && !DECL_P (init_ref
))
1297 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1298 loop enclosing PHI). */
1299 memset (&init_dr
, 0, sizeof (struct data_reference
));
1300 DR_REF (&init_dr
) = init_ref
;
1301 DR_STMT (&init_dr
) = phi
;
1302 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr
), init_ref
, loop
,
1306 if (!valid_initializer_p (&init_dr
, ref
->distance
+ 1, root
->ref
))
1312 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1315 insert_looparound_copy (chain_p chain
, dref ref
, gphi
*phi
)
1317 dref nw
= XCNEW (class dref_d
), aref
;
1321 nw
->distance
= ref
->distance
+ 1;
1322 nw
->always_accessed
= 1;
1324 FOR_EACH_VEC_ELT (chain
->refs
, i
, aref
)
1325 if (aref
->distance
>= nw
->distance
)
1327 chain
->refs
.safe_insert (i
, nw
);
1329 if (nw
->distance
> chain
->length
)
1331 chain
->length
= nw
->distance
;
1332 chain
->has_max_use_after
= false;
1336 /* For references in CHAIN that are copied around the LOOP (created previously
1337 by PRE, or by user), add the results of such copies to the chain. This
1338 enables us to remove the copies by unrolling, and may need less registers
1339 (also, it may allow us to combine chains together). */
1342 add_looparound_copies (class loop
*loop
, chain_p chain
)
1345 dref ref
, root
= get_chain_root (chain
);
1348 if (chain
->type
== CT_STORE_STORE
)
1351 FOR_EACH_VEC_ELT (chain
->refs
, i
, ref
)
1353 phi
= find_looparound_phi (loop
, ref
, root
);
1357 bitmap_set_bit (looparound_phis
, SSA_NAME_VERSION (PHI_RESULT (phi
)));
1358 insert_looparound_copy (chain
, ref
, phi
);
1362 /* Find roots of the values and determine distances in the component COMP.
1363 The references are redistributed into CHAINS. LOOP is the current
1367 determine_roots_comp (class loop
*loop
,
1368 struct component
*comp
,
1369 vec
<chain_p
> *chains
)
1373 chain_p chain
= NULL
;
1374 widest_int last_ofs
= 0;
1375 enum chain_type type
;
1377 /* Invariants are handled specially. */
1378 if (comp
->comp_step
== RS_INVARIANT
)
1380 chain
= make_invariant_chain (comp
);
1381 chains
->safe_push (chain
);
1385 /* Trivial component. */
1386 if (comp
->refs
.length () <= 1)
1388 if (comp
->refs
.length () == 1)
1390 free (comp
->refs
[0]);
1391 comp
->refs
.truncate (0);
1396 comp
->refs
.qsort (order_drefs
);
1398 /* For Store-Store chain, we only support load if it is dominated by a
1399 store statement in the same iteration of loop. */
1400 if (comp
->eliminate_store_p
)
1401 for (a
= NULL
, i
= 0; i
< comp
->refs
.length (); i
++)
1403 if (DR_IS_WRITE (comp
->refs
[i
]->ref
))
1405 else if (a
== NULL
|| a
->offset
!= comp
->refs
[i
]->offset
)
1407 /* If there is load that is not dominated by a store in the
1408 same iteration of loop, clear the flag so no Store-Store
1409 chain is generated for this component. */
1410 comp
->eliminate_store_p
= false;
1415 /* Determine roots and create chains for components. */
1416 FOR_EACH_VEC_ELT (comp
->refs
, i
, a
)
1419 || (chain
->type
== CT_LOAD
&& DR_IS_WRITE (a
->ref
))
1420 || (!comp
->eliminate_store_p
&& DR_IS_WRITE (a
->ref
))
1421 || wi::leu_p (MAX_DISTANCE
, a
->offset
- last_ofs
))
1423 if (nontrivial_chain_p (chain
))
1425 add_looparound_copies (loop
, chain
);
1426 chains
->safe_push (chain
);
1429 release_chain (chain
);
1431 /* Determine type of the chain. If the root reference is a load,
1432 this can only be a CT_LOAD chain; other chains are intialized
1433 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1434 new reference is added. */
1435 type
= DR_IS_READ (a
->ref
) ? CT_LOAD
: CT_STORE_LOAD
;
1436 chain
= make_rooted_chain (a
, type
);
1437 last_ofs
= a
->offset
;
1441 add_ref_to_chain (chain
, a
);
1444 if (nontrivial_chain_p (chain
))
1446 add_looparound_copies (loop
, chain
);
1447 chains
->safe_push (chain
);
1450 release_chain (chain
);
1453 /* Find roots of the values and determine distances in components COMPS, and
1454 separates the references to CHAINS. LOOP is the current loop. */
1457 determine_roots (class loop
*loop
,
1458 struct component
*comps
, vec
<chain_p
> *chains
)
1460 struct component
*comp
;
1462 for (comp
= comps
; comp
; comp
= comp
->next
)
1463 determine_roots_comp (loop
, comp
, chains
);
1466 /* Replace the reference in statement STMT with temporary variable
1467 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1468 the reference in the statement. IN_LHS is true if the reference
1469 is in the lhs of STMT, false if it is in rhs. */
1472 replace_ref_with (gimple
*stmt
, tree new_tree
, bool set
, bool in_lhs
)
1476 gimple_stmt_iterator bsi
, psi
;
1478 if (gimple_code (stmt
) == GIMPLE_PHI
)
1480 gcc_assert (!in_lhs
&& !set
);
1482 val
= PHI_RESULT (stmt
);
1483 bsi
= gsi_after_labels (gimple_bb (stmt
));
1484 psi
= gsi_for_stmt (stmt
);
1485 remove_phi_node (&psi
, false);
1487 /* Turn the phi node into GIMPLE_ASSIGN. */
1488 new_stmt
= gimple_build_assign (val
, new_tree
);
1489 gsi_insert_before (&bsi
, new_stmt
, GSI_NEW_STMT
);
1493 /* Since the reference is of gimple_reg type, it should only
1494 appear as lhs or rhs of modify statement. */
1495 gcc_assert (is_gimple_assign (stmt
));
1497 bsi
= gsi_for_stmt (stmt
);
1499 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1502 gcc_assert (!in_lhs
);
1503 gimple_assign_set_rhs_from_tree (&bsi
, new_tree
);
1504 stmt
= gsi_stmt (bsi
);
1511 /* We have statement
1515 If OLD is a memory reference, then VAL is gimple_val, and we transform
1521 Otherwise, we are replacing a combination chain,
1522 VAL is the expression that performs the combination, and OLD is an
1523 SSA name. In this case, we transform the assignment to
1530 val
= gimple_assign_lhs (stmt
);
1531 if (TREE_CODE (val
) != SSA_NAME
)
1533 val
= gimple_assign_rhs1 (stmt
);
1534 gcc_assert (gimple_assign_single_p (stmt
));
1535 if (TREE_CLOBBER_P (val
))
1536 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (new_tree
));
1538 gcc_assert (gimple_assign_copy_p (stmt
));
1550 val
= gimple_assign_lhs (stmt
);
1553 new_stmt
= gimple_build_assign (new_tree
, unshare_expr (val
));
1554 gsi_insert_after (&bsi
, new_stmt
, GSI_NEW_STMT
);
1557 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1558 of the loop it was analyzed in. Append init stmts to STMTS. */
1561 ref_at_iteration (data_reference_p dr
, int iter
,
1562 gimple_seq
*stmts
, tree niters
= NULL_TREE
)
1564 tree off
= DR_OFFSET (dr
);
1565 tree coff
= DR_INIT (dr
);
1566 tree ref
= DR_REF (dr
);
1567 enum tree_code ref_code
= ERROR_MARK
;
1568 tree ref_type
= NULL_TREE
;
1569 tree ref_op1
= NULL_TREE
;
1570 tree ref_op2
= NULL_TREE
;
1575 new_offset
= size_binop (MULT_EXPR
, DR_STEP (dr
), ssize_int (iter
));
1576 if (TREE_CODE (new_offset
) == INTEGER_CST
)
1577 coff
= size_binop (PLUS_EXPR
, coff
, new_offset
);
1579 off
= size_binop (PLUS_EXPR
, off
, new_offset
);
1582 if (niters
!= NULL_TREE
)
1584 niters
= fold_convert (ssizetype
, niters
);
1585 new_offset
= size_binop (MULT_EXPR
, DR_STEP (dr
), niters
);
1586 if (TREE_CODE (niters
) == INTEGER_CST
)
1587 coff
= size_binop (PLUS_EXPR
, coff
, new_offset
);
1589 off
= size_binop (PLUS_EXPR
, off
, new_offset
);
1592 /* While data-ref analysis punts on bit offsets it still handles
1593 bitfield accesses at byte boundaries. Cope with that. Note that
1594 if the bitfield object also starts at a byte-boundary we can simply
1595 replicate the COMPONENT_REF, but we have to subtract the component's
1596 byte-offset from the MEM_REF address first.
1597 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1598 start at offset zero. */
1599 if (TREE_CODE (ref
) == COMPONENT_REF
1600 && DECL_BIT_FIELD (TREE_OPERAND (ref
, 1)))
1602 unsigned HOST_WIDE_INT boff
;
1603 tree field
= TREE_OPERAND (ref
, 1);
1604 tree offset
= component_ref_field_offset (ref
);
1605 ref_type
= TREE_TYPE (ref
);
1606 boff
= tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field
));
1607 /* This can occur in Ada. See the comment in get_bit_range. */
1608 if (boff
% BITS_PER_UNIT
!= 0
1609 || !tree_fits_uhwi_p (offset
))
1611 ref_code
= BIT_FIELD_REF
;
1612 ref_op1
= DECL_SIZE (field
);
1613 ref_op2
= bitsize_zero_node
;
1617 boff
>>= LOG2_BITS_PER_UNIT
;
1618 boff
+= tree_to_uhwi (offset
);
1619 coff
= size_binop (MINUS_EXPR
, coff
, ssize_int (boff
));
1620 ref_code
= COMPONENT_REF
;
1622 ref_op2
= TREE_OPERAND (ref
, 2);
1623 ref
= TREE_OPERAND (ref
, 0);
1626 tree addr
= fold_build_pointer_plus (DR_BASE_ADDRESS (dr
), off
);
1627 addr
= force_gimple_operand_1 (unshare_expr (addr
), stmts
,
1628 is_gimple_mem_ref_addr
, NULL_TREE
);
1629 tree alias_ptr
= fold_convert (reference_alias_ptr_type (ref
), coff
);
1630 tree type
= build_aligned_type (TREE_TYPE (ref
),
1631 get_object_alignment (ref
));
1632 ref
= build2 (MEM_REF
, type
, addr
, alias_ptr
);
1634 ref
= build3 (ref_code
, ref_type
, ref
, ref_op1
, ref_op2
);
1638 /* Get the initialization expression for the INDEX-th temporary variable
1642 get_init_expr (chain_p chain
, unsigned index
)
1644 if (chain
->type
== CT_COMBINATION
)
1646 tree e1
= get_init_expr (chain
->ch1
, index
);
1647 tree e2
= get_init_expr (chain
->ch2
, index
);
1649 return fold_build2 (chain
->op
, chain
->rslt_type
, e1
, e2
);
1652 return chain
->inits
[index
];
1655 /* Returns a new temporary variable used for the I-th variable carrying
1656 value of REF. The variable's uid is marked in TMP_VARS. */
1659 predcom_tmp_var (tree ref
, unsigned i
, bitmap tmp_vars
)
1661 tree type
= TREE_TYPE (ref
);
1662 /* We never access the components of the temporary variable in predictive
1664 tree var
= create_tmp_reg (type
, get_lsm_tmp_name (ref
, i
));
1665 bitmap_set_bit (tmp_vars
, DECL_UID (var
));
1669 /* Creates the variables for CHAIN, as well as phi nodes for them and
1670 initialization on entry to LOOP. Uids of the newly created
1671 temporary variables are marked in TMP_VARS. */
1674 initialize_root_vars (class loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1677 unsigned n
= chain
->length
;
1678 dref root
= get_chain_root (chain
);
1679 bool reuse_first
= !chain
->has_max_use_after
;
1680 tree ref
, init
, var
, next
;
1683 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1685 /* If N == 0, then all the references are within the single iteration. And
1686 since this is an nonempty chain, reuse_first cannot be true. */
1687 gcc_assert (n
> 0 || !reuse_first
);
1689 chain
->vars
.create (n
+ 1);
1691 if (chain
->type
== CT_COMBINATION
)
1692 ref
= gimple_assign_lhs (root
->stmt
);
1694 ref
= DR_REF (root
->ref
);
1696 for (i
= 0; i
< n
+ (reuse_first
? 0 : 1); i
++)
1698 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1699 chain
->vars
.quick_push (var
);
1702 chain
->vars
.quick_push (chain
->vars
[0]);
1704 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1705 chain
->vars
[i
] = make_ssa_name (var
);
1707 for (i
= 0; i
< n
; i
++)
1709 var
= chain
->vars
[i
];
1710 next
= chain
->vars
[i
+ 1];
1711 init
= get_init_expr (chain
, i
);
1713 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1715 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1717 phi
= create_phi_node (var
, loop
->header
);
1718 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1719 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1723 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1724 all stores to be eliminated store loop invariant values into memory.
1725 In this case, we can use these invariant values directly after LOOP. */
1728 is_inv_store_elimination_chain (class loop
*loop
, chain_p chain
)
1730 if (chain
->length
== 0 || chain
->type
!= CT_STORE_STORE
)
1733 gcc_assert (!chain
->has_max_use_after
);
1735 /* If loop iterates for unknown times or fewer times than chain->length,
1736 we still need to setup root variable and propagate it with PHI node. */
1737 tree niters
= number_of_latch_executions (loop
);
1738 if (TREE_CODE (niters
) != INTEGER_CST
1739 || wi::leu_p (wi::to_wide (niters
), chain
->length
))
1742 /* Check stores in chain for elimination if they only store loop invariant
1744 for (unsigned i
= 0; i
< chain
->length
; i
++)
1746 dref a
= get_chain_last_write_at (chain
, i
);
1750 gimple
*def_stmt
, *stmt
= a
->stmt
;
1751 if (!gimple_assign_single_p (stmt
))
1754 tree val
= gimple_assign_rhs1 (stmt
);
1755 if (TREE_CLOBBER_P (val
))
1758 if (CONSTANT_CLASS_P (val
))
1761 if (TREE_CODE (val
) != SSA_NAME
)
1764 def_stmt
= SSA_NAME_DEF_STMT (val
);
1765 if (gimple_nop_p (def_stmt
))
1768 if (flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
1774 /* Creates root variables for store elimination CHAIN in which stores for
1775 elimination only store loop invariant values. In this case, we neither
1776 need to load root variables before loop nor propagate it with PHI nodes. */
1779 initialize_root_vars_store_elim_1 (chain_p chain
)
1782 unsigned i
, n
= chain
->length
;
1784 chain
->vars
.create (n
);
1785 chain
->vars
.safe_grow_cleared (n
, true);
1787 /* Initialize root value for eliminated stores at each distance. */
1788 for (i
= 0; i
< n
; i
++)
1790 dref a
= get_chain_last_write_at (chain
, i
);
1794 var
= gimple_assign_rhs1 (a
->stmt
);
1795 chain
->vars
[a
->distance
] = var
;
1798 /* We don't propagate values with PHI nodes, so manually propagate value
1799 to bubble positions. */
1800 var
= chain
->vars
[0];
1801 for (i
= 1; i
< n
; i
++)
1803 if (chain
->vars
[i
] != NULL_TREE
)
1805 var
= chain
->vars
[i
];
1808 chain
->vars
[i
] = var
;
1811 /* Revert the vector. */
1812 for (i
= 0; i
< n
/ 2; i
++)
1813 std::swap (chain
->vars
[i
], chain
->vars
[n
- i
- 1]);
1816 /* Creates root variables for store elimination CHAIN in which stores for
1817 elimination store loop variant values. In this case, we may need to
1818 load root variables before LOOP and propagate it with PHI nodes. Uids
1819 of the newly created root variables are marked in TMP_VARS. */
1822 initialize_root_vars_store_elim_2 (class loop
*loop
,
1823 chain_p chain
, bitmap tmp_vars
)
1825 unsigned i
, n
= chain
->length
;
1826 tree ref
, init
, var
, next
, val
, phi_result
;
1830 chain
->vars
.create (n
);
1832 ref
= DR_REF (get_chain_root (chain
)->ref
);
1833 for (i
= 0; i
< n
; i
++)
1835 var
= predcom_tmp_var (ref
, i
, tmp_vars
);
1836 chain
->vars
.quick_push (var
);
1839 FOR_EACH_VEC_ELT (chain
->vars
, i
, var
)
1840 chain
->vars
[i
] = make_ssa_name (var
);
1842 /* Root values are either rhs operand of stores to be eliminated, or
1843 loaded from memory before loop. */
1844 auto_vec
<tree
> vtemps
;
1845 vtemps
.safe_grow_cleared (n
, true);
1846 for (i
= 0; i
< n
; i
++)
1848 init
= get_init_expr (chain
, i
);
1849 if (init
== NULL_TREE
)
1851 /* Root value is rhs operand of the store to be eliminated if
1852 it isn't loaded from memory before loop. */
1853 dref a
= get_chain_last_write_at (chain
, i
);
1854 val
= gimple_assign_rhs1 (a
->stmt
);
1855 if (TREE_CLOBBER_P (val
))
1857 val
= get_or_create_ssa_default_def (cfun
, SSA_NAME_VAR (var
));
1858 gimple_assign_set_rhs1 (a
->stmt
, val
);
1861 vtemps
[n
- i
- 1] = val
;
1865 edge latch
= loop_latch_edge (loop
);
1866 edge entry
= loop_preheader_edge (loop
);
1868 /* Root value is loaded from memory before loop, we also need
1869 to add PHI nodes to propagate the value across iterations. */
1870 init
= force_gimple_operand (init
, &stmts
, true, NULL_TREE
);
1872 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1874 next
= chain
->vars
[n
- i
];
1875 phi_result
= copy_ssa_name (next
);
1876 gphi
*phi
= create_phi_node (phi_result
, loop
->header
);
1877 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1878 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1879 vtemps
[n
- i
- 1] = phi_result
;
1883 /* Find the insertion position. */
1884 dref last
= get_chain_root (chain
);
1885 for (i
= 0; i
< chain
->refs
.length (); i
++)
1887 if (chain
->refs
[i
]->pos
> last
->pos
)
1888 last
= chain
->refs
[i
];
1891 gimple_stmt_iterator gsi
= gsi_for_stmt (last
->stmt
);
1893 /* Insert statements copying root value to root variable. */
1894 for (i
= 0; i
< n
; i
++)
1896 var
= chain
->vars
[i
];
1898 stmt
= gimple_build_assign (var
, val
);
1899 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1903 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1904 (CHAIN->length - 1) iterations. */
1907 finalize_eliminated_stores (class loop
*loop
, chain_p chain
)
1909 unsigned i
, n
= chain
->length
;
1911 for (i
= 0; i
< n
; i
++)
1913 tree var
= chain
->vars
[i
];
1914 tree fini
= chain
->finis
[n
- i
- 1];
1915 gimple
*stmt
= gimple_build_assign (fini
, var
);
1917 gimple_seq_add_stmt_without_update (&chain
->fini_seq
, stmt
);
1920 if (chain
->fini_seq
)
1922 gsi_insert_seq_on_edge_immediate (single_exit (loop
), chain
->fini_seq
);
1923 chain
->fini_seq
= NULL
;
1927 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1928 initialization on entry to LOOP if necessary. The ssa name for the variable
1929 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1930 around the loop is created. Uid of the newly created temporary variable
1931 is marked in TMP_VARS. INITS is the list containing the (single)
1935 initialize_root_vars_lm (class loop
*loop
, dref root
, bool written
,
1936 vec
<tree
> *vars
, vec
<tree
> inits
,
1940 tree ref
= DR_REF (root
->ref
), init
, var
, next
;
1943 edge entry
= loop_preheader_edge (loop
), latch
= loop_latch_edge (loop
);
1945 /* Find the initializer for the variable, and check that it cannot
1949 vars
->create (written
? 2 : 1);
1950 var
= predcom_tmp_var (ref
, 0, tmp_vars
);
1951 vars
->quick_push (var
);
1953 vars
->quick_push ((*vars
)[0]);
1955 FOR_EACH_VEC_ELT (*vars
, i
, var
)
1956 (*vars
)[i
] = make_ssa_name (var
);
1960 init
= force_gimple_operand (init
, &stmts
, written
, NULL_TREE
);
1962 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
1967 phi
= create_phi_node (var
, loop
->header
);
1968 add_phi_arg (phi
, init
, entry
, UNKNOWN_LOCATION
);
1969 add_phi_arg (phi
, next
, latch
, UNKNOWN_LOCATION
);
1973 gassign
*init_stmt
= gimple_build_assign (var
, init
);
1974 gsi_insert_on_edge_immediate (entry
, init_stmt
);
1979 /* Execute load motion for references in chain CHAIN. Uids of the newly
1980 created temporary variables are marked in TMP_VARS. */
1983 execute_load_motion (class loop
*loop
, chain_p chain
, bitmap tmp_vars
)
1985 auto_vec
<tree
> vars
;
1987 unsigned n_writes
= 0, ridx
, i
;
1990 gcc_assert (chain
->type
== CT_INVARIANT
);
1991 gcc_assert (!chain
->combined
);
1992 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
1993 if (DR_IS_WRITE (a
->ref
))
1996 /* If there are no reads in the loop, there is nothing to do. */
1997 if (n_writes
== chain
->refs
.length ())
2000 initialize_root_vars_lm (loop
, get_chain_root (chain
), n_writes
> 0,
2001 &vars
, chain
->inits
, tmp_vars
);
2004 FOR_EACH_VEC_ELT (chain
->refs
, i
, a
)
2006 bool is_read
= DR_IS_READ (a
->ref
);
2008 if (DR_IS_WRITE (a
->ref
))
2014 var
= make_ssa_name (SSA_NAME_VAR (var
));
2021 replace_ref_with (a
->stmt
, vars
[ridx
],
2022 !is_read
, !is_read
);
2026 /* Returns the single statement in that NAME is used, excepting
2027 the looparound phi nodes contained in one of the chains. If there is no
2028 such statement, or more statements, NULL is returned. */
2031 single_nonlooparound_use (tree name
)
2034 imm_use_iterator it
;
2035 gimple
*stmt
, *ret
= NULL
;
2037 FOR_EACH_IMM_USE_FAST (use
, it
, name
)
2039 stmt
= USE_STMT (use
);
2041 if (gimple_code (stmt
) == GIMPLE_PHI
)
2043 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2044 could not be processed anyway, so just fail for them. */
2045 if (bitmap_bit_p (looparound_phis
,
2046 SSA_NAME_VERSION (PHI_RESULT (stmt
))))
2051 else if (is_gimple_debug (stmt
))
2053 else if (ret
!= NULL
)
2062 /* Remove statement STMT, as well as the chain of assignments in that it is
2066 remove_stmt (gimple
*stmt
)
2070 gimple_stmt_iterator psi
;
2072 if (gimple_code (stmt
) == GIMPLE_PHI
)
2074 name
= PHI_RESULT (stmt
);
2075 next
= single_nonlooparound_use (name
);
2076 reset_debug_uses (stmt
);
2077 psi
= gsi_for_stmt (stmt
);
2078 remove_phi_node (&psi
, true);
2081 || !gimple_assign_ssa_name_copy_p (next
)
2082 || gimple_assign_rhs1 (next
) != name
)
2090 gimple_stmt_iterator bsi
;
2092 bsi
= gsi_for_stmt (stmt
);
2094 name
= gimple_assign_lhs (stmt
);
2095 if (TREE_CODE (name
) == SSA_NAME
)
2097 next
= single_nonlooparound_use (name
);
2098 reset_debug_uses (stmt
);
2102 /* This is a store to be eliminated. */
2103 gcc_assert (gimple_vdef (stmt
) != NULL
);
2107 unlink_stmt_vdef (stmt
);
2108 gsi_remove (&bsi
, true);
2109 release_defs (stmt
);
2112 || !gimple_assign_ssa_name_copy_p (next
)
2113 || gimple_assign_rhs1 (next
) != name
)
2120 /* Perform the predictive commoning optimization for a chain CHAIN.
2121 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2124 execute_pred_commoning_chain (class loop
*loop
, chain_p chain
,
2132 if (chain
->combined
)
2134 /* For combined chains, just remove the statements that are used to
2135 compute the values of the expression (except for the root one).
2136 We delay this until after all chains are processed. */
2138 else if (chain
->type
== CT_STORE_STORE
)
2140 if (chain
->length
> 0)
2142 if (chain
->inv_store_elimination
)
2144 /* If dead stores in this chain only store loop invariant
2145 values, we can simply record the invariant value and use
2146 it directly after loop. */
2147 initialize_root_vars_store_elim_1 (chain
);
2151 /* If dead stores in this chain store loop variant values,
2152 we need to set up the variables by loading from memory
2153 before loop and propagating it with PHI nodes. */
2154 initialize_root_vars_store_elim_2 (loop
, chain
, tmp_vars
);
2157 /* For inter-iteration store elimination chain, stores at each
2158 distance in loop's last (chain->length - 1) iterations can't
2159 be eliminated, because there is no following killing store.
2160 We need to generate these stores after loop. */
2161 finalize_eliminated_stores (loop
, chain
);
2164 bool last_store_p
= true;
2165 for (i
= chain
->refs
.length (); i
> 0; i
--)
2167 a
= chain
->refs
[i
- 1];
2168 /* Preserve the last store of the chain. Eliminate other stores
2169 which are killed by the last one. */
2170 if (DR_IS_WRITE (a
->ref
))
2173 last_store_p
= false;
2175 remove_stmt (a
->stmt
);
2180 /* Any load in Store-Store chain must be dominated by a previous
2181 store, we replace the load reference with rhs of the store. */
2182 dref b
= get_chain_last_write_before_load (chain
, i
- 1);
2183 gcc_assert (b
!= NULL
);
2184 var
= gimple_assign_rhs1 (b
->stmt
);
2185 replace_ref_with (a
->stmt
, var
, false, false);
2190 /* For non-combined chains, set up the variables that hold its value. */
2191 initialize_root_vars (loop
, chain
, tmp_vars
);
2192 a
= get_chain_root (chain
);
2193 in_lhs
= (chain
->type
== CT_STORE_LOAD
2194 || chain
->type
== CT_COMBINATION
);
2195 replace_ref_with (a
->stmt
, chain
->vars
[chain
->length
], true, in_lhs
);
2197 /* Replace the uses of the original references by these variables. */
2198 for (i
= 1; chain
->refs
.iterate (i
, &a
); i
++)
2200 var
= chain
->vars
[chain
->length
- a
->distance
];
2201 replace_ref_with (a
->stmt
, var
, false, false);
2206 /* Determines the unroll factor necessary to remove as many temporary variable
2207 copies as possible. CHAINS is the list of chains that will be
2211 determine_unroll_factor (vec
<chain_p
> chains
)
2214 unsigned factor
= 1, af
, nfactor
, i
;
2215 unsigned max
= param_max_unroll_times
;
2217 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2219 if (chain
->type
== CT_INVARIANT
)
2221 /* For now we can't handle unrolling when eliminating stores. */
2222 else if (chain
->type
== CT_STORE_STORE
)
2225 if (chain
->combined
)
2227 /* For combined chains, we can't handle unrolling if we replace
2231 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
2232 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
2237 /* The best unroll factor for this chain is equal to the number of
2238 temporary variables that we create for it. */
2240 if (chain
->has_max_use_after
)
2243 nfactor
= factor
* af
/ gcd (factor
, af
);
2251 /* Perform the predictive commoning optimization for CHAINS.
2252 Uids of the newly created temporary variables are marked in TMP_VARS. */
2255 execute_pred_commoning (class loop
*loop
, vec
<chain_p
> chains
,
2261 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2263 if (chain
->type
== CT_INVARIANT
)
2264 execute_load_motion (loop
, chain
, tmp_vars
);
2266 execute_pred_commoning_chain (loop
, chain
, tmp_vars
);
2269 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2271 if (chain
->type
== CT_INVARIANT
)
2273 else if (chain
->combined
)
2275 /* For combined chains, just remove the statements that are used to
2276 compute the values of the expression (except for the root one). */
2279 for (j
= 1; chain
->refs
.iterate (j
, &a
); j
++)
2280 remove_stmt (a
->stmt
);
2285 /* For each reference in CHAINS, if its defining statement is
2286 phi node, record the ssa name that is defined by it. */
2289 replace_phis_by_defined_names (vec
<chain_p
> chains
)
2295 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2296 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
2298 if (gimple_code (a
->stmt
) == GIMPLE_PHI
)
2300 a
->name_defined_by_phi
= PHI_RESULT (a
->stmt
);
2306 /* For each reference in CHAINS, if name_defined_by_phi is not
2307 NULL, use it to set the stmt field. */
2310 replace_names_by_phis (vec
<chain_p
> chains
)
2316 FOR_EACH_VEC_ELT (chains
, i
, chain
)
2317 FOR_EACH_VEC_ELT (chain
->refs
, j
, a
)
2318 if (a
->stmt
== NULL
)
2320 a
->stmt
= SSA_NAME_DEF_STMT (a
->name_defined_by_phi
);
2321 gcc_assert (gimple_code (a
->stmt
) == GIMPLE_PHI
);
2322 a
->name_defined_by_phi
= NULL_TREE
;
2326 /* Wrapper over execute_pred_commoning, to pass it as a callback
2327 to tree_transform_and_unroll_loop. */
2331 vec
<chain_p
> chains
;
2336 execute_pred_commoning_cbck (class loop
*loop
, void *data
)
2338 struct epcc_data
*const dta
= (struct epcc_data
*) data
;
2340 /* Restore phi nodes that were replaced by ssa names before
2341 tree_transform_and_unroll_loop (see detailed description in
2342 tree_predictive_commoning_loop). */
2343 replace_names_by_phis (dta
->chains
);
2344 execute_pred_commoning (loop
, dta
->chains
, dta
->tmp_vars
);
2347 /* Base NAME and all the names in the chain of phi nodes that use it
2348 on variable VAR. The phi nodes are recognized by being in the copies of
2349 the header of the LOOP. */
2352 base_names_in_chain_on (class loop
*loop
, tree name
, tree var
)
2355 imm_use_iterator iter
;
2357 replace_ssa_name_symbol (name
, var
);
2362 FOR_EACH_IMM_USE_STMT (stmt
, iter
, name
)
2364 if (gimple_code (stmt
) == GIMPLE_PHI
2365 && flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
2374 name
= PHI_RESULT (phi
);
2375 replace_ssa_name_symbol (name
, var
);
2379 /* Given an unrolled LOOP after predictive commoning, remove the
2380 register copies arising from phi nodes by changing the base
2381 variables of SSA names. TMP_VARS is the set of the temporary variables
2382 for those we want to perform this. */
2385 eliminate_temp_copies (class loop
*loop
, bitmap tmp_vars
)
2390 tree name
, use
, var
;
2393 e
= loop_latch_edge (loop
);
2394 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
2397 name
= PHI_RESULT (phi
);
2398 var
= SSA_NAME_VAR (name
);
2399 if (!var
|| !bitmap_bit_p (tmp_vars
, DECL_UID (var
)))
2401 use
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
2402 gcc_assert (TREE_CODE (use
) == SSA_NAME
);
2404 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2405 stmt
= SSA_NAME_DEF_STMT (use
);
2406 while (gimple_code (stmt
) == GIMPLE_PHI
2407 /* In case we could not unroll the loop enough to eliminate
2408 all copies, we may reach the loop header before the defining
2409 statement (in that case, some register copies will be present
2410 in loop latch in the final code, corresponding to the newly
2411 created looparound phi nodes). */
2412 && gimple_bb (stmt
) != loop
->header
)
2414 gcc_assert (single_pred_p (gimple_bb (stmt
)));
2415 use
= PHI_ARG_DEF (stmt
, 0);
2416 stmt
= SSA_NAME_DEF_STMT (use
);
2419 base_names_in_chain_on (loop
, use
, var
);
2423 /* Returns true if CHAIN is suitable to be combined. */
2426 chain_can_be_combined_p (chain_p chain
)
2428 return (!chain
->combined
2429 && (chain
->type
== CT_LOAD
|| chain
->type
== CT_COMBINATION
));
2432 /* Returns the modify statement that uses NAME. Skips over assignment
2433 statements, NAME is replaced with the actual name used in the returned
2437 find_use_stmt (tree
*name
)
2442 /* Skip over assignments. */
2445 stmt
= single_nonlooparound_use (*name
);
2449 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
2452 lhs
= gimple_assign_lhs (stmt
);
2453 if (TREE_CODE (lhs
) != SSA_NAME
)
2456 if (gimple_assign_copy_p (stmt
))
2458 rhs
= gimple_assign_rhs1 (stmt
);
2464 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
))
2465 == GIMPLE_BINARY_RHS
)
2472 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2475 may_reassociate_p (tree type
, enum tree_code code
)
2477 if (FLOAT_TYPE_P (type
)
2478 && !flag_unsafe_math_optimizations
)
2481 return (commutative_tree_code (code
)
2482 && associative_tree_code (code
));
2485 /* If the operation used in STMT is associative and commutative, go through the
2486 tree of the same operations and returns its root. Distance to the root
2487 is stored in DISTANCE. */
2490 find_associative_operation_root (gimple
*stmt
, unsigned *distance
)
2494 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2495 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2498 if (!may_reassociate_p (type
, code
))
2503 lhs
= gimple_assign_lhs (stmt
);
2504 gcc_assert (TREE_CODE (lhs
) == SSA_NAME
);
2506 next
= find_use_stmt (&lhs
);
2508 || gimple_assign_rhs_code (next
) != code
)
2520 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2521 is no such statement, returns NULL_TREE. In case the operation used on
2522 NAME1 and NAME2 is associative and commutative, returns the root of the
2523 tree formed by this operation instead of the statement that uses NAME1 or
2527 find_common_use_stmt (tree
*name1
, tree
*name2
)
2529 gimple
*stmt1
, *stmt2
;
2531 stmt1
= find_use_stmt (name1
);
2535 stmt2
= find_use_stmt (name2
);
2542 stmt1
= find_associative_operation_root (stmt1
, NULL
);
2545 stmt2
= find_associative_operation_root (stmt2
, NULL
);
2549 return (stmt1
== stmt2
? stmt1
: NULL
);
2552 /* Checks whether R1 and R2 are combined together using CODE, with the result
2553 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2554 if it is true. If CODE is ERROR_MARK, set these values instead. */
2557 combinable_refs_p (dref r1
, dref r2
,
2558 enum tree_code
*code
, bool *swap
, tree
*rslt_type
)
2560 enum tree_code acode
;
2566 name1
= name_for_ref (r1
);
2567 name2
= name_for_ref (r2
);
2568 gcc_assert (name1
!= NULL_TREE
&& name2
!= NULL_TREE
);
2570 stmt
= find_common_use_stmt (&name1
, &name2
);
2573 /* A simple post-dominance check - make sure the combination
2574 is executed under the same condition as the references. */
2575 || (gimple_bb (stmt
) != gimple_bb (r1
->stmt
)
2576 && gimple_bb (stmt
) != gimple_bb (r2
->stmt
)))
2579 acode
= gimple_assign_rhs_code (stmt
);
2580 aswap
= (!commutative_tree_code (acode
)
2581 && gimple_assign_rhs1 (stmt
) != name1
);
2582 atype
= TREE_TYPE (gimple_assign_lhs (stmt
));
2584 if (*code
== ERROR_MARK
)
2592 return (*code
== acode
2594 && *rslt_type
== atype
);
2597 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2598 an assignment of the remaining operand. */
2601 remove_name_from_operation (gimple
*stmt
, tree op
)
2604 gimple_stmt_iterator si
;
2606 gcc_assert (is_gimple_assign (stmt
));
2608 if (gimple_assign_rhs1 (stmt
) == op
)
2609 other_op
= gimple_assign_rhs2 (stmt
);
2611 other_op
= gimple_assign_rhs1 (stmt
);
2613 si
= gsi_for_stmt (stmt
);
2614 gimple_assign_set_rhs_from_tree (&si
, other_op
);
2616 /* We should not have reallocated STMT. */
2617 gcc_assert (gsi_stmt (si
) == stmt
);
2622 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2623 are combined in a single statement, and returns this statement. */
2626 reassociate_to_the_same_stmt (tree name1
, tree name2
)
2628 gimple
*stmt1
, *stmt2
, *root1
, *root2
, *s1
, *s2
;
2629 gassign
*new_stmt
, *tmp_stmt
;
2630 tree new_name
, tmp_name
, var
, r1
, r2
;
2631 unsigned dist1
, dist2
;
2632 enum tree_code code
;
2633 tree type
= TREE_TYPE (name1
);
2634 gimple_stmt_iterator bsi
;
2636 stmt1
= find_use_stmt (&name1
);
2637 stmt2
= find_use_stmt (&name2
);
2638 root1
= find_associative_operation_root (stmt1
, &dist1
);
2639 root2
= find_associative_operation_root (stmt2
, &dist2
);
2640 code
= gimple_assign_rhs_code (stmt1
);
2642 gcc_assert (root1
&& root2
&& root1
== root2
2643 && code
== gimple_assign_rhs_code (stmt2
));
2645 /* Find the root of the nearest expression in that both NAME1 and NAME2
2652 while (dist1
> dist2
)
2654 s1
= find_use_stmt (&r1
);
2655 r1
= gimple_assign_lhs (s1
);
2658 while (dist2
> dist1
)
2660 s2
= find_use_stmt (&r2
);
2661 r2
= gimple_assign_lhs (s2
);
2667 s1
= find_use_stmt (&r1
);
2668 r1
= gimple_assign_lhs (s1
);
2669 s2
= find_use_stmt (&r2
);
2670 r2
= gimple_assign_lhs (s2
);
2673 /* Remove NAME1 and NAME2 from the statements in that they are used
2675 remove_name_from_operation (stmt1
, name1
);
2676 remove_name_from_operation (stmt2
, name2
);
2678 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2679 combine it with the rhs of S1. */
2680 var
= create_tmp_reg (type
, "predreastmp");
2681 new_name
= make_ssa_name (var
);
2682 new_stmt
= gimple_build_assign (new_name
, code
, name1
, name2
);
2684 var
= create_tmp_reg (type
, "predreastmp");
2685 tmp_name
= make_ssa_name (var
);
2687 /* Rhs of S1 may now be either a binary expression with operation
2688 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2689 so that name1 or name2 was removed from it). */
2690 tmp_stmt
= gimple_build_assign (tmp_name
, gimple_assign_rhs_code (s1
),
2691 gimple_assign_rhs1 (s1
),
2692 gimple_assign_rhs2 (s1
));
2694 bsi
= gsi_for_stmt (s1
);
2695 gimple_assign_set_rhs_with_ops (&bsi
, code
, new_name
, tmp_name
);
2696 s1
= gsi_stmt (bsi
);
2699 gsi_insert_before (&bsi
, new_stmt
, GSI_SAME_STMT
);
2700 gsi_insert_before (&bsi
, tmp_stmt
, GSI_SAME_STMT
);
2705 /* Returns the statement that combines references R1 and R2. In case R1
2706 and R2 are not used in the same statement, but they are used with an
2707 associative and commutative operation in the same expression, reassociate
2708 the expression so that they are used in the same statement. */
2711 stmt_combining_refs (dref r1
, dref r2
)
2713 gimple
*stmt1
, *stmt2
;
2714 tree name1
= name_for_ref (r1
);
2715 tree name2
= name_for_ref (r2
);
2717 stmt1
= find_use_stmt (&name1
);
2718 stmt2
= find_use_stmt (&name2
);
2722 return reassociate_to_the_same_stmt (name1
, name2
);
2725 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2726 description of the new chain is returned, otherwise we return NULL. */
2729 combine_chains (chain_p ch1
, chain_p ch2
)
2732 enum tree_code op
= ERROR_MARK
;
2736 tree rslt_type
= NULL_TREE
;
2740 if (ch1
->length
!= ch2
->length
)
2743 if (ch1
->refs
.length () != ch2
->refs
.length ())
2746 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2747 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2749 if (r1
->distance
!= r2
->distance
)
2752 if (!combinable_refs_p (r1
, r2
, &op
, &swap
, &rslt_type
))
2757 std::swap (ch1
, ch2
);
2759 new_chain
= XCNEW (struct chain
);
2760 new_chain
->type
= CT_COMBINATION
;
2762 new_chain
->ch1
= ch1
;
2763 new_chain
->ch2
= ch2
;
2764 new_chain
->rslt_type
= rslt_type
;
2765 new_chain
->length
= ch1
->length
;
2767 for (i
= 0; (ch1
->refs
.iterate (i
, &r1
)
2768 && ch2
->refs
.iterate (i
, &r2
)); i
++)
2770 nw
= XCNEW (class dref_d
);
2771 nw
->stmt
= stmt_combining_refs (r1
, r2
);
2772 nw
->distance
= r1
->distance
;
2774 new_chain
->refs
.safe_push (nw
);
2777 ch1
->combined
= true;
2778 ch2
->combined
= true;
2782 /* Recursively update position information of all offspring chains to ROOT
2783 chain's position information. */
2786 update_pos_for_combined_chains (chain_p root
)
2788 chain_p ch1
= root
->ch1
, ch2
= root
->ch2
;
2789 dref ref
, ref1
, ref2
;
2790 for (unsigned j
= 0; (root
->refs
.iterate (j
, &ref
)
2791 && ch1
->refs
.iterate (j
, &ref1
)
2792 && ch2
->refs
.iterate (j
, &ref2
)); ++j
)
2793 ref1
->pos
= ref2
->pos
= ref
->pos
;
2795 if (ch1
->type
== CT_COMBINATION
)
2796 update_pos_for_combined_chains (ch1
);
2797 if (ch2
->type
== CT_COMBINATION
)
2798 update_pos_for_combined_chains (ch2
);
2801 /* Returns true if statement S1 dominates statement S2. */
2804 pcom_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
2806 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
2808 if (!bb1
|| s1
== s2
)
2812 return gimple_uid (s1
) < gimple_uid (s2
);
2814 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
2817 /* Try to combine the CHAINS in LOOP. */
2820 try_combine_chains (class loop
*loop
, vec
<chain_p
> *chains
)
2823 chain_p ch1
, ch2
, cch
;
2824 auto_vec
<chain_p
> worklist
;
2825 bool combined_p
= false;
2827 FOR_EACH_VEC_ELT (*chains
, i
, ch1
)
2828 if (chain_can_be_combined_p (ch1
))
2829 worklist
.safe_push (ch1
);
2831 while (!worklist
.is_empty ())
2833 ch1
= worklist
.pop ();
2834 if (!chain_can_be_combined_p (ch1
))
2837 FOR_EACH_VEC_ELT (*chains
, j
, ch2
)
2839 if (!chain_can_be_combined_p (ch2
))
2842 cch
= combine_chains (ch1
, ch2
);
2845 worklist
.safe_push (cch
);
2846 chains
->safe_push (cch
);
2855 /* Setup UID for all statements in dominance order. */
2856 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
2857 renumber_gimple_stmt_uids_in_blocks (bbs
, loop
->num_nodes
);
2860 /* Re-association in combined chains may generate statements different to
2861 order of references of the original chain. We need to keep references
2862 of combined chain in dominance order so that all uses will be inserted
2863 after definitions. Note:
2864 A) This is necessary for all combined chains.
2865 B) This is only necessary for ZERO distance references because other
2866 references inherit value from loop carried PHIs.
2868 We first update position information for all combined chains. */
2870 for (i
= 0; chains
->iterate (i
, &ch1
); ++i
)
2872 if (ch1
->type
!= CT_COMBINATION
|| ch1
->combined
)
2875 for (j
= 0; ch1
->refs
.iterate (j
, &ref
); ++j
)
2876 ref
->pos
= gimple_uid (ref
->stmt
);
2878 update_pos_for_combined_chains (ch1
);
2880 /* Then sort references according to newly updated position information. */
2881 for (i
= 0; chains
->iterate (i
, &ch1
); ++i
)
2883 if (ch1
->type
!= CT_COMBINATION
&& !ch1
->combined
)
2886 /* Find the first reference with non-ZERO distance. */
2887 if (ch1
->length
== 0)
2888 j
= ch1
->refs
.length();
2891 for (j
= 0; ch1
->refs
.iterate (j
, &ref
); ++j
)
2892 if (ref
->distance
!= 0)
2896 /* Sort all ZERO distance references by position. */
2897 qsort (&ch1
->refs
[0], j
, sizeof (ch1
->refs
[0]), order_drefs_by_pos
);
2902 /* For ZERO length chain, has_max_use_after must be true since root
2903 combined stmt must dominates others. */
2904 if (ch1
->length
== 0)
2906 ch1
->has_max_use_after
= true;
2909 /* Check if there is use at max distance after root for combined chains
2910 and set flag accordingly. */
2911 ch1
->has_max_use_after
= false;
2912 gimple
*root_stmt
= get_chain_root (ch1
)->stmt
;
2913 for (j
= 1; ch1
->refs
.iterate (j
, &ref
); ++j
)
2915 if (ref
->distance
== ch1
->length
2916 && !pcom_stmt_dominates_stmt_p (ref
->stmt
, root_stmt
))
2918 ch1
->has_max_use_after
= true;
2925 /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2926 if this is impossible because one of these initializers may trap, true
2930 prepare_initializers_chain_store_elim (class loop
*loop
, chain_p chain
)
2932 unsigned i
, n
= chain
->length
;
2934 /* For now we can't eliminate stores if some of them are conditional
2936 if (!chain
->all_always_accessed
)
2939 /* Nothing to intialize for intra-iteration store elimination. */
2940 if (n
== 0 && chain
->type
== CT_STORE_STORE
)
2943 /* For store elimination chain, there is nothing to initialize if stores
2944 to be eliminated only store loop invariant values into memory. */
2945 if (chain
->type
== CT_STORE_STORE
2946 && is_inv_store_elimination_chain (loop
, chain
))
2948 chain
->inv_store_elimination
= true;
2952 chain
->inits
.create (n
);
2953 chain
->inits
.safe_grow_cleared (n
, true);
2955 /* For store eliminatin chain like below:
2957 for (i = 0; i < len; i++)
2964 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2965 content of a[i + 1] remain the same if the loop iterates fewer times
2966 than chain->length. We need to set up root variables for such stores
2967 by loading from memory before loop. Note we only need to load bubble
2968 elements because loop body is guaranteed to be executed at least once
2969 after loop's preheader edge. */
2970 auto_vec
<bool> bubbles
;
2971 bubbles
.safe_grow_cleared (n
+ 1, true);
2972 for (i
= 0; i
< chain
->refs
.length (); i
++)
2973 bubbles
[chain
->refs
[i
]->distance
] = true;
2975 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
2976 for (i
= 0; i
< n
; i
++)
2981 gimple_seq stmts
= NULL
;
2983 tree init
= ref_at_iteration (dr
, (int) 0 - i
, &stmts
);
2985 gimple_seq_add_seq_without_update (&chain
->init_seq
, stmts
);
2987 chain
->inits
[i
] = init
;
2993 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2994 impossible because one of these initializers may trap, true otherwise. */
2997 prepare_initializers_chain (class loop
*loop
, chain_p chain
)
2999 unsigned i
, n
= (chain
->type
== CT_INVARIANT
) ? 1 : chain
->length
;
3000 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
3003 edge entry
= loop_preheader_edge (loop
);
3005 if (chain
->type
== CT_STORE_STORE
)
3006 return prepare_initializers_chain_store_elim (loop
, chain
);
3008 /* Find the initializers for the variables, and check that they cannot
3010 chain
->inits
.create (n
);
3011 for (i
= 0; i
< n
; i
++)
3012 chain
->inits
.quick_push (NULL_TREE
);
3014 /* If we have replaced some looparound phi nodes, use their initializers
3015 instead of creating our own. */
3016 FOR_EACH_VEC_ELT (chain
->refs
, i
, laref
)
3018 if (gimple_code (laref
->stmt
) != GIMPLE_PHI
)
3021 gcc_assert (laref
->distance
> 0);
3022 chain
->inits
[n
- laref
->distance
]
3023 = PHI_ARG_DEF_FROM_EDGE (laref
->stmt
, entry
);
3026 for (i
= 0; i
< n
; i
++)
3028 gimple_seq stmts
= NULL
;
3030 if (chain
->inits
[i
] != NULL_TREE
)
3033 init
= ref_at_iteration (dr
, (int) i
- n
, &stmts
);
3034 if (!chain
->all_always_accessed
&& tree_could_trap_p (init
))
3036 gimple_seq_discard (stmts
);
3041 gimple_seq_add_seq_without_update (&chain
->init_seq
, stmts
);
3043 chain
->inits
[i
] = init
;
3049 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3050 be used because the initializers might trap. */
3053 prepare_initializers (class loop
*loop
, vec
<chain_p
> chains
)
3058 for (i
= 0; i
< chains
.length (); )
3061 if (prepare_initializers_chain (loop
, chain
))
3065 release_chain (chain
);
3066 chains
.unordered_remove (i
);
3071 /* Generates finalizer memory references for CHAIN in LOOP. Returns true
3072 if finalizer code for CHAIN can be generated, otherwise false. */
3075 prepare_finalizers_chain (class loop
*loop
, chain_p chain
)
3077 unsigned i
, n
= chain
->length
;
3078 struct data_reference
*dr
= get_chain_root (chain
)->ref
;
3079 tree fini
, niters
= number_of_latch_executions (loop
);
3081 /* For now we can't eliminate stores if some of them are conditional
3083 if (!chain
->all_always_accessed
)
3086 chain
->finis
.create (n
);
3087 for (i
= 0; i
< n
; i
++)
3088 chain
->finis
.quick_push (NULL_TREE
);
3090 /* We never use looparound phi node for store elimination chains. */
3092 /* Find the finalizers for the variables, and check that they cannot
3094 for (i
= 0; i
< n
; i
++)
3096 gimple_seq stmts
= NULL
;
3097 gcc_assert (chain
->finis
[i
] == NULL_TREE
);
3099 if (TREE_CODE (niters
) != INTEGER_CST
&& TREE_CODE (niters
) != SSA_NAME
)
3101 niters
= unshare_expr (niters
);
3102 niters
= force_gimple_operand (niters
, &stmts
, true, NULL
);
3105 gimple_seq_add_seq_without_update (&chain
->fini_seq
, stmts
);
3109 fini
= ref_at_iteration (dr
, (int) 0 - i
, &stmts
, niters
);
3111 gimple_seq_add_seq_without_update (&chain
->fini_seq
, stmts
);
3113 chain
->finis
[i
] = fini
;
3119 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3120 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3123 prepare_finalizers (class loop
*loop
, vec
<chain_p
> chains
)
3127 bool loop_closed_ssa
= false;
3129 for (i
= 0; i
< chains
.length ();)
3133 /* Finalizer is only necessary for inter-iteration store elimination
3135 if (chain
->length
== 0 || chain
->type
!= CT_STORE_STORE
)
3141 if (prepare_finalizers_chain (loop
, chain
))
3144 /* Be conservative, assume loop closed ssa form is corrupted
3145 by store-store chain. Though it's not always the case if
3146 eliminated stores only store loop invariant values into
3148 loop_closed_ssa
= true;
3152 release_chain (chain
);
3153 chains
.unordered_remove (i
);
3156 return loop_closed_ssa
;
3159 /* Insert all initializing gimple stmts into loop's entry edge. */
3162 insert_init_seqs (class loop
*loop
, vec
<chain_p
> chains
)
3165 edge entry
= loop_preheader_edge (loop
);
3167 for (i
= 0; i
< chains
.length (); ++i
)
3168 if (chains
[i
]->init_seq
)
3170 gsi_insert_seq_on_edge_immediate (entry
, chains
[i
]->init_seq
);
3171 chains
[i
]->init_seq
= NULL
;
3175 /* Performs predictive commoning for LOOP. Sets bit 1<<1 of return value
3176 if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
3177 form was corrupted. Non-zero return value indicates some changes were
3178 applied to this loop. */
3181 tree_predictive_commoning_loop (class loop
*loop
, bool allow_unroll_p
)
3183 vec
<data_reference_p
> datarefs
;
3184 vec
<ddr_p
> dependences
;
3185 struct component
*components
;
3186 vec
<chain_p
> chains
= vNULL
;
3187 unsigned unroll_factor
= 0;
3188 class tree_niter_desc desc
;
3189 bool unroll
= false, loop_closed_ssa
= false;
3191 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3192 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
3194 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3195 if (get_max_loop_iterations_int (loop
) == 0)
3197 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3198 fprintf (dump_file
, "Loop iterates only 1 time, nothing to do.\n");
3203 /* Find the data references and split them into components according to their
3204 dependence relations. */
3205 auto_vec
<loop_p
, 3> loop_nest
;
3206 dependences
.create (10);
3207 datarefs
.create (10);
3208 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
3211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3212 fprintf (dump_file
, "Cannot analyze data dependencies\n");
3213 free_data_refs (datarefs
);
3214 free_dependence_relations (dependences
);
3218 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3219 dump_data_dependence_relations (dump_file
, dependences
);
3221 components
= split_data_refs_to_components (loop
, datarefs
, dependences
);
3222 loop_nest
.release ();
3223 free_dependence_relations (dependences
);
3226 free_data_refs (datarefs
);
3227 free_affine_expand_cache (&name_expansions
);
3231 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3233 fprintf (dump_file
, "Initial state:\n\n");
3234 dump_components (dump_file
, components
);
3237 /* Find the suitable components and split them into chains. */
3238 components
= filter_suitable_components (loop
, components
);
3240 auto_bitmap tmp_vars
;
3241 looparound_phis
= BITMAP_ALLOC (NULL
);
3242 determine_roots (loop
, components
, &chains
);
3243 release_components (components
);
3245 auto cleanup
= [&]() {
3246 release_chains (chains
);
3247 free_data_refs (datarefs
);
3248 BITMAP_FREE (looparound_phis
);
3249 free_affine_expand_cache (&name_expansions
);
3252 if (!chains
.exists ())
3254 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3256 "Predictive commoning failed: no suitable chains\n");
3261 prepare_initializers (loop
, chains
);
3262 loop_closed_ssa
= prepare_finalizers (loop
, chains
);
3264 /* Try to combine the chains that are always worked with together. */
3265 try_combine_chains (loop
, &chains
);
3267 insert_init_seqs (loop
, chains
);
3269 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3271 fprintf (dump_file
, "Before commoning:\n\n");
3272 dump_chains (dump_file
, chains
);
3276 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3277 that its number of iterations is divisible by the factor. */
3278 unroll_factor
= determine_unroll_factor (chains
);
3280 if (unroll_factor
> 1)
3281 unroll
= can_unroll_loop_p (loop
, unroll_factor
, &desc
);
3283 /* Execute the predictive commoning transformations, and possibly unroll the
3287 struct epcc_data dta
;
3289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3290 fprintf (dump_file
, "Unrolling %u times.\n", unroll_factor
);
3292 dta
.chains
= chains
;
3293 dta
.tmp_vars
= tmp_vars
;
3295 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3296 execute_pred_commoning_cbck is called may cause phi nodes to be
3297 reallocated, which is a problem since CHAINS may point to these
3298 statements. To fix this, we store the ssa names defined by the
3299 phi nodes here instead of the phi nodes themselves, and restore
3300 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3301 replace_phis_by_defined_names (chains
);
3303 edge exit
= single_dom_exit (loop
);
3304 tree_transform_and_unroll_loop (loop
, unroll_factor
, exit
, &desc
,
3305 execute_pred_commoning_cbck
, &dta
);
3306 eliminate_temp_copies (loop
, tmp_vars
);
3310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3312 "Executing predictive commoning without unrolling.\n");
3313 execute_pred_commoning (loop
, chains
, tmp_vars
);
3318 return (unroll
? 2 : 1) | (loop_closed_ssa
? 4 : 1);
3321 /* Runs predictive commoning. */
3324 tree_predictive_commoning (bool allow_unroll_p
)
3327 unsigned ret
= 0, changed
= 0;
3329 initialize_original_copy_tables ();
3330 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
3331 if (optimize_loop_for_speed_p (loop
))
3333 changed
|= tree_predictive_commoning_loop (loop
, allow_unroll_p
);
3335 free_original_copy_tables ();
3339 ret
= TODO_update_ssa_only_virtuals
;
3341 /* Some loop(s) got unrolled. */
3346 /* Need to fix up loop closed SSA. */
3348 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
3350 ret
|= TODO_cleanup_cfg
;
3357 /* Predictive commoning Pass. */
3360 run_tree_predictive_commoning (struct function
*fun
, bool allow_unroll_p
)
3362 if (number_of_loops (fun
) <= 1)
3365 return tree_predictive_commoning (allow_unroll_p
);
3370 const pass_data pass_data_predcom
=
3372 GIMPLE_PASS
, /* type */
3374 OPTGROUP_LOOP
, /* optinfo_flags */
3375 TV_PREDCOM
, /* tv_id */
3376 PROP_cfg
, /* properties_required */
3377 0, /* properties_provided */
3378 0, /* properties_destroyed */
3379 0, /* todo_flags_start */
3380 0, /* todo_flags_finish */
3383 class pass_predcom
: public gimple_opt_pass
3386 pass_predcom (gcc::context
*ctxt
)
3387 : gimple_opt_pass (pass_data_predcom
, ctxt
)
3390 /* opt_pass methods: */
3394 if (flag_predictive_commoning
!= 0)
3396 /* Loop vectorization enables predictive commoning implicitly
3397 only if predictive commoning isn't set explicitly, and it
3398 doesn't allow unrolling. */
3399 if (flag_tree_loop_vectorize
3400 && !global_options_set
.x_flag_predictive_commoning
)
3406 virtual unsigned int
3407 execute (function
*fun
)
3409 bool allow_unroll_p
= flag_predictive_commoning
!= 0;
3410 return run_tree_predictive_commoning (fun
, allow_unroll_p
);
3413 }; // class pass_predcom
3418 make_pass_predcom (gcc::context
*ctxt
)
3420 return new pass_predcom (ctxt
);