1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2008-2020 Free Software Foundation, Inc.
4 Contributed by Martin Jambor <mjambor@suse.cz>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* IPA-SRA is an interprocedural pass that removes unused function return
23 values (turning functions returning a value which is never used into void
24 functions), removes unused function parameters. It can also replace an
25 aggregate parameter by a set of other parameters representing part of the
26 original, turning those passed by reference into new ones which pass the
29 The pass is a true IPA one, which means that it works in three stages in
30 order to be able to take advantage of LTO. First, summaries about functions
31 and each call are generated. Function summaries (often called call graph
32 node summaries) contain mainly information about which parameters are
33 potential transformation candidates and which bits of candidates are
34 accessed. We differentiate between accesses done as a part of a call
35 statement (which might be not necessary if the callee is also transformed)
36 and others (which are mandatory). Call summaries (often called call graph
37 edge summaries) contain information about which function formal parameters
38 feed into which actual call arguments so that if two parameters are only
39 used in a sum which is then passed to another function which then however
40 does not use this parameter, all three parameters of the two functions can
41 be eliminated. Edge summaries also have flags whether the return value is
42 used or if it is only returned in the caller too. In LTO mode these
43 summaries are then streamed to the object file in the compilation phase and
44 streamed back in in the WPA analysis stage.
46 The interprocedural analysis phase traverses the graph in topological order
47 in two sweeps, one in each direction. First, from callees to callers for
48 parameter removal and splitting. Each strongly-connected component is
49 processed iteratively until the situation in it stabilizes. The pass from
50 callers to callees is then carried out to remove unused return values in a
53 Because parameter manipulation has big implications for call redirection
54 which is done only after all call graph nodes materialize, the
55 transformation phase is not part of this patch but is carried out by the
56 clone materialization and edge redirection itself, see comments in
57 ipa-param-manipulation.h for more details. */
63 #include "coretypes.h"
68 #include "tree-pass.h"
71 #include "gimple-pretty-print.h"
74 #include "gimple-iterator.h"
75 #include "gimple-walk.h"
78 #include "alloc-pool.h"
79 #include "symbol-summary.h"
81 #include "tree-inline.h"
82 #include "ipa-utils.h"
85 #include "tree-streamer.h"
86 #include "internal-fn.h"
87 #include "symtab-clones.h"
89 static void ipa_sra_summarize_function (cgraph_node
*);
91 /* Bits used to track size of an aggregate in bytes interprocedurally. */
92 #define ISRA_ARG_SIZE_LIMIT_BITS 16
93 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94 /* How many parameters can feed into a call actual argument and still be
96 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
98 /* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
103 struct GTY(()) param_access
105 /* Type that a potential replacement should have. This field only has
106 meaning in the summary building and transformation phases, when it is
107 reconstructed from the body. Must not be touched in IPA analysis
111 /* Alias reference type to be used in MEM_REFs when adjusting caller
115 /* Values returned by get_ref_base_and_extent but converted to bytes and
116 stored as unsigned ints. */
117 unsigned unit_offset
;
118 unsigned unit_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
120 /* Set once we are sure that the access will really end up in a potentially
121 transformed function - initially not set for portions of formal parameters
122 that are only used as actual function arguments passed to callees. */
123 unsigned certain
: 1;
124 /* Set if the access has a reversed scalar storage order. */
125 unsigned reverse
: 1;
128 /* This structure has the same purpose as the one above and additionally it
129 contains some fields that are only necessary in the summary generation
132 struct gensum_param_access
134 /* Values returned by get_ref_base_and_extent. */
135 HOST_WIDE_INT offset
;
138 /* if this access has any children (in terms of the definition above), this
139 points to the first one. */
140 struct gensum_param_access
*first_child
;
141 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
143 struct gensum_param_access
*next_sibling
;
145 /* Type that a potential replacement should have. This field only has
146 meaning in the summary building and transformation phases, when it is
147 reconstructed from the body. Must not be touched in IPA analysis
150 /* Alias reference type to be used in MEM_REFs when adjusting caller
154 /* Have there been writes to or reads from this exact location except for as
155 arguments to a function call that can be tracked. */
158 /* Set if the access has a reversed scalar storage order. */
162 /* Summary describing a parameter in the IPA stages. */
164 struct GTY(()) isra_param_desc
166 /* List of access representatives to the parameters, sorted according to
168 vec
<param_access
*, va_gc
> *accesses
;
170 /* Unit size limit of total size of all replacements. */
171 unsigned param_size_limit
: ISRA_ARG_SIZE_LIMIT_BITS
;
172 /* Sum of unit sizes of all certain replacements. */
173 unsigned size_reached
: ISRA_ARG_SIZE_LIMIT_BITS
;
175 /* A parameter that is used only in call arguments and can be removed if all
176 concerned actual arguments are removed. */
177 unsigned locally_unused
: 1;
178 /* An aggregate that is a candidate for breaking up or complete removal. */
179 unsigned split_candidate
: 1;
180 /* Is this a parameter passing stuff by reference? */
184 /* Structure used when generating summaries that describes a parameter. */
186 struct gensum_param_desc
188 /* Roots of param_accesses. */
189 gensum_param_access
*accesses
;
190 /* Number of accesses in the access tree rooted in field accesses. */
191 unsigned access_count
;
193 /* If the below is non-zero, this is the number of uses as actual arguments. */
195 /* Number of times this parameter has been directly passed to. */
196 unsigned ptr_pt_count
;
198 /* Size limit of total size of all replacements. */
199 unsigned param_size_limit
;
200 /* Sum of sizes of nonarg accesses. */
201 unsigned nonarg_acc_size
;
203 /* A parameter that is used only in call arguments and can be removed if all
204 concerned actual arguments are removed. */
206 /* An aggregate that is a candidate for breaking up or a pointer passing data
207 by reference that is a candidate for being converted to a set of
208 parameters passing those data by value. */
209 bool split_candidate
;
210 /* Is this a parameter passing stuff by reference? */
213 /* The number of this parameter as they are ordered in function decl. */
215 /* For parameters passing data by reference, this is parameter index to
216 compute indices to bb_dereferences. */
220 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
221 not in GC memory, this is not necessary and we can consider removing the
225 free_param_decl_accesses (isra_param_desc
*desc
)
227 unsigned len
= vec_safe_length (desc
->accesses
);
228 for (unsigned i
= 0; i
< len
; ++i
)
229 ggc_free ((*desc
->accesses
)[i
]);
230 vec_free (desc
->accesses
);
233 /* Class used to convey information about functions from the
234 intra-procedural analysis stage to inter-procedural one. */
236 class GTY((for_user
)) isra_func_summary
239 /* initialize the object. */
242 : m_parameters (NULL
), m_candidate (false), m_returns_value (false),
243 m_return_ignored (false), m_queued (false)
246 /* Destroy m_parameters. */
248 ~isra_func_summary ();
250 /* Mark the function as not a candidate for any IPA-SRA transformation.
251 Return true if it was a candidate until now. */
255 /* Vector of parameter descriptors corresponding to the function being
257 vec
<isra_param_desc
, va_gc
> *m_parameters
;
259 /* Whether the node is even a candidate for any IPA-SRA transformation at
261 unsigned m_candidate
: 1;
263 /* Whether the original function returns any value. */
264 unsigned m_returns_value
: 1;
266 /* Set to true if all call statements do not actually use the returned
269 unsigned m_return_ignored
: 1;
271 /* Whether the node is already queued in IPA SRA stack during processing of
274 unsigned m_queued
: 1;
277 /* Clean up and deallocate isra_func_summary points to. TODO: Since this data
278 structure is not in GC memory, this is not necessary and we can consider
279 removing the destructor. */
281 isra_func_summary::~isra_func_summary ()
283 unsigned len
= vec_safe_length (m_parameters
);
284 for (unsigned i
= 0; i
< len
; ++i
)
285 free_param_decl_accesses (&(*m_parameters
)[i
]);
286 vec_free (m_parameters
);
290 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
291 true if it was a candidate until now. */
294 isra_func_summary::zap ()
296 bool ret
= m_candidate
;
299 unsigned len
= vec_safe_length (m_parameters
);
300 for (unsigned i
= 0; i
< len
; ++i
)
301 free_param_decl_accesses (&(*m_parameters
)[i
]);
302 vec_free (m_parameters
);
307 /* Structure to describe which formal parameters feed into a particular actual
310 struct isra_param_flow
312 /* Number of elements in array inputs that contain valid data. */
314 /* Indices of formal parameters that feed into the described actual argument.
315 If aggregate_pass_through or pointer_pass_through below are true, it must
316 contain exactly one element which is passed through from a formal
317 parameter if the given number. Otherwise, the array contains indices of
318 callee's formal parameters which are used to calculate value of this
320 unsigned char inputs
[IPA_SRA_MAX_PARAM_FLOW_LEN
];
322 /* Offset within the formal parameter. */
323 unsigned unit_offset
;
324 /* Size of the portion of the formal parameter that is being passed. */
325 unsigned unit_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
327 /* True when the value of this actual copy is a portion of a formal
329 unsigned aggregate_pass_through
: 1;
330 /* True when the value of this actual copy is a verbatim pass through of an
332 unsigned pointer_pass_through
: 1;
333 /* True when it is safe to copy access candidates here from the callee, which
334 would mean introducing dereferences into callers of the caller. */
335 unsigned safe_to_import_accesses
: 1;
338 /* Structure used to convey information about calls from the intra-procedural
339 analysis stage to inter-procedural one. */
341 class isra_call_summary
345 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
346 m_bit_aligned_arg (false)
349 void init_inputs (unsigned arg_count
);
352 /* Information about what formal parameters of the caller are used to compute
353 individual actual arguments of this call. */
354 auto_vec
<isra_param_flow
> m_arg_flow
;
356 /* Set to true if the call statement does not have a LHS. */
357 unsigned m_return_ignored
: 1;
359 /* Set to true if the LHS of call statement is only used to construct the
360 return value of the caller. */
361 unsigned m_return_returned
: 1;
363 /* Set when any of the call arguments are not byte-aligned. */
364 unsigned m_bit_aligned_arg
: 1;
367 /* Class to manage function summaries. */
369 class GTY((user
)) ipa_sra_function_summaries
370 : public function_summary
<isra_func_summary
*>
373 ipa_sra_function_summaries (symbol_table
*table
, bool ggc
):
374 function_summary
<isra_func_summary
*> (table
, ggc
) { }
376 virtual void duplicate (cgraph_node
*, cgraph_node
*,
377 isra_func_summary
*old_sum
,
378 isra_func_summary
*new_sum
);
379 virtual void insert (cgraph_node
*, isra_func_summary
*);
382 /* Hook that is called by summary when a node is duplicated. */
385 ipa_sra_function_summaries::duplicate (cgraph_node
*, cgraph_node
*,
386 isra_func_summary
*old_sum
,
387 isra_func_summary
*new_sum
)
389 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
391 new_sum
->m_candidate
= old_sum
->m_candidate
;
392 new_sum
->m_returns_value
= old_sum
->m_returns_value
;
393 new_sum
->m_return_ignored
= old_sum
->m_return_ignored
;
394 gcc_assert (!old_sum
->m_queued
);
395 new_sum
->m_queued
= false;
397 unsigned param_count
= vec_safe_length (old_sum
->m_parameters
);
400 vec_safe_reserve_exact (new_sum
->m_parameters
, param_count
);
401 new_sum
->m_parameters
->quick_grow_cleared (param_count
);
402 for (unsigned i
= 0; i
< param_count
; i
++)
404 isra_param_desc
*s
= &(*old_sum
->m_parameters
)[i
];
405 isra_param_desc
*d
= &(*new_sum
->m_parameters
)[i
];
407 d
->param_size_limit
= s
->param_size_limit
;
408 d
->size_reached
= s
->size_reached
;
409 d
->locally_unused
= s
->locally_unused
;
410 d
->split_candidate
= s
->split_candidate
;
411 d
->by_ref
= s
->by_ref
;
413 unsigned acc_count
= vec_safe_length (s
->accesses
);
414 vec_safe_reserve_exact (d
->accesses
, acc_count
);
415 for (unsigned j
= 0; j
< acc_count
; j
++)
417 param_access
*from
= (*s
->accesses
)[j
];
418 param_access
*to
= ggc_cleared_alloc
<param_access
> ();
419 to
->type
= from
->type
;
420 to
->alias_ptr_type
= from
->alias_ptr_type
;
421 to
->unit_offset
= from
->unit_offset
;
422 to
->unit_size
= from
->unit_size
;
423 to
->certain
= from
->certain
;
424 d
->accesses
->quick_push (to
);
429 /* Pointer to the pass function summary holder. */
431 static GTY(()) ipa_sra_function_summaries
*func_sums
;
433 /* Hook that is called by summary when new node appears. */
436 ipa_sra_function_summaries::insert (cgraph_node
*node
, isra_func_summary
*)
438 if (opt_for_fn (node
->decl
, flag_ipa_sra
))
440 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
441 ipa_sra_summarize_function (node
);
445 func_sums
->remove (node
);
448 /* Class to manage call summaries. */
450 class ipa_sra_call_summaries
: public call_summary
<isra_call_summary
*>
453 ipa_sra_call_summaries (symbol_table
*table
):
454 call_summary
<isra_call_summary
*> (table
) { }
456 /* Duplicate info when an edge is cloned. */
457 virtual void duplicate (cgraph_edge
*, cgraph_edge
*,
458 isra_call_summary
*old_sum
,
459 isra_call_summary
*new_sum
);
462 static ipa_sra_call_summaries
*call_sums
;
465 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
466 ARG_COUNT is the number of actual arguments passed. */
469 isra_call_summary::init_inputs (unsigned arg_count
)
473 gcc_checking_assert (m_arg_flow
.length () == 0);
476 if (m_arg_flow
.length () == 0)
478 m_arg_flow
.reserve_exact (arg_count
);
479 m_arg_flow
.quick_grow_cleared (arg_count
);
482 gcc_checking_assert (arg_count
== m_arg_flow
.length ());
485 /* Dump all information in call summary to F. */
488 isra_call_summary::dump (FILE *f
)
490 if (m_return_ignored
)
491 fprintf (f
, " return value ignored\n");
492 if (m_return_returned
)
493 fprintf (f
, " return value used only to compute caller return value\n");
494 for (unsigned i
= 0; i
< m_arg_flow
.length (); i
++)
496 fprintf (f
, " Parameter %u:\n", i
);
497 isra_param_flow
*ipf
= &m_arg_flow
[i
];
502 fprintf (f
, " Scalar param sources: ");
503 for (int j
= 0; j
< ipf
->length
; j
++)
509 fprintf (f
, "%i", (int) ipf
->inputs
[j
]);
513 if (ipf
->aggregate_pass_through
)
514 fprintf (f
, " Aggregate pass through from the param given above, "
515 "unit offset: %u , unit size: %u\n",
516 ipf
->unit_offset
, ipf
->unit_size
);
517 if (ipf
->pointer_pass_through
)
518 fprintf (f
, " Pointer pass through from the param given above, "
519 "safe_to_import_accesses: %u\n", ipf
->safe_to_import_accesses
);
523 /* Duplicate edge summary when an edge is cloned. */
526 ipa_sra_call_summaries::duplicate (cgraph_edge
*, cgraph_edge
*,
527 isra_call_summary
*old_sum
,
528 isra_call_summary
*new_sum
)
530 unsigned arg_count
= old_sum
->m_arg_flow
.length ();
531 new_sum
->init_inputs (arg_count
);
532 for (unsigned i
= 0; i
< arg_count
; i
++)
533 new_sum
->m_arg_flow
[i
] = old_sum
->m_arg_flow
[i
];
535 new_sum
->m_return_ignored
= old_sum
->m_return_ignored
;
536 new_sum
->m_return_returned
= old_sum
->m_return_returned
;
537 new_sum
->m_bit_aligned_arg
= old_sum
->m_bit_aligned_arg
;
541 /* With all GTY stuff done, we can move to anonymous namespace. */
543 /* Quick mapping from a decl to its param descriptor. */
545 hash_map
<tree
, gensum_param_desc
*> *decl2desc
;
547 /* Countdown of allowed Alias analysis steps during summary building. */
549 int aa_walking_limit
;
551 /* This is a table in which for each basic block and parameter there is a
552 distance (offset + size) in that parameter which is dereferenced and
553 accessed in that BB. */
554 HOST_WIDE_INT
*bb_dereferences
= NULL
;
555 /* How many by-reference parameters there are in the current function. */
558 /* Bitmap of BBs that can cause the function to "stop" progressing by
559 returning, throwing externally, looping infinitely or calling a function
560 which might abort etc.. */
563 /* Obstack to allocate various small structures required only when generating
564 summary for a function. */
565 struct obstack gensum_obstack
;
567 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
568 attributes, return true otherwise. NODE is the cgraph node of the current
572 ipa_sra_preliminary_function_checks (cgraph_node
*node
)
574 if (!node
->can_change_signature
)
577 fprintf (dump_file
, "Function cannot change signature.\n");
581 if (!tree_versionable_function_p (node
->decl
))
584 fprintf (dump_file
, "Function is not versionable.\n");
588 if (!opt_for_fn (node
->decl
, optimize
)
589 || !opt_for_fn (node
->decl
, flag_ipa_sra
))
592 fprintf (dump_file
, "Not optimizing or IPA-SRA turned off for this "
597 if (DECL_VIRTUAL_P (node
->decl
))
600 fprintf (dump_file
, "Function is a virtual method.\n");
604 struct function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
608 fprintf (dump_file
, "Function uses stdarg. \n");
612 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
)))
615 fprintf (dump_file
, "Function type has attributes. \n");
619 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
622 fprintf (dump_file
, "Always inline function will be inlined "
630 /* Print access tree starting at ACCESS to F. */
633 dump_gensum_access (FILE *f
, gensum_param_access
*access
, unsigned indent
)
636 for (unsigned i
= 0; i
< indent
; i
++)
638 fprintf (f
, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC
,
640 fprintf (f
, ", size: " HOST_WIDE_INT_PRINT_DEC
, access
->size
);
641 fprintf (f
, ", type: ");
642 print_generic_expr (f
, access
->type
);
643 fprintf (f
, ", alias_ptr_type: ");
644 print_generic_expr (f
, access
->alias_ptr_type
);
645 fprintf (f
, ", nonarg: %u, reverse: %u\n", access
->nonarg
, access
->reverse
);
646 for (gensum_param_access
*ch
= access
->first_child
;
648 ch
= ch
->next_sibling
)
649 dump_gensum_access (f
, ch
, indent
+ 2);
653 /* Print access tree starting at ACCESS to F. */
656 dump_isra_access (FILE *f
, param_access
*access
)
658 fprintf (f
, " * Access to unit offset: %u", access
->unit_offset
);
659 fprintf (f
, ", unit size: %u", access
->unit_size
);
660 fprintf (f
, ", type: ");
661 print_generic_expr (f
, access
->type
);
662 fprintf (f
, ", alias_ptr_type: ");
663 print_generic_expr (f
, access
->alias_ptr_type
);
665 fprintf (f
, ", certain");
667 fprintf (f
, ", not-certain");
669 fprintf (f
, ", reverse");
673 /* Dump access tree starting at ACCESS to stderr. */
676 debug_isra_access (param_access
*access
)
678 dump_isra_access (stderr
, access
);
681 /* Dump DESC to F. */
684 dump_gensum_param_descriptor (FILE *f
, gensum_param_desc
*desc
)
686 if (desc
->locally_unused
)
687 fprintf (f
, " unused with %i call_uses\n", desc
->call_uses
);
688 if (!desc
->split_candidate
)
690 fprintf (f
, " not a candidate\n");
694 fprintf (f
, " by_ref with %u pass throughs\n", desc
->ptr_pt_count
);
696 for (gensum_param_access
*acc
= desc
->accesses
; acc
; acc
= acc
->next_sibling
)
697 dump_gensum_access (f
, acc
, 2);
700 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
704 dump_gensum_param_descriptors (FILE *f
, tree fndecl
,
705 vec
<gensum_param_desc
> *param_descriptions
)
707 tree parm
= DECL_ARGUMENTS (fndecl
);
709 i
< param_descriptions
->length ();
710 ++i
, parm
= DECL_CHAIN (parm
))
712 fprintf (f
, " Descriptor for parameter %i ", i
);
713 print_generic_expr (f
, parm
, TDF_UID
);
715 dump_gensum_param_descriptor (f
, &(*param_descriptions
)[i
]);
720 /* Dump DESC to F. */
723 dump_isra_param_descriptor (FILE *f
, isra_param_desc
*desc
)
725 if (desc
->locally_unused
)
727 fprintf (f
, " (locally) unused\n");
729 if (!desc
->split_candidate
)
731 fprintf (f
, " not a candidate for splitting\n");
734 fprintf (f
, " param_size_limit: %u, size_reached: %u%s\n",
735 desc
->param_size_limit
, desc
->size_reached
,
736 desc
->by_ref
? ", by_ref" : "");
738 for (unsigned i
= 0; i
< vec_safe_length (desc
->accesses
); ++i
)
740 param_access
*access
= (*desc
->accesses
)[i
];
741 dump_isra_access (f
, access
);
745 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
749 dump_isra_param_descriptors (FILE *f
, tree fndecl
,
750 isra_func_summary
*ifs
)
752 tree parm
= DECL_ARGUMENTS (fndecl
);
753 if (!ifs
->m_parameters
)
755 fprintf (f
, " parameter descriptors not available\n");
760 i
< ifs
->m_parameters
->length ();
761 ++i
, parm
= DECL_CHAIN (parm
))
763 fprintf (f
, " Descriptor for parameter %i ", i
);
764 print_generic_expr (f
, parm
, TDF_UID
);
766 dump_isra_param_descriptor (f
, &(*ifs
->m_parameters
)[i
]);
770 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
771 function fails return false, otherwise return true. SRC must fit into an
772 unsigned char. Used for purposes of transitive unused parameter
776 add_src_to_param_flow (isra_param_flow
*param_flow
, int src
)
778 gcc_checking_assert (src
>= 0 && src
<= UCHAR_MAX
);
779 if (param_flow
->length
== IPA_SRA_MAX_PARAM_FLOW_LEN
)
782 param_flow
->inputs
[(int) param_flow
->length
] = src
;
783 param_flow
->length
++;
787 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
788 it is the only input. Used for purposes of transitive parameter
792 set_single_param_flow_source (isra_param_flow
*param_flow
, int src
)
794 gcc_checking_assert (src
>= 0 && src
<= UCHAR_MAX
);
795 if (param_flow
->length
== 0)
797 param_flow
->inputs
[0] = src
;
798 param_flow
->length
= 1;
800 else if (param_flow
->length
== 1)
801 gcc_assert (param_flow
->inputs
[0] == src
);
806 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
810 get_single_param_flow_source (const isra_param_flow
*param_flow
)
812 gcc_assert (param_flow
->length
== 1);
813 return param_flow
->inputs
[0];
816 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
817 in FUN represented with NODE and return a negative number if any of them is
818 used for something else than either an actual call argument, simple
819 arithmetic operation or debug statement. If there are no such uses, return
820 the number of actual arguments that this parameter eventually feeds to (or
821 zero if there is none). For any such parameter, mark PARM_NUM as one of its
822 sources. ANALYZED is a bitmap that tracks which SSA names we have already
823 started investigating. */
826 isra_track_scalar_value_uses (function
*fun
, cgraph_node
*node
, tree name
,
827 int parm_num
, bitmap analyzed
)
830 imm_use_iterator imm_iter
;
833 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, name
)
835 if (is_gimple_debug (stmt
))
838 /* TODO: We could handle at least const builtin functions like arithmetic
840 if (is_gimple_call (stmt
))
844 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
847 gcall
*call
= as_a
<gcall
*> (stmt
);
849 if (gimple_call_internal_p (call
)
850 || (arg_count
= gimple_call_num_args (call
)) == 0)
853 BREAK_FROM_IMM_USE_STMT (imm_iter
);
856 cgraph_edge
*cs
= node
->get_edge (stmt
);
857 gcc_checking_assert (cs
);
858 isra_call_summary
*csum
= call_sums
->get_create (cs
);
859 csum
->init_inputs (arg_count
);
862 for (unsigned i
= 0; i
< arg_count
; i
++)
863 if (gimple_call_arg (call
, i
) == name
)
865 if (!add_src_to_param_flow (&csum
->m_arg_flow
[i
], parm_num
))
874 || all_uses
!= simple_uses
)
877 BREAK_FROM_IMM_USE_STMT (imm_iter
);
881 else if (!stmt_unremovable_because_of_non_call_eh_p (fun
, stmt
)
882 && ((is_gimple_assign (stmt
) && !gimple_has_volatile_ops (stmt
))
883 || gimple_code (stmt
) == GIMPLE_PHI
))
886 if (gimple_code (stmt
) == GIMPLE_PHI
)
887 lhs
= gimple_phi_result (stmt
);
889 lhs
= gimple_assign_lhs (stmt
);
891 if (TREE_CODE (lhs
) != SSA_NAME
)
894 BREAK_FROM_IMM_USE_STMT (imm_iter
);
896 gcc_assert (!gimple_vdef (stmt
));
897 if (bitmap_set_bit (analyzed
, SSA_NAME_VERSION (lhs
)))
899 int tmp
= isra_track_scalar_value_uses (fun
, node
, lhs
, parm_num
,
904 BREAK_FROM_IMM_USE_STMT (imm_iter
);
912 BREAK_FROM_IMM_USE_STMT (imm_iter
);
918 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
919 also described by NODE) and simple arithmetic calculations involving PARM
920 and return false if any of them is used for something else than either an
921 actual call argument, simple arithmetic operation or debug statement. If
922 there are no such uses, return true and store the number of actual arguments
923 that this parameter eventually feeds to (or zero if there is none) to
924 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
927 This function is similar to ptr_parm_has_nonarg_uses but its results are
928 meant for unused parameter removal, as opposed to splitting of parameters
929 passed by reference or converting them to passed by value.
933 isra_track_scalar_param_local_uses (function
*fun
, cgraph_node
*node
, tree parm
,
934 int parm_num
, int *call_uses_p
)
936 gcc_checking_assert (is_gimple_reg (parm
));
938 tree name
= ssa_default_def (fun
, parm
);
939 if (!name
|| has_zero_uses (name
))
945 /* Edge summaries can only handle callers with fewer than 256 parameters. */
946 if (parm_num
> UCHAR_MAX
)
949 bitmap analyzed
= BITMAP_ALLOC (NULL
);
950 int call_uses
= isra_track_scalar_value_uses (fun
, node
, name
, parm_num
,
952 BITMAP_FREE (analyzed
);
955 *call_uses_p
= call_uses
;
959 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
960 examine whether there are any nonarg uses that are not actual arguments or
961 otherwise infeasible uses. If so, return true, otherwise return false.
962 Create pass-through IPA flow records for any direct uses as argument calls
963 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
964 must represent the function that is currently analyzed, PARM_NUM must be the
965 index of the analyzed parameter.
967 This function is similar to isra_track_scalar_param_local_uses but its
968 results are meant for splitting of parameters passed by reference or turning
969 them into bits passed by value, as opposed to generic unused parameter
974 ptr_parm_has_nonarg_uses (cgraph_node
*node
, function
*fun
, tree parm
,
975 int parm_num
, unsigned *pt_count_p
)
979 tree name
= ssa_default_def (fun
, parm
);
981 unsigned pt_count
= 0;
983 if (!name
|| has_zero_uses (name
))
986 /* Edge summaries can only handle callers with fewer than 256 parameters. */
987 if (parm_num
> UCHAR_MAX
)
990 FOR_EACH_IMM_USE_STMT (stmt
, ui
, name
)
992 unsigned uses_ok
= 0;
995 if (is_gimple_debug (stmt
))
998 if (gimple_assign_single_p (stmt
))
1000 tree rhs
= gimple_assign_rhs1 (stmt
);
1001 while (handled_component_p (rhs
))
1002 rhs
= TREE_OPERAND (rhs
, 0);
1003 if (TREE_CODE (rhs
) == MEM_REF
1004 && TREE_OPERAND (rhs
, 0) == name
1005 && integer_zerop (TREE_OPERAND (rhs
, 1))
1006 && types_compatible_p (TREE_TYPE (rhs
),
1007 TREE_TYPE (TREE_TYPE (name
)))
1008 && !TREE_THIS_VOLATILE (rhs
))
1011 else if (is_gimple_call (stmt
))
1013 gcall
*call
= as_a
<gcall
*> (stmt
);
1015 if (gimple_call_internal_p (call
)
1016 || (arg_count
= gimple_call_num_args (call
)) == 0)
1019 BREAK_FROM_IMM_USE_STMT (ui
);
1022 cgraph_edge
*cs
= node
->get_edge (stmt
);
1023 gcc_checking_assert (cs
);
1024 isra_call_summary
*csum
= call_sums
->get_create (cs
);
1025 csum
->init_inputs (arg_count
);
1027 for (unsigned i
= 0; i
< arg_count
; ++i
)
1029 tree arg
= gimple_call_arg (stmt
, i
);
1033 /* TODO: Allow &MEM_REF[name + offset] here,
1034 ipa_param_body_adjustments::modify_call_stmt has to be
1036 csum
->m_arg_flow
[i
].pointer_pass_through
= true;
1037 set_single_param_flow_source (&csum
->m_arg_flow
[i
], parm_num
);
1043 while (handled_component_p (arg
))
1044 arg
= TREE_OPERAND (arg
, 0);
1045 if (TREE_CODE (arg
) == MEM_REF
1046 && TREE_OPERAND (arg
, 0) == name
1047 && integer_zerop (TREE_OPERAND (arg
, 1))
1048 && types_compatible_p (TREE_TYPE (arg
),
1049 TREE_TYPE (TREE_TYPE (name
)))
1050 && !TREE_THIS_VOLATILE (arg
))
1055 /* If the number of valid uses does not match the number of
1056 uses in this stmt there is an unhandled use. */
1057 unsigned all_uses
= 0;
1058 FOR_EACH_IMM_USE_ON_STMT (use_p
, ui
)
1061 gcc_checking_assert (uses_ok
<= all_uses
);
1062 if (uses_ok
!= all_uses
)
1065 BREAK_FROM_IMM_USE_STMT (ui
);
1069 *pt_count_p
= pt_count
;
1073 /* Initialize vector of parameter descriptors of NODE. Return true if there
1074 are any candidates for splitting or unused aggregate parameter removal (the
1075 function may return false if there are candidates for removal of register
1076 parameters) and function body must be scanned. */
1079 create_parameter_descriptors (cgraph_node
*node
,
1080 vec
<gensum_param_desc
> *param_descriptions
)
1082 function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
1086 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
1088 parm
= DECL_CHAIN (parm
), num
++)
1091 gensum_param_desc
*desc
= &(*param_descriptions
)[num
];
1092 /* param_descriptions vector is grown cleared in the caller. */
1093 desc
->param_number
= num
;
1094 decl2desc
->put (parm
, desc
);
1096 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1097 print_generic_expr (dump_file
, parm
, TDF_UID
);
1099 int scalar_call_uses
;
1100 tree type
= TREE_TYPE (parm
);
1101 if (TREE_THIS_VOLATILE (parm
))
1103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1104 fprintf (dump_file
, " not a candidate, is volatile\n");
1107 if (!is_gimple_reg_type (type
) && is_va_list_type (type
))
1109 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1110 fprintf (dump_file
, " not a candidate, is a va_list type\n");
1113 if (TREE_ADDRESSABLE (parm
))
1115 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1116 fprintf (dump_file
, " not a candidate, is addressable\n");
1119 if (TREE_ADDRESSABLE (type
))
1121 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1122 fprintf (dump_file
, " not a candidate, type cannot be split\n");
1126 if (is_gimple_reg (parm
)
1127 && !isra_track_scalar_param_local_uses (fun
, node
, parm
, num
,
1130 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1131 fprintf (dump_file
, " is a scalar with only %i call uses\n",
1134 desc
->locally_unused
= true;
1135 desc
->call_uses
= scalar_call_uses
;
1138 if (POINTER_TYPE_P (type
))
1140 type
= TREE_TYPE (type
);
1142 if (TREE_CODE (type
) == FUNCTION_TYPE
1143 || TREE_CODE (type
) == METHOD_TYPE
)
1145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1146 fprintf (dump_file
, " not a candidate, reference to "
1150 if (TYPE_VOLATILE (type
))
1152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1153 fprintf (dump_file
, " not a candidate, reference to "
1154 "a volatile type\n");
1157 if (TREE_CODE (type
) == ARRAY_TYPE
1158 && TYPE_NONALIASED_COMPONENT (type
))
1160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1161 fprintf (dump_file
, " not a candidate, reference to "
1162 "a nonaliased component array\n");
1165 if (!is_gimple_reg (parm
))
1167 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1168 fprintf (dump_file
, " not a candidate, a reference which is "
1169 "not a gimple register (probably addressable)\n");
1172 if (is_va_list_type (type
))
1174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1175 fprintf (dump_file
, " not a candidate, reference to "
1179 if (ptr_parm_has_nonarg_uses (node
, fun
, parm
, num
,
1180 &desc
->ptr_pt_count
))
1182 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1183 fprintf (dump_file
, " not a candidate, reference has "
1187 desc
->by_ref
= true;
1189 else if (!AGGREGATE_TYPE_P (type
))
1191 /* This is in an else branch because scalars passed by reference are
1192 still candidates to be passed by value. */
1193 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1194 fprintf (dump_file
, " not a candidate, not an aggregate\n");
1198 if (!COMPLETE_TYPE_P (type
))
1200 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1201 fprintf (dump_file
, " not a candidate, not a complete type\n");
1204 if (!tree_fits_uhwi_p (TYPE_SIZE (type
)))
1206 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1207 fprintf (dump_file
, " not a candidate, size not representable\n");
1210 unsigned HOST_WIDE_INT type_size
1211 = tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
1213 || type_size
>= ISRA_ARG_SIZE_LIMIT
)
1215 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1216 fprintf (dump_file
, " not a candidate, has zero or huge size\n");
1219 if (type_internals_preclude_sra_p (type
, &msg
))
1221 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1222 fprintf (dump_file
, " not a candidate, %s\n", msg
);
1226 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1227 fprintf (dump_file
, " is a candidate\n");
1230 desc
->split_candidate
= true;
1232 desc
->deref_index
= by_ref_count
++;
1237 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1238 found, which happens if DECL is for a static chain. */
1240 static gensum_param_desc
*
1241 get_gensum_param_desc (tree decl
)
1243 gcc_checking_assert (TREE_CODE (decl
) == PARM_DECL
);
1244 gensum_param_desc
**slot
= decl2desc
->get (decl
);
1246 /* This can happen for static chains which we cannot handle so far. */
1248 gcc_checking_assert (*slot
);
1253 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1254 write REASON to the dump file if there is one. */
1257 disqualify_split_candidate (gensum_param_desc
*desc
, const char *reason
)
1259 if (!desc
->split_candidate
)
1262 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1263 fprintf (dump_file
, "! Disqualifying parameter number %i - %s\n",
1264 desc
->param_number
, reason
);
1266 desc
->split_candidate
= false;
1269 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1273 disqualify_split_candidate (tree decl
, const char *reason
)
1275 gensum_param_desc
*desc
= get_gensum_param_desc (decl
);
1277 disqualify_split_candidate (desc
, reason
);
1280 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1281 first, check that there are not too many of them already. If so, do not
1282 allocate anything and return NULL. */
1284 static gensum_param_access
*
1285 allocate_access (gensum_param_desc
*desc
,
1286 HOST_WIDE_INT offset
, HOST_WIDE_INT size
)
1288 if (desc
->access_count
1289 == (unsigned) param_ipa_sra_max_replacements
)
1291 disqualify_split_candidate (desc
, "Too many replacement candidates");
1295 gensum_param_access
*access
1296 = (gensum_param_access
*) obstack_alloc (&gensum_obstack
,
1297 sizeof (gensum_param_access
));
1298 memset (access
, 0, sizeof (*access
));
1299 access
->offset
= offset
;
1300 access
->size
= size
;
/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context
{
  ISRA_CTX_LOAD,
  ISRA_CTX_ARG,
  ISRA_CTX_STORE
};
1311 /* Return an access describing memory access to the variable described by DESC
1312 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1313 at a certain tree level FIRST. Attempt to create it and put into the
1314 appropriate place in the access tree if does not exist, but fail and return
1315 NULL if there are already too many accesses, if it would create a partially
1316 overlapping access or if an access would end up within a pre-existing
1319 static gensum_param_access
*
1320 get_access_1 (gensum_param_desc
*desc
, gensum_param_access
**first
,
1321 HOST_WIDE_INT offset
, HOST_WIDE_INT size
, isra_scan_context ctx
)
1323 gensum_param_access
*access
= *first
, **ptr
= first
;
1327 /* No pre-existing access at this level, just create it. */
1328 gensum_param_access
*a
= allocate_access (desc
, offset
, size
);
1335 if (access
->offset
>= offset
+ size
)
1337 /* We want to squeeze it in front of the very first access, just do
1339 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1342 r
->next_sibling
= access
;
1347 /* Skip all accesses that have to come before us until the next sibling is
1349 while (offset
>= access
->offset
+ access
->size
1350 && access
->next_sibling
1351 && access
->next_sibling
->offset
< offset
+ size
)
1353 ptr
= &access
->next_sibling
;
1354 access
= access
->next_sibling
;
1357 /* At this point we know we do not belong before access. */
1358 gcc_assert (access
->offset
< offset
+ size
);
1360 if (access
->offset
== offset
&& access
->size
== size
)
1361 /* We found what we were looking for. */
1364 if (access
->offset
<= offset
1365 && access
->offset
+ access
->size
>= offset
+ size
)
1367 /* We fit into access which is larger than us. We need to find/create
1368 something below access. But we only allow nesting in call
1373 return get_access_1 (desc
, &access
->first_child
, offset
, size
, ctx
);
1376 if (offset
<= access
->offset
1377 && offset
+ size
>= access
->offset
+ access
->size
)
1378 /* We are actually bigger than access, which fully fits into us, take its
1379 place and make all accesses fitting into it its children. */
1381 /* But first, we only allow nesting in call arguments so check if that is
1382 what we are trying to represent. */
1383 if (ctx
!= ISRA_CTX_ARG
)
1386 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1389 r
->first_child
= access
;
1391 while (access
->next_sibling
1392 && access
->next_sibling
->offset
< offset
+ size
)
1393 access
= access
->next_sibling
;
1394 if (access
->offset
+ access
->size
> offset
+ size
)
1396 /* This must be a different access, which are sorted, so the
1397 following must be true and this signals a partial overlap. */
1398 gcc_assert (access
->offset
> offset
);
1402 r
->next_sibling
= access
->next_sibling
;
1403 access
->next_sibling
= NULL
;
1408 if (offset
>= access
->offset
+ access
->size
)
1410 /* We belong after access. */
1411 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1414 r
->next_sibling
= access
->next_sibling
;
1415 access
->next_sibling
= r
;
1419 if (offset
< access
->offset
)
1421 /* We know the following, otherwise we would have created a
1423 gcc_checking_assert (offset
+ size
< access
->offset
+ access
->size
);
1427 if (offset
+ size
> access
->offset
+ access
->size
)
1430 gcc_checking_assert (offset
> access
->offset
);
1437 /* Return an access describing memory access to the variable described by DESC
1438 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1439 to create if it does not exist, but fail and return NULL if there are
1440 already too many accesses, if it would create a partially overlapping access
1441 or if an access would end up in a non-call access. */
1443 static gensum_param_access
*
1444 get_access (gensum_param_desc
*desc
, HOST_WIDE_INT offset
, HOST_WIDE_INT size
,
1445 isra_scan_context ctx
)
1447 gcc_checking_assert (desc
->split_candidate
);
1449 gensum_param_access
*access
= get_access_1 (desc
, &desc
->accesses
, offset
,
1453 disqualify_split_candidate (desc
,
1454 "Bad access overlap or too many accesses");
1460 case ISRA_CTX_STORE
:
1461 gcc_assert (!desc
->by_ref
);
1464 access
->nonarg
= true;
1473 /* Verify that parameter access tree starting with ACCESS is in good shape.
1474 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1475 ACCESS or zero if there is none. */
1478 verify_access_tree_1 (gensum_param_access
*access
, HOST_WIDE_INT parent_offset
,
1479 HOST_WIDE_INT parent_size
)
1483 gcc_assert (access
->offset
>= 0 && access
->size
> 0);
1485 if (parent_size
!= 0)
1487 if (access
->offset
< parent_offset
)
1489 error ("Access offset before parent offset");
1492 if (access
->size
>= parent_size
)
1494 error ("Access size greater or equal to its parent size");
1497 if (access
->offset
+ access
->size
> parent_offset
+ parent_size
)
1499 error ("Access terminates outside of its parent");
1504 if (verify_access_tree_1 (access
->first_child
, access
->offset
,
1508 if (access
->next_sibling
1509 && (access
->next_sibling
->offset
< access
->offset
+ access
->size
))
1511 error ("Access overlaps with its sibling");
1515 access
= access
->next_sibling
;
1520 /* Verify that parameter access tree starting with ACCESS is in good shape,
1521 halt compilation and dump the tree to stderr if not. */
1524 isra_verify_access_tree (gensum_param_access
*access
)
1526 if (verify_access_tree_1 (access
, 0, 0))
1528 for (; access
; access
= access
->next_sibling
)
1529 dump_gensum_access (stderr
, access
, 2);
1530 internal_error ("IPA-SRA access verification failed");
1535 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1536 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1539 asm_visit_addr (gimple
*, tree op
, tree
, void *)
1541 op
= get_base_address (op
);
1543 && TREE_CODE (op
) == PARM_DECL
)
1544 disqualify_split_candidate (op
, "Non-scalarizable GIMPLE_ASM operand.");
1549 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1550 basic block BB, unless the BB has already been marked as a potentially
1554 mark_param_dereference (gensum_param_desc
*desc
, HOST_WIDE_INT dist
,
1557 gcc_assert (desc
->by_ref
);
1558 gcc_checking_assert (desc
->split_candidate
);
1560 if (bitmap_bit_p (final_bbs
, bb
->index
))
1563 int idx
= bb
->index
* by_ref_count
+ desc
->deref_index
;
1564 if (bb_dereferences
[idx
] < dist
)
1565 bb_dereferences
[idx
] = dist
;
1568 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1569 previously recorded OLD_TYPE. */
1572 type_prevails_p (tree old_type
, tree new_type
)
1574 if (old_type
== new_type
)
1577 /* Non-aggregates are always better. */
1578 if (!is_gimple_reg_type (old_type
)
1579 && is_gimple_reg_type (new_type
))
1581 if (is_gimple_reg_type (old_type
)
1582 && !is_gimple_reg_type (new_type
))
1585 /* Prefer any complex or vector type over any other scalar type. */
1586 if (TREE_CODE (old_type
) != COMPLEX_TYPE
1587 && TREE_CODE (old_type
) != VECTOR_TYPE
1588 && (TREE_CODE (new_type
) == COMPLEX_TYPE
1589 || TREE_CODE (new_type
) == VECTOR_TYPE
))
1591 if ((TREE_CODE (old_type
) == COMPLEX_TYPE
1592 || TREE_CODE (old_type
) == VECTOR_TYPE
)
1593 && TREE_CODE (new_type
) != COMPLEX_TYPE
1594 && TREE_CODE (new_type
) != VECTOR_TYPE
)
1597 /* Use the integral type with the bigger precision. */
1598 if (INTEGRAL_TYPE_P (old_type
)
1599 && INTEGRAL_TYPE_P (new_type
))
1600 return (TYPE_PRECISION (new_type
) > TYPE_PRECISION (old_type
));
1602 /* Attempt to disregard any integral type with non-full precision. */
1603 if (INTEGRAL_TYPE_P (old_type
)
1604 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type
))
1605 != TYPE_PRECISION (old_type
)))
1607 if (INTEGRAL_TYPE_P (new_type
)
1608 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type
))
1609 != TYPE_PRECISION (new_type
)))
1611 /* Stabilize the selection. */
1612 return TYPE_UID (old_type
) < TYPE_UID (new_type
);
1615 /* When scanning an expression which is a call argument, this structure
1616 specifies the call and the position of the argument. */
1618 struct scan_call_info
1620 /* Call graph edge representing the call. */
1622 /* Total number of arguments in the call. */
1623 unsigned argument_count
;
1624 /* Number of the actual argument being scanned. */
1628 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1629 call argument described by CALL_INFO. */
1632 record_nonregister_call_use (gensum_param_desc
*desc
,
1633 scan_call_info
*call_info
,
1634 unsigned unit_offset
, unsigned unit_size
)
1636 isra_call_summary
*csum
= call_sums
->get_create (call_info
->cs
);
1637 csum
->init_inputs (call_info
->argument_count
);
1639 isra_param_flow
*param_flow
= &csum
->m_arg_flow
[call_info
->arg_idx
];
1640 param_flow
->aggregate_pass_through
= true;
1641 set_single_param_flow_source (param_flow
, desc
->param_number
);
1642 param_flow
->unit_offset
= unit_offset
;
1643 param_flow
->unit_size
= unit_size
;
1647 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1651 mark_maybe_modified (ao_ref
*, tree
, void *data
)
1653 bool *maybe_modified
= (bool *) data
;
1654 *maybe_modified
= true;
1658 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1659 specifies whether EXPR is used in a load, store or as an argument call. BB
1660 must be the basic block in which expr resides. If CTX specifies call
1661 argument context, CALL_INFO must describe that call and argument position,
1662 otherwise it is ignored. */
1665 scan_expr_access (tree expr
, gimple
*stmt
, isra_scan_context ctx
,
1666 basic_block bb
, scan_call_info
*call_info
= NULL
)
1668 poly_int64 poffset
, psize
, pmax_size
;
1669 HOST_WIDE_INT offset
, size
, max_size
;
1674 if (TREE_CODE (expr
) == BIT_FIELD_REF
1675 || TREE_CODE (expr
) == IMAGPART_EXPR
1676 || TREE_CODE (expr
) == REALPART_EXPR
)
1677 expr
= TREE_OPERAND (expr
, 0);
1679 base
= get_ref_base_and_extent (expr
, &poffset
, &psize
, &pmax_size
, &reverse
);
1681 if (TREE_CODE (base
) == MEM_REF
)
1683 tree op
= TREE_OPERAND (base
, 0);
1684 if (TREE_CODE (op
) != SSA_NAME
1685 || !SSA_NAME_IS_DEFAULT_DEF (op
))
1687 base
= SSA_NAME_VAR (op
);
1692 if (TREE_CODE (base
) != PARM_DECL
)
1695 gensum_param_desc
*desc
= get_gensum_param_desc (base
);
1696 if (!desc
|| !desc
->split_candidate
)
1699 if (!poffset
.is_constant (&offset
)
1700 || !psize
.is_constant (&size
)
1701 || !pmax_size
.is_constant (&max_size
))
1703 disqualify_split_candidate (desc
, "Encountered a polynomial-sized "
1707 if (size
< 0 || size
!= max_size
)
1709 disqualify_split_candidate (desc
, "Encountered a variable sized access.");
1712 if (TREE_CODE (expr
) == COMPONENT_REF
1713 && DECL_BIT_FIELD (TREE_OPERAND (expr
, 1)))
1715 disqualify_split_candidate (desc
, "Encountered a bit-field access.");
1720 disqualify_split_candidate (desc
, "Encountered an access at a "
1721 "negative offset.");
1724 gcc_assert ((offset
% BITS_PER_UNIT
) == 0);
1725 gcc_assert ((size
% BITS_PER_UNIT
) == 0);
1726 if ((offset
/ BITS_PER_UNIT
) >= (UINT_MAX
- ISRA_ARG_SIZE_LIMIT
)
1727 || (size
/ BITS_PER_UNIT
) >= ISRA_ARG_SIZE_LIMIT
)
1729 disqualify_split_candidate (desc
, "Encountered an access with too big "
1734 tree type
= TREE_TYPE (expr
);
1735 unsigned int exp_align
= get_object_alignment (expr
);
1737 if (exp_align
< TYPE_ALIGN (type
))
1739 disqualify_split_candidate (desc
, "Underaligned access.");
1747 disqualify_split_candidate (desc
, "Dereferencing a non-reference.");
1750 else if (ctx
== ISRA_CTX_STORE
)
1752 disqualify_split_candidate (desc
, "Storing to data passed by "
1757 if (!aa_walking_limit
)
1759 disqualify_split_candidate (desc
, "Out of alias analysis step "
1764 gcc_checking_assert (gimple_vuse (stmt
));
1765 bool maybe_modified
= false;
1768 ao_ref_init (&ar
, expr
);
1769 bitmap visited
= BITMAP_ALLOC (NULL
);
1770 int walked
= walk_aliased_vdefs (&ar
, gimple_vuse (stmt
),
1771 mark_maybe_modified
, &maybe_modified
,
1772 &visited
, NULL
, aa_walking_limit
);
1773 BITMAP_FREE (visited
);
1776 gcc_assert (aa_walking_limit
> walked
);
1777 aa_walking_limit
= aa_walking_limit
- walked
;
1780 aa_walking_limit
= 0;
1781 if (maybe_modified
|| walked
< 0)
1783 disqualify_split_candidate (desc
, "Data passed by reference possibly "
1784 "modified through an alias.");
1788 mark_param_dereference (desc
, offset
+ size
, bb
);
1791 /* Pointer parameters with direct uses should have been ruled out by
1792 analyzing SSA default def when looking at the parameters. */
1793 gcc_assert (!desc
->by_ref
);
1795 gensum_param_access
*access
= get_access (desc
, offset
, size
, ctx
);
1799 if (ctx
== ISRA_CTX_ARG
)
1801 gcc_checking_assert (call_info
);
1804 record_nonregister_call_use (desc
, call_info
, offset
/ BITS_PER_UNIT
,
1805 size
/ BITS_PER_UNIT
);
1807 /* This is not a pass-through of a pointer, this is a use like any
1809 access
->nonarg
= true;
1814 access
->type
= type
;
1815 access
->alias_ptr_type
= reference_alias_ptr_type (expr
);
1816 access
->reverse
= reverse
;
1820 if (exp_align
< TYPE_ALIGN (access
->type
))
1822 disqualify_split_candidate (desc
, "Reference has lower alignment "
1823 "than a previous one.");
1826 if (access
->alias_ptr_type
!= reference_alias_ptr_type (expr
))
1828 disqualify_split_candidate (desc
, "Multiple alias pointer types.");
1831 if (access
->reverse
!= reverse
)
1833 disqualify_split_candidate (desc
, "Both normal and reverse "
1834 "scalar storage order.");
1838 && (AGGREGATE_TYPE_P (type
) || AGGREGATE_TYPE_P (access
->type
))
1839 && (TYPE_MAIN_VARIANT (access
->type
) != TYPE_MAIN_VARIANT (type
)))
1841 /* We need the same aggregate type on all accesses to be able to
1842 distinguish transformation spots from pass-through arguments in
1843 the transformation phase. */
1844 disqualify_split_candidate (desc
, "We do not support aggregate "
1849 if (type_prevails_p (access
->type
, type
))
1850 access
->type
= type
;
1854 /* Scan body function described by NODE and FUN and create access trees for
1858 scan_function (cgraph_node
*node
, struct function
*fun
)
1862 FOR_EACH_BB_FN (bb
, fun
)
1864 gimple_stmt_iterator gsi
;
1865 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1867 gimple
*stmt
= gsi_stmt (gsi
);
1869 if (stmt_can_throw_external (fun
, stmt
))
1870 bitmap_set_bit (final_bbs
, bb
->index
);
1871 switch (gimple_code (stmt
))
1875 tree t
= gimple_return_retval (as_a
<greturn
*> (stmt
));
1877 scan_expr_access (t
, stmt
, ISRA_CTX_LOAD
, bb
);
1878 bitmap_set_bit (final_bbs
, bb
->index
);
1883 if (gimple_assign_single_p (stmt
)
1884 && !gimple_clobber_p (stmt
))
1886 tree rhs
= gimple_assign_rhs1 (stmt
);
1887 scan_expr_access (rhs
, stmt
, ISRA_CTX_LOAD
, bb
);
1888 tree lhs
= gimple_assign_lhs (stmt
);
1889 scan_expr_access (lhs
, stmt
, ISRA_CTX_STORE
, bb
);
1895 unsigned argument_count
= gimple_call_num_args (stmt
);
1896 isra_scan_context ctx
= ISRA_CTX_ARG
;
1897 scan_call_info call_info
, *call_info_p
= &call_info
;
1898 if (gimple_call_internal_p (stmt
))
1901 ctx
= ISRA_CTX_LOAD
;
1902 internal_fn ifn
= gimple_call_internal_fn (stmt
);
1903 if (internal_store_fn_p (ifn
))
1904 ctx
= ISRA_CTX_STORE
;
1908 call_info
.cs
= node
->get_edge (stmt
);
1909 call_info
.argument_count
= argument_count
;
1912 for (unsigned i
= 0; i
< argument_count
; i
++)
1914 call_info
.arg_idx
= i
;
1915 scan_expr_access (gimple_call_arg (stmt
, i
), stmt
,
1916 ctx
, bb
, call_info_p
);
1919 tree lhs
= gimple_call_lhs (stmt
);
1921 scan_expr_access (lhs
, stmt
, ISRA_CTX_STORE
, bb
);
1922 int flags
= gimple_call_flags (stmt
);
1923 if ((flags
& (ECF_CONST
| ECF_PURE
)) == 0)
1924 bitmap_set_bit (final_bbs
, bb
->index
);
1930 gasm
*asm_stmt
= as_a
<gasm
*> (stmt
);
1931 walk_stmt_load_store_addr_ops (asm_stmt
, NULL
, NULL
, NULL
,
1933 bitmap_set_bit (final_bbs
, bb
->index
);
1935 for (unsigned i
= 0; i
< gimple_asm_ninputs (asm_stmt
); i
++)
1937 tree t
= TREE_VALUE (gimple_asm_input_op (asm_stmt
, i
));
1938 scan_expr_access (t
, stmt
, ISRA_CTX_LOAD
, bb
);
1940 for (unsigned i
= 0; i
< gimple_asm_noutputs (asm_stmt
); i
++)
1942 tree t
= TREE_VALUE (gimple_asm_output_op (asm_stmt
, i
));
1943 scan_expr_access (t
, stmt
, ISRA_CTX_STORE
, bb
);
1955 /* Return true if SSA_NAME NAME is only used in return statements, or if
1956 results of any operations it is involved in are only used in return
1957 statements. ANALYZED is a bitmap that tracks which SSA names we have
1958 already started investigating. */
1961 ssa_name_only_returned_p (tree name
, bitmap analyzed
)
1964 imm_use_iterator imm_iter
;
1967 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, name
)
1969 if (is_gimple_debug (stmt
))
1972 if (gimple_code (stmt
) == GIMPLE_RETURN
)
1974 tree t
= gimple_return_retval (as_a
<greturn
*> (stmt
));
1978 BREAK_FROM_IMM_USE_STMT (imm_iter
);
1981 else if ((is_gimple_assign (stmt
) && !gimple_has_volatile_ops (stmt
))
1982 || gimple_code (stmt
) == GIMPLE_PHI
)
1984 /* TODO: And perhaps for const function calls too? */
1986 if (gimple_code (stmt
) == GIMPLE_PHI
)
1987 lhs
= gimple_phi_result (stmt
);
1989 lhs
= gimple_assign_lhs (stmt
);
1991 if (TREE_CODE (lhs
) != SSA_NAME
)
1994 BREAK_FROM_IMM_USE_STMT (imm_iter
);
1996 gcc_assert (!gimple_vdef (stmt
));
1997 if (bitmap_set_bit (analyzed
, SSA_NAME_VERSION (lhs
))
1998 && !ssa_name_only_returned_p (lhs
, analyzed
))
2001 BREAK_FROM_IMM_USE_STMT (imm_iter
);
2007 BREAK_FROM_IMM_USE_STMT (imm_iter
);
2013 /* Inspect the uses of the return value of the call associated with CS, and if
2014 it is not used or if it is only used to construct the return value of the
2015 caller, mark it as such in call or caller summary. Also check for
2016 misaligned arguments. */
2019 isra_analyze_call (cgraph_edge
*cs
)
2021 gcall
*call_stmt
= cs
->call_stmt
;
2022 unsigned count
= gimple_call_num_args (call_stmt
);
2023 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2025 for (unsigned i
= 0; i
< count
; i
++)
2027 tree arg
= gimple_call_arg (call_stmt
, i
);
2028 if (is_gimple_reg (arg
))
2032 poly_int64 bitsize
, bitpos
;
2034 int unsignedp
, reversep
, volatilep
= 0;
2035 get_inner_reference (arg
, &bitsize
, &bitpos
, &offset
, &mode
,
2036 &unsignedp
, &reversep
, &volatilep
);
2037 if (!multiple_p (bitpos
, BITS_PER_UNIT
))
2039 csum
->m_bit_aligned_arg
= true;
2044 tree lhs
= gimple_call_lhs (call_stmt
);
2047 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2048 from this function (without being read anywhere). */
2049 if (TREE_CODE (lhs
) == SSA_NAME
)
2051 bitmap analyzed
= BITMAP_ALLOC (NULL
);
2052 if (ssa_name_only_returned_p (lhs
, analyzed
))
2053 csum
->m_return_returned
= true;
2054 BITMAP_FREE (analyzed
);
2058 csum
->m_return_ignored
= true;
2061 /* Look at all calls going out of NODE, described also by IFS and perform all
2062 analyses necessary for IPA-SRA that are not done at body scan time or done
2063 even when body is not scanned because the function is not a candidate. */
2066 isra_analyze_all_outgoing_calls (cgraph_node
*node
)
2068 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2069 isra_analyze_call (cs
);
2070 for (cgraph_edge
*cs
= node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
2071 isra_analyze_call (cs
);
2074 /* Dump a dereferences table with heading STR to file F. */
2077 dump_dereferences_table (FILE *f
, struct function
*fun
, const char *str
)
2081 fprintf (dump_file
, "%s", str
);
2082 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (fun
),
2083 EXIT_BLOCK_PTR_FOR_FN (fun
), next_bb
)
2085 fprintf (f
, "%4i %i ", bb
->index
, bitmap_bit_p (final_bbs
, bb
->index
));
2086 if (bb
!= EXIT_BLOCK_PTR_FOR_FN (fun
))
2089 for (i
= 0; i
< by_ref_count
; i
++)
2091 int idx
= bb
->index
* by_ref_count
+ i
;
2092 fprintf (f
, " %4" HOST_WIDE_INT_PRINT
"d", bb_dereferences
[idx
]);
2097 fprintf (dump_file
, "\n");
2100 /* Propagate distances in bb_dereferences in the opposite direction than the
2101 control flow edges, in each step storing the maximum of the current value
2102 and the minimum of all successors. These steps are repeated until the table
2103 stabilizes. Note that BBs which might terminate the functions (according to
2104 final_bbs bitmap) never updated in this way. */
2107 propagate_dereference_distances (struct function
*fun
)
2111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2112 dump_dereferences_table (dump_file
, fun
,
2113 "Dereference table before propagation:\n");
2115 auto_vec
<basic_block
> queue (last_basic_block_for_fn (fun
));
2116 queue
.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun
));
2117 FOR_EACH_BB_FN (bb
, fun
)
2119 queue
.quick_push (bb
);
2123 while (!queue
.is_empty ())
2127 bool change
= false;
2133 if (bitmap_bit_p (final_bbs
, bb
->index
))
2136 for (i
= 0; i
< by_ref_count
; i
++)
2138 int idx
= bb
->index
* by_ref_count
+ i
;
2140 HOST_WIDE_INT inh
= 0;
2142 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2144 int succ_idx
= e
->dest
->index
* by_ref_count
+ i
;
2146 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (fun
))
2152 inh
= bb_dereferences
[succ_idx
];
2154 else if (bb_dereferences
[succ_idx
] < inh
)
2155 inh
= bb_dereferences
[succ_idx
];
2158 if (!first
&& bb_dereferences
[idx
] < inh
)
2160 bb_dereferences
[idx
] = inh
;
2166 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2171 e
->src
->aux
= e
->src
;
2172 queue
.quick_push (e
->src
);
2176 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2177 dump_dereferences_table (dump_file
, fun
,
2178 "Dereference table after propagation:\n");
2181 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2182 children, return true if the parameter cannot be split, otherwise return
2183 true and update *TOTAL_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be the
2184 index of the entry BB in the function of PARM. */
2187 check_gensum_access (tree parm
, gensum_param_desc
*desc
,
2188 gensum_param_access
*access
,
2189 HOST_WIDE_INT
*nonarg_acc_size
, bool *only_calls
,
2194 *only_calls
= false;
2195 *nonarg_acc_size
+= access
->size
;
2197 if (access
->first_child
)
2199 disqualify_split_candidate (desc
, "Overlapping non-call uses.");
2203 /* Do not decompose a non-BLKmode param in a way that would create
2204 BLKmode params. Especially for by-reference passing (thus,
2205 pointer-type param) this is hardly worthwhile. */
2206 if (DECL_MODE (parm
) != BLKmode
2207 && TYPE_MODE (access
->type
) == BLKmode
)
2209 disqualify_split_candidate (desc
, "Would convert a non-BLK to a BLK.");
2215 int idx
= (entry_bb_index
* by_ref_count
+ desc
->deref_index
);
2216 if ((access
->offset
+ access
->size
) > bb_dereferences
[idx
])
2218 disqualify_split_candidate (desc
, "Would create a possibly "
2219 "illegal dereference in a caller.");
2224 for (gensum_param_access
*ch
= access
->first_child
;
2226 ch
= ch
->next_sibling
)
2227 if (check_gensum_access (parm
, desc
, ch
, nonarg_acc_size
, only_calls
,
2234 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2238 copy_accesses_to_ipa_desc (gensum_param_access
*from
, isra_param_desc
*desc
)
2240 param_access
*to
= ggc_cleared_alloc
<param_access
> ();
2241 gcc_checking_assert ((from
->offset
% BITS_PER_UNIT
) == 0);
2242 gcc_checking_assert ((from
->size
% BITS_PER_UNIT
) == 0);
2243 to
->unit_offset
= from
->offset
/ BITS_PER_UNIT
;
2244 to
->unit_size
= from
->size
/ BITS_PER_UNIT
;
2245 to
->type
= from
->type
;
2246 to
->alias_ptr_type
= from
->alias_ptr_type
;
2247 to
->certain
= from
->nonarg
;
2248 to
->reverse
= from
->reverse
;
2249 vec_safe_push (desc
->accesses
, to
);
2251 for (gensum_param_access
*ch
= from
->first_child
;
2253 ch
= ch
->next_sibling
)
2254 copy_accesses_to_ipa_desc (ch
, desc
);
2257 /* Analyze function body scan results stored in param_accesses and
2258 param_accesses, detect possible transformations and store information of
2259 those in function summary. NODE, FUN and IFS are all various structures
2260 describing the currently analyzed function. */
2263 process_scan_results (cgraph_node
*node
, struct function
*fun
,
2264 isra_func_summary
*ifs
,
2265 vec
<gensum_param_desc
> *param_descriptions
)
2267 bool check_pass_throughs
= false;
2268 bool dereferences_propagated
= false;
2269 tree parm
= DECL_ARGUMENTS (node
->decl
);
2270 unsigned param_count
= param_descriptions
->length();
2272 for (unsigned desc_index
= 0;
2273 desc_index
< param_count
;
2274 desc_index
++, parm
= DECL_CHAIN (parm
))
2276 gensum_param_desc
*desc
= &(*param_descriptions
)[desc_index
];
2277 if (!desc
->split_candidate
)
2281 isra_verify_access_tree (desc
->accesses
);
2283 if (!dereferences_propagated
2287 propagate_dereference_distances (fun
);
2288 dereferences_propagated
= true;
2291 HOST_WIDE_INT nonarg_acc_size
= 0;
2292 bool only_calls
= true;
2293 bool check_failed
= false;
2295 int entry_bb_index
= ENTRY_BLOCK_PTR_FOR_FN (fun
)->index
;
2296 for (gensum_param_access
*acc
= desc
->accesses
;
2298 acc
= acc
->next_sibling
)
2299 if (check_gensum_access (parm
, desc
, acc
, &nonarg_acc_size
, &only_calls
,
2302 check_failed
= true;
2309 desc
->locally_unused
= true;
2311 HOST_WIDE_INT cur_param_size
2312 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm
)));
2313 HOST_WIDE_INT param_size_limit
;
2314 if (!desc
->by_ref
|| optimize_function_for_size_p (fun
))
2315 param_size_limit
= cur_param_size
;
2318 = opt_for_fn (node
->decl
,
2319 param_ipa_sra_ptr_growth_factor
) * cur_param_size
;
2320 if (nonarg_acc_size
> param_size_limit
2321 || (!desc
->by_ref
&& nonarg_acc_size
== param_size_limit
))
2323 disqualify_split_candidate (desc
, "Would result into a too big set "
2324 "of replacements.");
2328 /* create_parameter_descriptors makes sure unit sizes of all
2329 candidate parameters fit unsigned integers restricted to
2330 ISRA_ARG_SIZE_LIMIT. */
2331 desc
->param_size_limit
= param_size_limit
/ BITS_PER_UNIT
;
2332 desc
->nonarg_acc_size
= nonarg_acc_size
/ BITS_PER_UNIT
;
2333 if (desc
->split_candidate
&& desc
->ptr_pt_count
)
2335 gcc_assert (desc
->by_ref
);
2336 check_pass_throughs
= true;
2341 /* When a pointer parameter is passed-through to a callee, in which it is
2342 only used to read only one or a few items, we can attempt to transform it
2343 to obtaining and passing through the items instead of the pointer. But we
2344 must take extra care that 1) we do not introduce any segfault by moving
2345 dereferences above control flow and that 2) the data is not modified
2346 through an alias in this function. The IPA analysis must not introduce
2347 any accesses candidates unless it can prove both.
2349 The current solution is very crude as it consists of ensuring that the
2350 call postdominates entry BB and that the definition of VUSE of the call is
2351 default definition. TODO: For non-recursive callees in the same
2352 compilation unit we could do better by doing analysis in topological order
2353 an looking into access candidates of callees, using their alias_ptr_types
2354 to attempt real AA. We could also use the maximum known dereferenced
2355 offset in this function at IPA level.
2357 TODO: Measure the overhead and the effect of just being pessimistic.
2358 Maybe this is only -O3 material?
2360 bool pdoms_calculated
= false;
2361 if (check_pass_throughs
)
2362 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2364 gcall
*call_stmt
= cs
->call_stmt
;
2365 tree vuse
= gimple_vuse (call_stmt
);
2367 /* If the callee is a const function, we don't get a VUSE. In such
2368 case there will be no memory accesses in the called function (or the
2369 const attribute is wrong) and then we just don't care. */
2370 bool uses_memory_as_obtained
= vuse
&& SSA_NAME_IS_DEFAULT_DEF (vuse
);
2372 unsigned count
= gimple_call_num_args (call_stmt
);
2373 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2374 csum
->init_inputs (count
);
2375 for (unsigned argidx
= 0; argidx
< count
; argidx
++)
2377 if (!csum
->m_arg_flow
[argidx
].pointer_pass_through
)
2380 = get_single_param_flow_source (&csum
->m_arg_flow
[argidx
]);
2381 gensum_param_desc
*desc
= &(*param_descriptions
)[pidx
];
2382 if (!desc
->split_candidate
)
2384 csum
->m_arg_flow
[argidx
].pointer_pass_through
= false;
2387 if (!uses_memory_as_obtained
)
2390 /* Post-dominator check placed last, hoping that it usually won't
2392 if (!pdoms_calculated
)
2394 gcc_checking_assert (cfun
);
2395 add_noreturn_fake_exit_edges ();
2396 connect_infinite_loops_to_exit ();
2397 calculate_dominance_info (CDI_POST_DOMINATORS
);
2398 pdoms_calculated
= true;
2400 if (dominated_by_p (CDI_POST_DOMINATORS
,
2401 gimple_bb (call_stmt
),
2402 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun
))))
2403 csum
->m_arg_flow
[argidx
].safe_to_import_accesses
= true;
2407 if (pdoms_calculated
)
2409 free_dominance_info (CDI_POST_DOMINATORS
);
2410 remove_fake_exit_edges ();
2413 /* TODO: Add early exit if we disqualified everything. This also requires
2414 that we either relax the restriction that
2415 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2416 or store the number of parameters to IPA-SRA function summary and use that
2417 when just removing params. */
2419 vec_safe_reserve_exact (ifs
->m_parameters
, param_count
);
2420 ifs
->m_parameters
->quick_grow_cleared (param_count
);
2421 for (unsigned desc_index
= 0; desc_index
< param_count
; desc_index
++)
2423 gensum_param_desc
*s
= &(*param_descriptions
)[desc_index
];
2424 isra_param_desc
*d
= &(*ifs
->m_parameters
)[desc_index
];
2426 d
->param_size_limit
= s
->param_size_limit
;
2427 d
->size_reached
= s
->nonarg_acc_size
;
2428 d
->locally_unused
= s
->locally_unused
;
2429 d
->split_candidate
= s
->split_candidate
;
2430 d
->by_ref
= s
->by_ref
;
2432 for (gensum_param_access
*acc
= s
->accesses
;
2434 acc
= acc
->next_sibling
)
2435 copy_accesses_to_ipa_desc (acc
, d
);
2439 dump_isra_param_descriptors (dump_file
, node
->decl
, ifs
);
2442 /* Return true if there are any overlaps among certain accesses of DESC. If
2443 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2444 too. DESC is assumed to be a split candidate that is not locally
2448 overlapping_certain_accesses_p (isra_param_desc
*desc
,
2449 bool *certain_access_present_p
)
2451 unsigned pclen
= vec_safe_length (desc
->accesses
);
2452 for (unsigned i
= 0; i
< pclen
; i
++)
2454 param_access
*a1
= (*desc
->accesses
)[i
];
2458 if (certain_access_present_p
)
2459 *certain_access_present_p
= true;
2460 for (unsigned j
= i
+ 1; j
< pclen
; j
++)
2462 param_access
*a2
= (*desc
->accesses
)[j
];
2464 && a1
->unit_offset
< a2
->unit_offset
+ a2
->unit_size
2465 && a1
->unit_offset
+ a1
->unit_size
> a2
->unit_offset
)
2472 /* Check for any overlaps of certain param accesses among splitting candidates
2473 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2474 check that used splitting candidates have at least one certain access. */
2477 verify_splitting_accesses (cgraph_node
*node
, bool certain_must_exist
)
2479 isra_func_summary
*ifs
= func_sums
->get (node
);
2480 if (!ifs
|| !ifs
->m_candidate
)
2482 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
2483 for (unsigned pidx
= 0; pidx
< param_count
; pidx
++)
2485 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[pidx
];
2486 if (!desc
->split_candidate
|| desc
->locally_unused
)
2489 bool certain_access_present
= !certain_must_exist
;
2490 if (overlapping_certain_accesses_p (desc
, &certain_access_present
))
2491 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2492 "which overlap", node
->dump_name (), pidx
);
2493 if (!certain_access_present
)
2494 internal_error ("Function %s, parameter %u, is used but does not "
2495 "have any certain IPA-SRA access",
2496 node
->dump_name (), pidx
);
2500 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2501 this compilation unit and create summary structures describing IPA-SRA
2502 opportunities and constraints in them. */
2505 ipa_sra_generate_summary (void)
2507 struct cgraph_node
*node
;
2509 gcc_checking_assert (!func_sums
);
2510 gcc_checking_assert (!call_sums
);
2512 = (new (ggc_alloc_no_dtor
<ipa_sra_function_summaries
> ())
2513 ipa_sra_function_summaries (symtab
, true));
2514 call_sums
= new ipa_sra_call_summaries (symtab
);
2516 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
2517 ipa_sra_summarize_function (node
);
2521 /* Write intraprocedural analysis information about E and all of its outgoing
2522 edges into a stream for LTO WPA. */
2525 isra_write_edge_summary (output_block
*ob
, cgraph_edge
*e
)
2527 isra_call_summary
*csum
= call_sums
->get (e
);
2528 unsigned input_count
= csum
->m_arg_flow
.length ();
2529 streamer_write_uhwi (ob
, input_count
);
2530 for (unsigned i
= 0; i
< input_count
; i
++)
2532 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
2533 streamer_write_hwi (ob
, ipf
->length
);
2534 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2535 for (int j
= 0; j
< ipf
->length
; j
++)
2536 bp_pack_value (&bp
, ipf
->inputs
[j
], 8);
2537 bp_pack_value (&bp
, ipf
->aggregate_pass_through
, 1);
2538 bp_pack_value (&bp
, ipf
->pointer_pass_through
, 1);
2539 bp_pack_value (&bp
, ipf
->safe_to_import_accesses
, 1);
2540 streamer_write_bitpack (&bp
);
2541 streamer_write_uhwi (ob
, ipf
->unit_offset
);
2542 streamer_write_uhwi (ob
, ipf
->unit_size
);
2544 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2545 bp_pack_value (&bp
, csum
->m_return_ignored
, 1);
2546 bp_pack_value (&bp
, csum
->m_return_returned
, 1);
2547 bp_pack_value (&bp
, csum
->m_bit_aligned_arg
, 1);
2548 streamer_write_bitpack (&bp
);
2551 /* Write intraprocedural analysis information about NODE and all of its outgoing
2552 edges into a stream for LTO WPA. */
2555 isra_write_node_summary (output_block
*ob
, cgraph_node
*node
)
2557 isra_func_summary
*ifs
= func_sums
->get (node
);
2558 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
2559 int node_ref
= lto_symtab_encoder_encode (encoder
, node
);
2560 streamer_write_uhwi (ob
, node_ref
);
2562 unsigned param_desc_count
= vec_safe_length (ifs
->m_parameters
);
2563 streamer_write_uhwi (ob
, param_desc_count
);
2564 for (unsigned i
= 0; i
< param_desc_count
; i
++)
2566 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
2567 unsigned access_count
= vec_safe_length (desc
->accesses
);
2568 streamer_write_uhwi (ob
, access_count
);
2569 for (unsigned j
= 0; j
< access_count
; j
++)
2571 param_access
*acc
= (*desc
->accesses
)[j
];
2572 stream_write_tree (ob
, acc
->type
, true);
2573 stream_write_tree (ob
, acc
->alias_ptr_type
, true);
2574 streamer_write_uhwi (ob
, acc
->unit_offset
);
2575 streamer_write_uhwi (ob
, acc
->unit_size
);
2576 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2577 bp_pack_value (&bp
, acc
->certain
, 1);
2578 streamer_write_bitpack (&bp
);
2580 streamer_write_uhwi (ob
, desc
->param_size_limit
);
2581 streamer_write_uhwi (ob
, desc
->size_reached
);
2582 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2583 bp_pack_value (&bp
, desc
->locally_unused
, 1);
2584 bp_pack_value (&bp
, desc
->split_candidate
, 1);
2585 bp_pack_value (&bp
, desc
->by_ref
, 1);
2586 streamer_write_bitpack (&bp
);
2588 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2589 bp_pack_value (&bp
, ifs
->m_candidate
, 1);
2590 bp_pack_value (&bp
, ifs
->m_returns_value
, 1);
2591 bp_pack_value (&bp
, ifs
->m_return_ignored
, 1);
2592 gcc_assert (!ifs
->m_queued
);
2593 streamer_write_bitpack (&bp
);
2595 for (cgraph_edge
*e
= node
->callees
; e
; e
= e
->next_callee
)
2596 isra_write_edge_summary (ob
, e
);
2597 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2598 isra_write_edge_summary (ob
, e
);
2601 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2604 ipa_sra_write_summary (void)
2606 if (!func_sums
|| !call_sums
)
2609 struct output_block
*ob
= create_output_block (LTO_section_ipa_sra
);
2610 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
2613 unsigned int count
= 0;
2614 lto_symtab_encoder_iterator lsei
;
2615 for (lsei
= lsei_start_function_in_partition (encoder
);
2617 lsei_next_function_in_partition (&lsei
))
2619 cgraph_node
*node
= lsei_cgraph_node (lsei
);
2620 if (node
->has_gimple_body_p ()
2621 && func_sums
->get (node
) != NULL
)
2624 streamer_write_uhwi (ob
, count
);
2626 /* Process all of the functions. */
2627 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
2628 lsei_next_function_in_partition (&lsei
))
2630 cgraph_node
*node
= lsei_cgraph_node (lsei
);
2631 if (node
->has_gimple_body_p ()
2632 && func_sums
->get (node
) != NULL
)
2633 isra_write_node_summary (ob
, node
);
2635 streamer_write_char_stream (ob
->main_stream
, 0);
2636 produce_asm (ob
, NULL
);
2637 destroy_output_block (ob
);
2640 /* Read intraprocedural analysis information about E and all of its outgoing
2641 edges into a stream for LTO WPA. */
2644 isra_read_edge_summary (struct lto_input_block
*ib
, cgraph_edge
*cs
)
2646 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2647 unsigned input_count
= streamer_read_uhwi (ib
);
2648 csum
->init_inputs (input_count
);
2649 for (unsigned i
= 0; i
< input_count
; i
++)
2651 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
2652 ipf
->length
= streamer_read_hwi (ib
);
2653 bitpack_d bp
= streamer_read_bitpack (ib
);
2654 for (int j
= 0; j
< ipf
->length
; j
++)
2655 ipf
->inputs
[j
] = bp_unpack_value (&bp
, 8);
2656 ipf
->aggregate_pass_through
= bp_unpack_value (&bp
, 1);
2657 ipf
->pointer_pass_through
= bp_unpack_value (&bp
, 1);
2658 ipf
->safe_to_import_accesses
= bp_unpack_value (&bp
, 1);
2659 ipf
->unit_offset
= streamer_read_uhwi (ib
);
2660 ipf
->unit_size
= streamer_read_uhwi (ib
);
2662 bitpack_d bp
= streamer_read_bitpack (ib
);
2663 csum
->m_return_ignored
= bp_unpack_value (&bp
, 1);
2664 csum
->m_return_returned
= bp_unpack_value (&bp
, 1);
2665 csum
->m_bit_aligned_arg
= bp_unpack_value (&bp
, 1);
2668 /* Read intraprocedural analysis information about NODE and all of its outgoing
2669 edges into a stream for LTO WPA. */
2672 isra_read_node_info (struct lto_input_block
*ib
, cgraph_node
*node
,
2673 struct data_in
*data_in
)
2675 isra_func_summary
*ifs
= func_sums
->get_create (node
);
2676 unsigned param_desc_count
= streamer_read_uhwi (ib
);
2677 if (param_desc_count
> 0)
2679 vec_safe_reserve_exact (ifs
->m_parameters
, param_desc_count
);
2680 ifs
->m_parameters
->quick_grow_cleared (param_desc_count
);
2682 for (unsigned i
= 0; i
< param_desc_count
; i
++)
2684 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
2685 unsigned access_count
= streamer_read_uhwi (ib
);
2686 for (unsigned j
= 0; j
< access_count
; j
++)
2688 param_access
*acc
= ggc_cleared_alloc
<param_access
> ();
2689 acc
->type
= stream_read_tree (ib
, data_in
);
2690 acc
->alias_ptr_type
= stream_read_tree (ib
, data_in
);
2691 acc
->unit_offset
= streamer_read_uhwi (ib
);
2692 acc
->unit_size
= streamer_read_uhwi (ib
);
2693 bitpack_d bp
= streamer_read_bitpack (ib
);
2694 acc
->certain
= bp_unpack_value (&bp
, 1);
2695 vec_safe_push (desc
->accesses
, acc
);
2697 desc
->param_size_limit
= streamer_read_uhwi (ib
);
2698 desc
->size_reached
= streamer_read_uhwi (ib
);
2699 bitpack_d bp
= streamer_read_bitpack (ib
);
2700 desc
->locally_unused
= bp_unpack_value (&bp
, 1);
2701 desc
->split_candidate
= bp_unpack_value (&bp
, 1);
2702 desc
->by_ref
= bp_unpack_value (&bp
, 1);
2704 bitpack_d bp
= streamer_read_bitpack (ib
);
2705 ifs
->m_candidate
= bp_unpack_value (&bp
, 1);
2706 ifs
->m_returns_value
= bp_unpack_value (&bp
, 1);
2707 ifs
->m_return_ignored
= bp_unpack_value (&bp
, 1);
2710 for (cgraph_edge
*e
= node
->callees
; e
; e
= e
->next_callee
)
2711 isra_read_edge_summary (ib
, e
);
2712 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2713 isra_read_edge_summary (ib
, e
);
2716 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2717 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c,
2718 it should be possible to unify them somehow. */
2721 isra_read_summary_section (struct lto_file_decl_data
*file_data
,
2722 const char *data
, size_t len
)
2724 const struct lto_function_header
*header
=
2725 (const struct lto_function_header
*) data
;
2726 const int cfg_offset
= sizeof (struct lto_function_header
);
2727 const int main_offset
= cfg_offset
+ header
->cfg_size
;
2728 const int string_offset
= main_offset
+ header
->main_size
;
2729 struct data_in
*data_in
;
2733 lto_input_block
ib_main ((const char *) data
+ main_offset
,
2734 header
->main_size
, file_data
->mode_table
);
2737 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
2738 header
->string_size
, vNULL
);
2739 count
= streamer_read_uhwi (&ib_main
);
2741 for (i
= 0; i
< count
; i
++)
2744 struct cgraph_node
*node
;
2745 lto_symtab_encoder_t encoder
;
2747 index
= streamer_read_uhwi (&ib_main
);
2748 encoder
= file_data
->symtab_node_encoder
;
2749 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
2751 gcc_assert (node
->definition
);
2752 isra_read_node_info (&ib_main
, node
, data_in
);
2754 lto_free_section_data (file_data
, LTO_section_ipa_sra
, NULL
, data
,
2756 lto_data_in_delete (data_in
);
2759 /* Read intraprocedural analysis information into a stream for LTO WPA. */
2762 ipa_sra_read_summary (void)
2764 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
2765 struct lto_file_decl_data
*file_data
;
2768 gcc_checking_assert (!func_sums
);
2769 gcc_checking_assert (!call_sums
);
2771 = (new (ggc_alloc_no_dtor
<ipa_sra_function_summaries
> ())
2772 ipa_sra_function_summaries (symtab
, true));
2773 call_sums
= new ipa_sra_call_summaries (symtab
);
2775 while ((file_data
= file_data_vec
[j
++]))
2779 = lto_get_summary_section_data (file_data
, LTO_section_ipa_sra
, &len
);
2781 isra_read_summary_section (file_data
, data
, len
);
2785 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2788 ipa_sra_dump_all_summaries (FILE *f
)
2791 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
2793 fprintf (f
, "\nSummary for node %s:\n", node
->dump_name ());
2795 isra_func_summary
*ifs
= func_sums
->get (node
);
2798 fprintf (f
, " Function does not have any associated IPA-SRA "
2802 if (!ifs
->m_candidate
)
2804 fprintf (f
, " Not a candidate function\n");
2807 if (ifs
->m_returns_value
)
2808 fprintf (f
, " Returns value\n");
2809 if (vec_safe_is_empty (ifs
->m_parameters
))
2810 fprintf (f
, " No parameter information. \n");
2812 for (unsigned i
= 0; i
< ifs
->m_parameters
->length (); ++i
)
2814 fprintf (f
, " Descriptor for parameter %i:\n", i
);
2815 dump_isra_param_descriptor (f
, &(*ifs
->m_parameters
)[i
]);
2819 struct cgraph_edge
*cs
;
2820 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2822 fprintf (f
, " Summary for edge %s->%s:\n", cs
->caller
->dump_name (),
2823 cs
->callee
->dump_name ());
2824 isra_call_summary
*csum
= call_sums
->get (cs
);
2828 fprintf (f
, " Call summary is MISSING!\n");
2832 fprintf (f
, "\n\n");
2835 /* Perform function-scope viability tests that can be only made at IPA level
2836 and return false if the function is deemed unsuitable for IPA-SRA. */
2839 ipa_sra_ipa_function_checks (cgraph_node
*node
)
2841 if (!node
->can_be_local_p ())
2844 fprintf (dump_file
, "Function %s disqualified because it cannot be "
2845 "made local.\n", node
->dump_name ());
2848 if (!node
->can_change_signature
)
2851 fprintf (dump_file
, "Function can not change signature.\n");
2858 /* Issues found out by check_callers_for_issues. */
2860 struct caller_issues
2862 /* The candidate being considered. */
2863 cgraph_node
*candidate
;
2864 /* There is a thunk among callers. */
2866 /* Call site with no available information. */
2867 bool unknown_callsite
;
2868 /* Call from outside the the candidate's comdat group. */
2869 bool call_from_outside_comdat
;
2870 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2871 bool bit_aligned_aggregate_argument
;
2874 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2878 check_for_caller_issues (struct cgraph_node
*node
, void *data
)
2880 struct caller_issues
*issues
= (struct caller_issues
*) data
;
2882 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
2884 if (cs
->caller
->thunk
)
2886 issues
->thunk
= true;
2887 /* TODO: We should be able to process at least some types of
2891 if (issues
->candidate
->calls_comdat_local
2892 && issues
->candidate
->same_comdat_group
2893 && !issues
->candidate
->in_same_comdat_group_p (cs
->caller
))
2895 issues
->call_from_outside_comdat
= true;
2899 isra_call_summary
*csum
= call_sums
->get (cs
);
2902 issues
->unknown_callsite
= true;
2906 if (csum
->m_bit_aligned_arg
)
2907 issues
->bit_aligned_aggregate_argument
= true;
2912 /* Look at all incoming edges to NODE, including aliases and thunks and look
2913 for problems. Return true if NODE type should not be modified at all. */
2916 check_all_callers_for_issues (cgraph_node
*node
)
2918 struct caller_issues issues
;
2919 memset (&issues
, 0, sizeof (issues
));
2920 issues
.candidate
= node
;
2922 node
->call_for_symbol_and_aliases (check_for_caller_issues
, &issues
, true);
2923 if (issues
.unknown_callsite
)
2925 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2926 fprintf (dump_file
, "A call of %s has not been analyzed. Disabling "
2927 "all modifications.\n", node
->dump_name ());
2930 /* TODO: We should be able to process at least some types of thunks. */
2933 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2934 fprintf (dump_file
, "A call of %s is through thunk, which are not"
2935 " handled yet. Disabling all modifications.\n",
2936 node
->dump_name ());
2939 if (issues
.call_from_outside_comdat
)
2942 fprintf (dump_file
, "Function would become private comdat called "
2943 "outside of its comdat group.\n");
2947 if (issues
.bit_aligned_aggregate_argument
)
2949 /* Let's only remove parameters/return values from such functions.
2950 TODO: We could only prevent splitting the problematic parameters if
2951 anybody thinks it is worth it. */
2952 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2953 fprintf (dump_file
, "A call of %s has bit-aligned aggregate argument,"
2954 " disabling parameter splitting.\n", node
->dump_name ());
2956 isra_func_summary
*ifs
= func_sums
->get (node
);
2957 gcc_checking_assert (ifs
);
2958 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
2959 for (unsigned i
= 0; i
< param_count
; i
++)
2960 (*ifs
->m_parameters
)[i
].split_candidate
= false;
2965 /* Find the access with corresponding OFFSET and SIZE among accesses in
2966 PARAM_DESC and return it or NULL if such an access is not there. */
2968 static param_access
*
2969 find_param_access (isra_param_desc
*param_desc
, unsigned offset
, unsigned size
)
2971 unsigned pclen
= vec_safe_length (param_desc
->accesses
);
2973 /* The search is linear but the number of stored accesses is bound by
2974 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2976 for (unsigned i
= 0; i
< pclen
; i
++)
2977 if ((*param_desc
->accesses
)[i
]->unit_offset
== offset
2978 && (*param_desc
->accesses
)[i
]->unit_size
== size
)
2979 return (*param_desc
->accesses
)[i
];
2984 /* Return iff the total size of definite replacement SIZE would violate the
2985 limit set for it in PARAM. */
2988 size_would_violate_limit_p (isra_param_desc
*desc
, unsigned size
)
2990 unsigned limit
= desc
->param_size_limit
;
2992 || (!desc
->by_ref
&& size
== limit
))
2997 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
2998 the set limit. IDX is the parameter number which is dumped when
3002 bump_reached_size (isra_param_desc
*desc
, unsigned size
, unsigned idx
)
3004 unsigned after
= desc
->size_reached
+ size
;
3005 if (size_would_violate_limit_p (desc
, after
))
3007 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3008 fprintf (dump_file
, " ...size limit reached, disqualifying "
3009 "candidate parameter %u\n", idx
);
3010 desc
->split_candidate
= false;
3013 desc
->size_reached
= after
;
3016 /* Take all actions required to deal with an edge CS that represents a call to
3017 an unknown or un-analyzed function, for both parameter removal and
3021 process_edge_to_unknown_caller (cgraph_edge
*cs
)
3023 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3024 gcc_checking_assert (from_ifs
);
3025 isra_call_summary
*csum
= call_sums
->get (cs
);
3027 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3028 fprintf (dump_file
, "Processing an edge to an unknown caller from %s:\n",
3029 cs
->caller
->dump_name ());
3031 unsigned args_count
= csum
->m_arg_flow
.length ();
3032 for (unsigned i
= 0; i
< args_count
; i
++)
3034 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3036 if (ipf
->pointer_pass_through
)
3038 isra_param_desc
*param_desc
3039 = &(*from_ifs
->m_parameters
)[get_single_param_flow_source (ipf
)];
3040 param_desc
->locally_unused
= false;
3041 param_desc
->split_candidate
= false;
3044 if (ipf
->aggregate_pass_through
)
3046 unsigned idx
= get_single_param_flow_source (ipf
);
3047 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3049 param_desc
->locally_unused
= false;
3050 if (!param_desc
->split_candidate
)
3052 gcc_assert (!param_desc
->by_ref
);
3053 param_access
*pacc
= find_param_access (param_desc
, ipf
->unit_offset
,
3055 gcc_checking_assert (pacc
);
3056 pacc
->certain
= true;
3057 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3059 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3060 fprintf (dump_file
, " ...leading to overlap, "
3061 " disqualifying candidate parameter %u\n",
3063 param_desc
->split_candidate
= false;
3066 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3067 ipf
->aggregate_pass_through
= false;
3071 for (int j
= 0; j
< ipf
->length
; j
++)
3073 int input_idx
= ipf
->inputs
[j
];
3074 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3079 /* Propagate parameter removal information through cross-SCC edge CS,
3080 i.e. decrease the use count in the caller parameter descriptor for each use
3084 param_removal_cross_scc_edge (cgraph_edge
*cs
)
3086 enum availability availability
;
3087 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3088 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3089 if (!to_ifs
|| !to_ifs
->m_candidate
3090 || (availability
< AVAIL_AVAILABLE
)
3091 || vec_safe_is_empty (to_ifs
->m_parameters
))
3093 process_edge_to_unknown_caller (cs
);
3096 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3097 gcc_checking_assert (from_ifs
);
3099 isra_call_summary
*csum
= call_sums
->get (cs
);
3100 unsigned args_count
= csum
->m_arg_flow
.length ();
3101 unsigned param_count
= vec_safe_length (to_ifs
->m_parameters
);
3103 for (unsigned i
= 0; i
< args_count
; i
++)
3105 bool unused_in_callee
;
3106 if (i
< param_count
)
3107 unused_in_callee
= (*to_ifs
->m_parameters
)[i
].locally_unused
;
3109 unused_in_callee
= false;
3111 if (!unused_in_callee
)
3113 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3114 for (int j
= 0; j
< ipf
->length
; j
++)
3116 int input_idx
= ipf
->inputs
[j
];
3117 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3123 /* Unless it is already there, push NODE which is also described by IFS to
3127 isra_push_node_to_stack (cgraph_node
*node
, isra_func_summary
*ifs
,
3128 vec
<cgraph_node
*> *stack
)
3132 ifs
->m_queued
= true;
3133 stack
->safe_push (node
);
3137 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3138 used and push CALLER on STACK. */
3141 isra_mark_caller_param_used (isra_func_summary
*from_ifs
, int input_idx
,
3142 cgraph_node
*caller
, vec
<cgraph_node
*> *stack
)
3144 if ((*from_ifs
->m_parameters
)[input_idx
].locally_unused
)
3146 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3147 isra_push_node_to_stack (caller
, from_ifs
, stack
);
3152 /* Propagate information that any parameter is not used only locally within a
3153 SCC across CS to the caller, which must be in the same SCC as the
3154 callee. Push any callers that need to be re-processed to STACK. */
3157 propagate_used_across_scc_edge (cgraph_edge
*cs
, vec
<cgraph_node
*> *stack
)
3159 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3160 if (!from_ifs
|| vec_safe_is_empty (from_ifs
->m_parameters
))
3163 isra_call_summary
*csum
= call_sums
->get (cs
);
3164 gcc_checking_assert (csum
);
3165 unsigned args_count
= csum
->m_arg_flow
.length ();
3166 enum availability availability
;
3167 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3168 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3170 unsigned param_count
3171 = (to_ifs
&& (availability
>= AVAIL_AVAILABLE
))
3172 ? vec_safe_length (to_ifs
->m_parameters
) : 0;
3173 for (unsigned i
= 0; i
< args_count
; i
++)
3176 && (*to_ifs
->m_parameters
)[i
].locally_unused
)
3179 /* The argument is needed in the callee it, we must mark the parameter as
3180 used also in the caller and its callers within this SCC. */
3181 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3182 for (int j
= 0; j
< ipf
->length
; j
++)
3184 int input_idx
= ipf
->inputs
[j
];
3185 isra_mark_caller_param_used (from_ifs
, input_idx
, cs
->caller
, stack
);
3190 /* Propagate information that any parameter is not used only locally within a
3191 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3192 same SCC. Push any callers that need to be re-processed to STACK. */
3195 propagate_used_to_scc_callers (cgraph_node
*node
, void *data
)
3197 vec
<cgraph_node
*> *stack
= (vec
<cgraph_node
*> *) data
;
3199 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3200 if (ipa_edge_within_scc (cs
))
3201 propagate_used_across_scc_edge (cs
, stack
);
3205 /* Return true iff all certain accesses in ARG_DESC are also present as
3206 certain accesses in PARAM_DESC. */
3209 all_callee_accesses_present_p (isra_param_desc
*param_desc
,
3210 isra_param_desc
*arg_desc
)
3212 unsigned aclen
= vec_safe_length (arg_desc
->accesses
);
3213 for (unsigned j
= 0; j
< aclen
; j
++)
3215 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3216 if (!argacc
->certain
)
3218 param_access
*pacc
= find_param_access (param_desc
, argacc
->unit_offset
,
3222 || !types_compatible_p (argacc
->type
, pacc
->type
))
/* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
   does not allow instantiating an auto_vec with a type defined within a
   function so it is a global type.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3234 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3235 (which belongs to CALLER) if they would not violate some constraint there.
3236 If successful, return NULL, otherwise return the string reason for failure
3237 (which can be written to the dump file). DELTA_OFFSET is the known offset
3238 of the actual argument withing the formal parameter (so of ARG_DESCS within
3239 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3240 known. In case of success, set *CHANGE_P to true if propagation actually
3241 changed anything. */
3244 pull_accesses_from_callee (cgraph_node
*caller
, isra_param_desc
*param_desc
,
3245 isra_param_desc
*arg_desc
,
3246 unsigned delta_offset
, unsigned arg_size
,
3249 unsigned pclen
= vec_safe_length (param_desc
->accesses
);
3250 unsigned aclen
= vec_safe_length (arg_desc
->accesses
);
3251 unsigned prop_count
= 0;
3252 unsigned prop_size
= 0;
3253 bool change
= false;
3255 auto_vec
<enum acc_prop_kind
, 8> prop_kinds (aclen
);
3256 for (unsigned j
= 0; j
< aclen
; j
++)
3258 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3259 prop_kinds
.safe_push (ACC_PROP_DONT
);
3262 && argacc
->unit_offset
+ argacc
->unit_size
> arg_size
)
3263 return "callee access outsize size boundary";
3265 if (!argacc
->certain
)
3268 unsigned offset
= argacc
->unit_offset
+ delta_offset
;
3269 /* Given that accesses are initially stored according to increasing
3270 offset and decreasing size in case of equal offsets, the following
3271 searches could be written more efficiently if we kept the ordering
3272 when copying. But the number of accesses is capped at
3273 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3274 messy quickly, so let's improve on that only if necessary. */
3276 bool exact_match
= false;
3277 for (unsigned i
= 0; i
< pclen
; i
++)
3279 /* Check for overlaps. */
3280 param_access
*pacc
= (*param_desc
->accesses
)[i
];
3281 if (pacc
->unit_offset
== offset
3282 && pacc
->unit_size
== argacc
->unit_size
)
3284 if (argacc
->alias_ptr_type
!= pacc
->alias_ptr_type
3285 || !types_compatible_p (argacc
->type
, pacc
->type
))
3286 return "propagated access types would not match existing ones";
3291 prop_kinds
[j
] = ACC_PROP_CERTAIN
;
3292 prop_size
+= argacc
->unit_size
;
3298 if (offset
< pacc
->unit_offset
+ pacc
->unit_size
3299 && offset
+ argacc
->unit_size
> pacc
->unit_offset
)
3301 /* None permissible with load accesses, possible to fit into
3304 || offset
< pacc
->unit_offset
3305 || (offset
+ argacc
->unit_size
3306 > pacc
->unit_offset
+ pacc
->unit_size
))
3307 return "a propagated access would conflict in caller";
3313 prop_kinds
[j
] = ACC_PROP_COPY
;
3315 prop_size
+= argacc
->unit_size
;
3323 if ((prop_count
+ pclen
3324 > (unsigned) opt_for_fn (caller
->decl
, param_ipa_sra_max_replacements
))
3325 || size_would_violate_limit_p (param_desc
,
3326 param_desc
->size_reached
+ prop_size
))
3327 return "propagating accesses would violate the count or size limit";
3330 for (unsigned j
= 0; j
< aclen
; j
++)
3332 if (prop_kinds
[j
] == ACC_PROP_COPY
)
3334 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3336 param_access
*copy
= ggc_cleared_alloc
<param_access
> ();
3337 copy
->unit_offset
= argacc
->unit_offset
+ delta_offset
;
3338 copy
->unit_size
= argacc
->unit_size
;
3339 copy
->type
= argacc
->type
;
3340 copy
->alias_ptr_type
= argacc
->alias_ptr_type
;
3341 copy
->certain
= true;
3342 vec_safe_push (param_desc
->accesses
, copy
);
3344 else if (prop_kinds
[j
] == ACC_PROP_CERTAIN
)
3346 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3348 = find_param_access (param_desc
, argacc
->unit_offset
+ delta_offset
,
3350 csp
->certain
= true;
3354 param_desc
->size_reached
+= prop_size
;
3359 /* Propagate parameter splitting information through call graph edge CS.
3360 Return true if any changes that might need to be propagated within SCCs have
3361 been made. The function also clears the aggregate_pass_through and
3362 pointer_pass_through in call summaries which do not need to be processed
3363 again if this CS is revisited when iterating while changes are propagated
3367 param_splitting_across_edge (cgraph_edge
*cs
)
3370 bool cross_scc
= !ipa_edge_within_scc (cs
);
3371 enum availability availability
;
3372 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3373 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3374 gcc_checking_assert (from_ifs
&& from_ifs
->m_parameters
);
3376 isra_call_summary
*csum
= call_sums
->get (cs
);
3377 gcc_checking_assert (csum
);
3378 unsigned args_count
= csum
->m_arg_flow
.length ();
3379 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3380 unsigned param_count
3381 = ((to_ifs
&& to_ifs
->m_candidate
&& (availability
>= AVAIL_AVAILABLE
))
3382 ? vec_safe_length (to_ifs
->m_parameters
)
3385 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3386 fprintf (dump_file
, "Splitting across %s->%s:\n",
3387 cs
->caller
->dump_name (), callee
->dump_name ());
3390 for (i
= 0; (i
< args_count
) && (i
< param_count
); i
++)
3392 isra_param_desc
*arg_desc
= &(*to_ifs
->m_parameters
)[i
];
3393 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3395 if (arg_desc
->locally_unused
)
3397 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3398 fprintf (dump_file
, " ->%u: unused in callee\n", i
);
3399 ipf
->pointer_pass_through
= false;
3403 if (ipf
->pointer_pass_through
)
3405 int idx
= get_single_param_flow_source (ipf
);
3406 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3407 if (!param_desc
->split_candidate
)
3409 gcc_assert (param_desc
->by_ref
);
3411 if (!arg_desc
->split_candidate
|| !arg_desc
->by_ref
)
3413 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3414 fprintf (dump_file
, " %u->%u: not candidate or not by "
3415 "reference in callee\n", idx
, i
);
3416 param_desc
->split_candidate
= false;
3417 ipf
->pointer_pass_through
= false;
3420 else if (!ipf
->safe_to_import_accesses
)
3422 if (!all_callee_accesses_present_p (param_desc
, arg_desc
))
3424 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3425 fprintf (dump_file
, " %u->%u: cannot import accesses.\n",
3427 param_desc
->split_candidate
= false;
3428 ipf
->pointer_pass_through
= false;
3434 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3435 fprintf (dump_file
, " %u->%u: verified callee accesses "
3436 "present.\n", idx
, i
);
3438 ipf
->pointer_pass_through
= false;
3443 const char *pull_failure
3444 = pull_accesses_from_callee (cs
->caller
, param_desc
, arg_desc
,
3448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3449 fprintf (dump_file
, " %u->%u: by_ref access pull "
3450 "failed: %s.\n", idx
, i
, pull_failure
);
3451 param_desc
->split_candidate
= false;
3452 ipf
->pointer_pass_through
= false;
3457 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3458 fprintf (dump_file
, " %u->%u: by_ref access pull "
3459 "succeeded.\n", idx
, i
);
3461 ipf
->pointer_pass_through
= false;
3465 else if (ipf
->aggregate_pass_through
)
3467 int idx
= get_single_param_flow_source (ipf
);
3468 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3469 if (!param_desc
->split_candidate
)
3471 gcc_assert (!param_desc
->by_ref
);
3472 param_access
*pacc
= find_param_access (param_desc
, ipf
->unit_offset
,
3474 gcc_checking_assert (pacc
);
3478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3479 fprintf (dump_file
, " %u->%u: already certain\n", idx
, i
);
3480 ipf
->aggregate_pass_through
= false;
3482 else if (!arg_desc
->split_candidate
|| arg_desc
->by_ref
)
3484 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3485 fprintf (dump_file
, " %u->%u: not candidate or by "
3486 "reference in callee\n", idx
, i
);
3488 pacc
->certain
= true;
3489 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3491 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3492 fprintf (dump_file
, " ...leading to overlap, "
3493 " disqualifying candidate parameter %u\n",
3495 param_desc
->split_candidate
= false;
3498 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3500 ipf
->aggregate_pass_through
= false;
3505 const char *pull_failure
3506 = pull_accesses_from_callee (cs
->caller
, param_desc
, arg_desc
,
3508 ipf
->unit_size
, &res
);
3511 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3512 fprintf (dump_file
, " %u->%u: arg access pull "
3513 "failed: %s.\n", idx
, i
, pull_failure
);
3515 ipf
->aggregate_pass_through
= false;
3516 pacc
->certain
= true;
3518 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3520 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3521 fprintf (dump_file
, " ...leading to overlap, "
3522 " disqualifying candidate parameter %u\n",
3524 param_desc
->split_candidate
= false;
3527 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3533 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3534 fprintf (dump_file
, " %u->%u: arg access pull "
3535 "succeeded.\n", idx
, i
);
3537 ipf
->aggregate_pass_through
= false;
3543 /* Handle argument-parameter count mismatches. */
3544 for (; (i
< args_count
); i
++)
3546 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3548 if (ipf
->pointer_pass_through
|| ipf
->aggregate_pass_through
)
3550 int idx
= get_single_param_flow_source (ipf
);
3551 ipf
->pointer_pass_through
= false;
3552 ipf
->aggregate_pass_through
= false;
3553 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3554 if (!param_desc
->split_candidate
)
3557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3558 fprintf (dump_file
, " %u->%u: no corresponding formal parameter\n",
3560 param_desc
->split_candidate
= false;
3567 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3568 callers ignore the return value, or come from the same SCC and use the
3569 return value only to compute their return value, return false, otherwise
3573 retval_used_p (cgraph_node
*node
, void *)
3575 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3577 isra_call_summary
*csum
= call_sums
->get (cs
);
3578 gcc_checking_assert (csum
);
3579 if (csum
->m_return_ignored
)
3581 if (!csum
->m_return_returned
)
3584 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3585 if (!from_ifs
|| !from_ifs
->m_candidate
)
3588 if (!ipa_edge_within_scc (cs
)
3589 && !from_ifs
->m_return_ignored
)
3596 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3597 modify parameter which originally had index BASE_INDEX, in the adjustment
3598 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3599 PREV_ADJUSTMENT. If the parent clone is the original function,
3600 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3604 push_param_adjustments_for_index (isra_func_summary
*ifs
, unsigned base_index
,
3605 unsigned prev_clone_index
,
3606 ipa_adjusted_param
*prev_adjustment
,
3607 vec
<ipa_adjusted_param
, va_gc
> **new_params
)
3609 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[base_index
];
3610 if (desc
->locally_unused
)
3613 fprintf (dump_file
, " Will remove parameter %u\n", base_index
);
3617 if (!desc
->split_candidate
)
3619 ipa_adjusted_param adj
;
3620 if (prev_adjustment
)
3622 adj
= *prev_adjustment
;
3623 adj
.prev_clone_adjustment
= true;
3624 adj
.prev_clone_index
= prev_clone_index
;
3628 memset (&adj
, 0, sizeof (adj
));
3629 adj
.op
= IPA_PARAM_OP_COPY
;
3630 adj
.base_index
= base_index
;
3631 adj
.prev_clone_index
= prev_clone_index
;
3633 vec_safe_push ((*new_params
), adj
);
3638 fprintf (dump_file
, " Will split parameter %u\n", base_index
);
3640 gcc_assert (!prev_adjustment
|| prev_adjustment
->op
== IPA_PARAM_OP_COPY
);
3641 unsigned aclen
= vec_safe_length (desc
->accesses
);
3642 for (unsigned j
= 0; j
< aclen
; j
++)
3644 param_access
*pa
= (*desc
->accesses
)[j
];
3648 fprintf (dump_file
, " - component at byte offset %u, "
3649 "size %u\n", pa
->unit_offset
, pa
->unit_size
);
3651 ipa_adjusted_param adj
;
3652 memset (&adj
, 0, sizeof (adj
));
3653 adj
.op
= IPA_PARAM_OP_SPLIT
;
3654 adj
.base_index
= base_index
;
3655 adj
.prev_clone_index
= prev_clone_index
;
3656 adj
.param_prefix_index
= IPA_PARAM_PREFIX_ISRA
;
3657 adj
.reverse
= pa
->reverse
;
3658 adj
.type
= pa
->type
;
3659 adj
.alias_ptr_type
= pa
->alias_ptr_type
;
3660 adj
.unit_offset
= pa
->unit_offset
;
3661 vec_safe_push ((*new_params
), adj
);
3665 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
3666 flag of all callers of NODE. */
3669 mark_callers_calls_comdat_local (struct cgraph_node
*node
, void *)
3671 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3672 cs
->caller
->calls_comdat_local
= true;
3677 /* Do final processing of results of IPA propagation regarding NODE, clone it
3681 process_isra_node_results (cgraph_node
*node
,
3682 hash_map
<const char *, unsigned> *clone_num_suffixes
)
3684 isra_func_summary
*ifs
= func_sums
->get (node
);
3685 if (!ifs
|| !ifs
->m_candidate
)
3688 auto_vec
<bool, 16> surviving_params
;
3689 bool check_surviving
= false;
3690 clone_info
*cinfo
= clone_info::get (node
);
3691 if (cinfo
&& cinfo
->param_adjustments
)
3693 check_surviving
= true;
3694 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
3697 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
3698 bool will_change_function
= false;
3699 if (ifs
->m_returns_value
&& ifs
->m_return_ignored
)
3700 will_change_function
= true;
3702 for (unsigned i
= 0; i
< param_count
; i
++)
3704 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
3705 if ((desc
->locally_unused
|| desc
->split_candidate
)
3706 /* Make sure we do not clone just to attempt to remove an already
3707 removed unused argument. */
3708 && (!check_surviving
3709 || (i
< surviving_params
.length ()
3710 && surviving_params
[i
])))
3712 will_change_function
= true;
3716 if (!will_change_function
)
3721 fprintf (dump_file
, "\nEvaluating analysis results for %s\n",
3722 node
->dump_name ());
3723 if (ifs
->m_returns_value
&& ifs
->m_return_ignored
)
3724 fprintf (dump_file
, " Will remove return value.\n");
3727 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
3728 if (ipa_param_adjustments
*old_adjustments
3729 = cinfo
? cinfo
->param_adjustments
: NULL
)
3731 unsigned old_adj_len
= vec_safe_length (old_adjustments
->m_adj_params
);
3732 for (unsigned i
= 0; i
< old_adj_len
; i
++)
3734 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
3735 push_param_adjustments_for_index (ifs
, old_adj
->base_index
, i
,
3736 old_adj
, &new_params
);
3740 for (unsigned i
= 0; i
< param_count
; i
++)
3741 push_param_adjustments_for_index (ifs
, i
, i
, NULL
, &new_params
);
3743 ipa_param_adjustments
*new_adjustments
3744 = (new (ggc_alloc
<ipa_param_adjustments
> ())
3745 ipa_param_adjustments (new_params
, param_count
,
3746 ifs
->m_returns_value
&& ifs
->m_return_ignored
));
3748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3750 fprintf (dump_file
, "\n Created adjustments:\n");
3751 new_adjustments
->dump (dump_file
);
3754 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
3755 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3757 vec
<cgraph_edge
*> callers
= node
->collect_callers ();
3758 cgraph_node
*new_node
3759 = node
->create_virtual_clone (callers
, NULL
, new_adjustments
, "isra",
3762 if (node
->calls_comdat_local
&& node
->same_comdat_group
)
3764 new_node
->add_to_same_comdat_group (node
);
3765 new_node
->call_for_symbol_and_aliases (mark_callers_calls_comdat_local
,
3768 new_node
->calls_comdat_local
= node
->calls_comdat_local
;
3771 fprintf (dump_file
, " Created new node %s\n", new_node
->dump_name ());
3775 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3776 and disable transformations for those which have not or which should not
3777 transformed because the associated debug counter reached its limit. Return
3778 true if none survived or if there were no candidates to begin with. */
3781 disable_unavailable_parameters (cgraph_node
*node
, isra_func_summary
*ifs
)
3784 unsigned len
= vec_safe_length (ifs
->m_parameters
);
3788 auto_vec
<bool, 16> surviving_params
;
3789 bool check_surviving
= false;
3790 clone_info
*cinfo
= clone_info::get (node
);
3791 if (cinfo
&& cinfo
->param_adjustments
)
3793 check_surviving
= true;
3794 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
3796 bool dumped_first
= false;
3797 for (unsigned i
= 0; i
< len
; i
++)
3799 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
3800 if (!dbg_cnt (ipa_sra_params
))
3802 desc
->locally_unused
= false;
3803 desc
->split_candidate
= false;
3805 else if (check_surviving
3806 && (i
>= surviving_params
.length ()
3807 || !surviving_params
[i
]))
3809 /* Even if the parameter was removed by a previous IPA pass, we do
3810 not clear locally_unused because if it really is unused, this
3811 information might be useful in callers. */
3812 desc
->split_candidate
= false;
3814 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3819 "The following parameters of %s are dead on "
3820 "arrival:", node
->dump_name ());
3821 dumped_first
= true;
3823 fprintf (dump_file
, " %u", i
);
3826 else if (desc
->locally_unused
|| desc
->split_candidate
)
3831 fprintf (dump_file
, "\n");
3837 /* Run the interprocedural part of IPA-SRA. */
3840 ipa_sra_analysis (void)
3844 fprintf (dump_file
, "\n========== IPA-SRA IPA stage ==========\n");
3845 ipa_sra_dump_all_summaries (dump_file
);
3848 gcc_checking_assert (func_sums
);
3849 gcc_checking_assert (call_sums
);
3850 cgraph_node
**order
= XCNEWVEC (cgraph_node
*, symtab
->cgraph_count
);
3851 auto_vec
<cgraph_node
*, 16> stack
;
3852 int node_scc_count
= ipa_reduced_postorder (order
, true, NULL
);
3854 /* One sweep from callees to callers for parameter removal and splitting. */
3855 for (int i
= 0; i
< node_scc_count
; i
++)
3857 cgraph_node
*scc_rep
= order
[i
];
3858 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (scc_rep
);
3861 /* Preliminary IPA function level checks and first step of parameter
3864 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3866 isra_func_summary
*ifs
= func_sums
->get (v
);
3867 if (!ifs
|| !ifs
->m_candidate
)
3869 if (!ipa_sra_ipa_function_checks (v
)
3870 || check_all_callers_for_issues (v
))
3875 if (disable_unavailable_parameters (v
, ifs
))
3877 for (cgraph_edge
*cs
= v
->indirect_calls
; cs
; cs
= cs
->next_callee
)
3878 process_edge_to_unknown_caller (cs
);
3879 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3880 if (!ipa_edge_within_scc (cs
))
3881 param_removal_cross_scc_edge (cs
);
3884 /* Look at edges within the current SCC and propagate used-ness across
3885 them, pushing onto the stack all notes which might need to be
3887 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3888 v
->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers
,
3891 /* Keep revisiting and pushing until nothing changes. */
3892 while (!stack
.is_empty ())
3894 cgraph_node
*v
= stack
.pop ();
3895 isra_func_summary
*ifs
= func_sums
->get (v
);
3896 gcc_checking_assert (ifs
&& ifs
->m_queued
);
3897 ifs
->m_queued
= false;
3899 v
->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers
,
3903 /* Parameter splitting. */
3904 bool repeat_scc_access_propagation
;
3907 repeat_scc_access_propagation
= false;
3908 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3910 isra_func_summary
*ifs
= func_sums
->get (v
);
3912 || !ifs
->m_candidate
3913 || vec_safe_is_empty (ifs
->m_parameters
))
3915 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3916 if (param_splitting_across_edge (cs
))
3917 repeat_scc_access_propagation
= true;
3920 while (repeat_scc_access_propagation
);
3923 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3924 verify_splitting_accesses (v
, true);
3926 cycle_nodes
.release ();
3929 /* One sweep from caller to callees for result removal. */
3930 for (int i
= node_scc_count
- 1; i
>= 0 ; i
--)
3932 cgraph_node
*scc_rep
= order
[i
];
3933 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (scc_rep
);
3937 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3939 isra_func_summary
*ifs
= func_sums
->get (v
);
3940 if (!ifs
|| !ifs
->m_candidate
)
3944 = (ifs
->m_returns_value
3945 && (!dbg_cnt (ipa_sra_retvalues
)
3946 || v
->call_for_symbol_and_aliases (retval_used_p
,
3948 ifs
->m_return_ignored
= !return_needed
;
3950 isra_push_node_to_stack (v
, ifs
, &stack
);
3953 while (!stack
.is_empty ())
3955 cgraph_node
*node
= stack
.pop ();
3956 isra_func_summary
*ifs
= func_sums
->get (node
);
3957 gcc_checking_assert (ifs
&& ifs
->m_queued
);
3958 ifs
->m_queued
= false;
3960 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
3961 if (ipa_edge_within_scc (cs
)
3962 && call_sums
->get (cs
)->m_return_returned
)
3964 enum availability av
;
3965 cgraph_node
*callee
= cs
->callee
->function_symbol (&av
);
3966 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3967 if (to_ifs
&& to_ifs
->m_return_ignored
)
3969 to_ifs
->m_return_ignored
= false;
3970 isra_push_node_to_stack (callee
, to_ifs
, &stack
);
3974 cycle_nodes
.release ();
3977 ipa_free_postorder_info ();
3982 if (dump_flags
& TDF_DETAILS
)
3984 fprintf (dump_file
, "\n========== IPA-SRA propagation final state "
3986 ipa_sra_dump_all_summaries (dump_file
);
3988 fprintf (dump_file
, "\n========== IPA-SRA decisions ==========\n");
3991 hash_map
<const char *, unsigned> *clone_num_suffixes
3992 = new hash_map
<const char *, unsigned>;
3995 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
3996 process_isra_node_results (node
, clone_num_suffixes
);
3998 delete clone_num_suffixes
;
3999 ggc_delete (func_sums
);
4005 fprintf (dump_file
, "\n========== IPA SRA IPA analysis done "
4011 const pass_data pass_data_ipa_sra
=
4013 IPA_PASS
, /* type */
4015 OPTGROUP_NONE
, /* optinfo_flags */
4016 TV_IPA_SRA
, /* tv_id */
4017 0, /* properties_required */
4018 0, /* properties_provided */
4019 0, /* properties_destroyed */
4020 0, /* todo_flags_start */
4021 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
4024 class pass_ipa_sra
: public ipa_opt_pass_d
4027 pass_ipa_sra (gcc::context
*ctxt
)
4028 : ipa_opt_pass_d (pass_data_ipa_sra
, ctxt
,
4029 ipa_sra_generate_summary
, /* generate_summary */
4030 ipa_sra_write_summary
, /* write_summary */
4031 ipa_sra_read_summary
, /* read_summary */
4032 NULL
, /* write_optimization_summary */
4033 NULL
, /* read_optimization_summary */
4034 NULL
, /* stmt_fixup */
4035 0, /* function_transform_todo_flags_start */
4036 NULL
, /* function_transform */
4037 NULL
) /* variable_transform */
4040 /* opt_pass methods: */
4041 virtual bool gate (function
*)
4043 /* TODO: We should remove the optimize check after we ensure we never run
4044 IPA passes when not optimizing. */
4045 return (flag_ipa_sra
&& optimize
);
4048 virtual unsigned int execute (function
*) { return ipa_sra_analysis (); }
4050 }; // class pass_ipa_sra
4054 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4055 create a summary structure describing IPA-SRA opportunities and constraints
4059 ipa_sra_summarize_function (cgraph_node
*node
)
4062 fprintf (dump_file
, "Creating summary for %s/%i:\n", node
->name (),
4064 if (!ipa_sra_preliminary_function_checks (node
))
4066 gcc_obstack_init (&gensum_obstack
);
4067 isra_func_summary
*ifs
= func_sums
->get_create (node
);
4068 ifs
->m_candidate
= true;
4069 tree ret
= TREE_TYPE (TREE_TYPE (node
->decl
));
4070 ifs
->m_returns_value
= (TREE_CODE (ret
) != VOID_TYPE
);
4072 decl2desc
= new hash_map
<tree
, gensum_param_desc
*>;
4074 for (tree parm
= DECL_ARGUMENTS (node
->decl
); parm
; parm
= DECL_CHAIN (parm
))
4079 auto_vec
<gensum_param_desc
, 16> param_descriptions (count
);
4080 param_descriptions
.reserve_exact (count
);
4081 param_descriptions
.quick_grow_cleared (count
);
4083 bool cfun_pushed
= false;
4084 struct function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
4085 if (create_parameter_descriptors (node
, ¶m_descriptions
))
4089 final_bbs
= BITMAP_ALLOC (NULL
);
4090 bb_dereferences
= XCNEWVEC (HOST_WIDE_INT
,
4092 * last_basic_block_for_fn (fun
));
4093 aa_walking_limit
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
4094 scan_function (node
, fun
);
4098 dump_gensum_param_descriptors (dump_file
, node
->decl
,
4099 ¶m_descriptions
);
4100 fprintf (dump_file
, "----------------------------------------\n");
4103 process_scan_results (node
, fun
, ifs
, ¶m_descriptions
);
4107 if (bb_dereferences
)
4109 free (bb_dereferences
);
4110 bb_dereferences
= NULL
;
4111 BITMAP_FREE (final_bbs
);
4115 isra_analyze_all_outgoing_calls (node
);
4119 obstack_free (&gensum_obstack
, NULL
);
4121 fprintf (dump_file
, "\n\n");
4123 verify_splitting_accesses (node
, false);
4128 make_pass_ipa_sra (gcc::context
*ctxt
)
4130 return new pass_ipa_sra (ctxt
);
4134 #include "gt-ipa-sra.h"