/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and each call are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments, so that if two parameters are only
   used in a sum which is then passed to another function which, however, does
   not use that argument, all three parameters of the two functions can be
   eliminated.  Edge summaries also carry flags indicating whether the return
   value is used or whether it is only returned in the caller too.  In LTO mode
   these summaries are streamed to the object file in the compilation phase and
   streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The sweep from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection,
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
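
/* As a minimal illustration (hypothetical code, not taken from this file or
   any testcase), consider:

     struct S { int a, b; };
     static int f (struct S *p) { return p->a; }

   Provided all callers are known, IPA-SRA can turn this into the equivalent
   of

     static int f (int a) { return a; }

   removing the unused portion of the aggregate and passing the accessed part
   by value, with all call sites adjusted accordingly.  */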
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"
#include "symtab-clones.h"

static void ipa_sra_summarize_function (cgraph_node *);
/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that occur
   within a function that fall within another access form a tree.  The pass
   cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};
/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;

  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Have there been writes to or reads from this exact location except for as
     arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};
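
/* For illustration only (hypothetical types, not from any testcase): given a
   parameter of type struct { int x; struct { short y, z; } s; } where the
   function both passes arg.s to a callee and loads arg.s.y itself, the scan
   builds an access for arg.x (offset 0, size 32) whose next_sibling describes
   arg.s (offset 32, size 32), and the access for arg.s.y (offset 32, size 16)
   hangs off arg.s as its first_child.  The layout assumes 32-bit int and
   16-bit short.  */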
/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameters, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing stuff by reference?  */
  unsigned by_ref : 1;
};
/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If the below is non-zero, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a callee as
     a pointer pass-through.  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing stuff by reference?  */
  bool by_ref;

  /* The number of this parameter as they are ordered in function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is parameter index to
     compute indices to bb_dereferences.  */
  int deref_index;
};
/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   allocated in GC memory, this is not necessary and we can consider removing
   the destructor.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}
/* Class used to convey information about functions from the
   intra-procedural analysis stage to inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in IPA SRA stack during processing of
     call graph SCCs.  */

  unsigned m_queued : 1;
};
/* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
   data structure is allocated in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}
/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  /* TODO: see the destructor above.  */
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}
/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     callee's formal parameters which are used to calculate value of this
     actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual copy is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual copy is a verbatim pass through of an
     obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
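
/* To illustrate (hypothetical code, mirroring the example in the initial
   comment of this file): in

     static int bar (int x) { return 0; }
     static int foo (int a, int b) { return bar (a + b); }

   the summary of the foo -> bar edge describes the single actual argument
   with length == 2 and inputs == {0, 1}, i.e. formal parameters 0 and 1 of
   foo feed into it.  Since bar does not use x, the IPA phase can remove x,
   a and b together.  */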
/* Structure used to convey information about calls from the intra-procedural
   analysis stage to inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in the
     caller.  */
  unsigned m_before_any_store : 1;
};
/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
			  isra_func_summary *old_sum,
			  isra_func_summary *new_sum);
  virtual void insert (cgraph_node *, isra_func_summary *);
};
/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
				       isra_func_summary *old_sum,
				       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate  = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (param_count == 0)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
	{
	  param_access *from = (*s->accesses)[j];
	  param_access *to = ggc_cleared_alloc<param_access> ();
	  to->type = from->type;
	  to->alias_ptr_type = from->alias_ptr_type;
	  to->unit_offset = from->unit_offset;
	  to->unit_size = from->unit_size;
	  to->certain = from->certain;
	  to->reverse = from->reverse;
	  d->accesses->quick_push (to);
	}
    }
}
/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Hook that is called by summary when new node appears.  */

void
ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
{
  if (opt_for_fn (node->decl, flag_ipa_sra))
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      ipa_sra_summarize_function (node);
      pop_cfun ();
    }
  else
    func_sums->remove (node);
}
/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
			  isra_call_summary *old_sum,
			  isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;
/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}
/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, "    happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
	{
	  bool first = true;
	  fprintf (f, "      Scalar param sources: ");
	  for (int j = 0; j < ipf->length; j++)
	    {
	      if (!first)
		fprintf (f, ", ");
	      else
		first = false;
	      fprintf (f, "%i", (int) ipf->inputs[j]);
	    }
	  fprintf (f, "\n");
	}
      if (ipf->aggregate_pass_through)
	fprintf (f, "      Aggregate pass through from the param given above, "
		 "unit offset: %u , unit size: %u\n",
		 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
	fprintf (f, "      Pointer pass through from the param given above, "
		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}
/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
				   isra_call_summary *old_sum,
				   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
  new_sum->m_before_any_store = old_sum->m_before_any_store;
}
/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed Alias Analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
unsigned by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;
/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
		 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
	fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Always inline function will be inlined "
		 "anyway.\n");
      return false;
    }

  return true;
}
/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "    * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
	   access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}
/* Print access tree starting at ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access tree starting at ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}
/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}
/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
			       vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}
/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
	   desc->param_size_limit, desc->size_reached,
	   desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}
/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
			     isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "    parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}
/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage.  If the
   function fails return false, otherwise return true.  SRC must fit into an
   unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}
/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}
/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}
/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for something else than either an actual call argument, simple
   arithmetic operation or debug statement.  If there are no such uses, return
   the number of actual arguments that this parameter eventually feeds to (or
   zero if there is none).  For any such parameter, mark PARM_NUM as one of its
   sources.  ANALYZED is a bitmap that tracks which SSA names we have already
   started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
			      int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
	continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
	 operations.  */
      if (is_gimple_call (stmt))
	{
	  int all_uses = 0;
	  use_operand_p use_p;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    all_uses++;

	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      res = -1;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  int simple_uses = 0;
	  for (unsigned i = 0; i < arg_count; i++)
	    if (gimple_call_arg (call, i) == name)
	      {
		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
		  {
		    simple_uses = -1;
		    break;
		  }
		simple_uses++;
	      }

	  if (simple_uses < 0
	      || all_uses != simple_uses)
	    {
	      res = -1;
	      break;
	    }
	  res += all_uses;
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = -1;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
	    {
	      int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
						      analyzed);
	      if (tmp < 0)
		{
		  res = tmp;
		  break;
		}
	      res += tmp;
	    }
	}
      else
	{
	  res = -1;
	  break;
	}
    }

  return res;
}
/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for something else than either an
   actual call argument, simple arithmetic operation or debug statement.  If
   there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is none)
   to *CALL_USES_P.  For any such parameter, mark PARM_NUM as one of its
   sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
				    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
						analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}
/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any nonarg uses that are not actual arguments or
   otherwise infeasible uses.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as argument calls
   and if returning false, store their number into *PT_COUNT_P.  NODE and FUN
   must represent the function that is currently analyzed, PARM_NUM must be the
   index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
			  int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;

      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (!TREE_THIS_VOLATILE (rhs))
	    {
	      while (handled_component_p (rhs))
		rhs = TREE_OPERAND (rhs, 0);
	      if (TREE_CODE (rhs) == MEM_REF
		  && TREE_OPERAND (rhs, 0) == name
		  && integer_zerop (TREE_OPERAND (rhs, 1))
		  && types_compatible_p (TREE_TYPE (rhs),
					 TREE_TYPE (TREE_TYPE (name))))
		uses_ok++;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      ret = true;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  for (unsigned i = 0; i < arg_count; ++i)
	    {
	      tree arg = gimple_call_arg (stmt, i);

	      if (arg == name)
		{
		  /* TODO: Allow &MEM_REF[name + offset] here,
		     ipa_param_body_adjustments::modify_call_stmt has to be
		     adjusted too.  */
		  csum->m_arg_flow[i].pointer_pass_through = true;
		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
		  pt_count++;
		  uses_ok++;
		  continue;
		}

	      if (!TREE_THIS_VOLATILE (arg))
		{
		  while (handled_component_p (arg))
		    arg = TREE_OPERAND (arg, 0);
		  if (TREE_CODE (arg) == MEM_REF
		      && TREE_OPERAND (arg, 0) == name
		      && integer_zerop (TREE_OPERAND (arg, 1))
		      && types_compatible_p (TREE_TYPE (arg),
					     TREE_TYPE (TREE_TYPE (name))))
		    uses_ok++;
		}
	    }
	}

      /* If the number of valid uses does not match the number of
	 uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
	all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
	{
	  ret = true;
	  break;
	}
    }

  *pt_count_p = pt_count;
  return ret;
}
/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false if there are candidates for removal of register
   parameters) and function body must be scanned.  */

static bool
create_parameter_descriptors (cgraph_node *node,
			      vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is volatile\n");
	  continue;
	}
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is a va_list type\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is addressable\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, type cannot be split\n");
	  continue;
	}

      if (is_gimple_reg (parm)
	  && !isra_track_scalar_param_local_uses (fun, node, parm, num,
						  &scalar_call_uses))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " is a scalar with only %i call uses\n",
		     scalar_call_uses);

	  desc->locally_unused = true;
	  desc->call_uses = scalar_call_uses;
	}

      if (POINTER_TYPE_P (type))
	{
	  type = TREE_TYPE (type);

	  if (TREE_CODE (type) == FUNCTION_TYPE
	      || TREE_CODE (type) == METHOD_TYPE)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a function\n");
	      continue;
	    }
	  if (TYPE_VOLATILE (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a volatile type\n");
	      continue;
	    }
	  if (TREE_CODE (type) == ARRAY_TYPE
	      && TYPE_NONALIASED_COMPONENT (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a nonaliased component array\n");
	      continue;
	    }
	  if (!is_gimple_reg (parm))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, a reference which is "
			 "not a gimple register (probably addressable)\n");
	      continue;
	    }
	  if (is_va_list_type (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a va list\n");
	      continue;
	    }
	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
					&desc->ptr_pt_count))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference has "
			 "nonarg uses\n");
	      continue;
	    }
	  desc->by_ref = true;
	}
      else if (!AGGREGATE_TYPE_P (type))
	{
	  /* This is in an else branch because scalars passed by reference are
	     still candidates to be passed by value.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not an aggregate\n");
	  continue;
	}

      if (!COMPLETE_TYPE_P (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not a complete type\n");
	  continue;
	}
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, size not representable\n");
	  continue;
	}
      unsigned HOST_WIDE_INT type_size
	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
	  || type_size >= ISRA_ARG_SIZE_LIMIT)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
	  continue;
	}
      if (type_internals_preclude_sra_p (type, &msg))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, %s\n", msg);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
	desc->deref_index = by_ref_count++;
    }
  return ret;
}
/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}
/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
	     desc->param_number, reason);

  desc->split_candidate = false;
}
/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}
/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
		 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
					     sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}
/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
	return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
	 it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
	 && access->next_sibling
	 && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
	 something below access.  But we only allow nesting in call
	 arguments.  */
      if (access->nonarg)
	return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
	 what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
	return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->first_child = access;

      while (access->next_sibling
	     && access->next_sibling->offset < offset + size)
	access = access->next_sibling;
      if (access->offset + access->size > offset + size)
	{
	  /* This must be a different access, which are sorted, so the
	     following must be true and this signals a partial overlap.  */
	  gcc_assert (access->offset > offset);
	  return NULL;
	}

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
	 sibling above.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise, a partial overlap on the other side.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
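
/* An illustrative example of the insertion logic above (not from any real
   testcase): assume existing sibling accesses covering bits 0-31 and 64-127.
   A new call-argument access covering bits 0-127 becomes their parent, with
   both existing accesses ending up as its children, because nesting is
   allowed in call arguments.  A new access covering bits 16-47, on the other
   hand, partially overlaps the first sibling, so the function returns NULL
   and the parameter is subsequently disqualified from splitting.  */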
/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, mark it as used in context CTX.  Attempt
   to create it if it does not exist, but fail and return NULL if there are
   already too many accesses, if it would create a partially overlapping access
   or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
	    isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
					      size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
				  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through.  */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}
/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
		      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
	{
	  if (access->offset < parent_offset)
	    {
	      error ("Access offset before parent offset");
	      return true;
	    }
	  if (access->size >= parent_size)
	    {
	      error ("Access size greater or equal to its parent size");
	      return true;
	    }
	  if (access->offset + access->size > parent_offset + parent_size)
	    {
	      error ("Access terminates outside of its parent");
	      return true;
	    }
	}

      if (verify_access_tree_1 (access->first_child, access->offset,
				access->size))
	return true;

      if (access->next_sibling
	  && (access->next_sibling->offset < access->offset + access->size))
	{
	  error ("Access overlaps with its sibling");
	  return true;
	}

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
	dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}
/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}
/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as a potentially
   final one.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
			basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
/* Return true, if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
	  || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
	  != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
	  != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
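
/* To illustrate the ordering above (hypothetical examples): a double is
   preferred over a struct containing it because non-aggregates always win, a
   complex or vector type is preferred over any other scalar, and of two plain
   integral types the one with bigger precision wins, so repeated comparisons
   converge on a single "best" replacement type no matter in which order the
   accesses are seen.  */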
/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};
/* Record use of ACCESS which belongs to a parameter described by DESC in a
   call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
			     scan_call_info *call_info,
			     unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
}
/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}
/* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
   specifies whether EXPR is used in a load, store or as an argument call.  BB
   must be the basic block in which expr resides.  If CTX specifies call
   argument context, CALL_INFO must describe that call and argument position,
   otherwise it is ignored.  */

static void
scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
		  basic_block bb, scan_call_info *call_info = NULL)
{
  poly_int64 poffset, psize, pmax_size;
  HOST_WIDE_INT offset, size, max_size;
  tree base;
  bool deref = false;
  bool reverse;

  if (TREE_CODE (expr) == BIT_FIELD_REF
      || TREE_CODE (expr) == IMAGPART_EXPR
      || TREE_CODE (expr) == REALPART_EXPR)
    expr = TREE_OPERAND (expr, 0);

  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);

  if (TREE_CODE (base) == MEM_REF)
    {
      tree op = TREE_OPERAND (base, 0);
      if (TREE_CODE (op) != SSA_NAME
	  || !SSA_NAME_IS_DEFAULT_DEF (op))
	return;
      base = SSA_NAME_VAR (op);
      if (!base)
	return;
      deref = true;
    }
  if (TREE_CODE (base) != PARM_DECL)
    return;

  gensum_param_desc *desc = get_gensum_param_desc (base);
  if (!desc || !desc->split_candidate)
    return;

  if (!poffset.is_constant (&offset)
      || !psize.is_constant (&size)
      || !pmax_size.is_constant (&max_size))
    {
      disqualify_split_candidate (desc, "Encountered a polynomial-sized "
				  "access.");
      return;
    }
  if (size < 0 || size != max_size)
    {
      disqualify_split_candidate (desc, "Encountered a variable sized access.");
      return;
    }
  if (TREE_CODE (expr) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
    {
      disqualify_split_candidate (desc, "Encountered a bit-field access.");
      return;
    }
  if (offset < 0)
    {
      disqualify_split_candidate (desc, "Encountered an access at a "
				  "negative offset.");
      return;
    }
  gcc_assert ((offset % BITS_PER_UNIT) == 0);
  gcc_assert ((size % BITS_PER_UNIT) == 0);
  if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
      || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
    {
      disqualify_split_candidate (desc, "Encountered an access with too big "
				  "offset or size");
      return;
    }

  tree type = TREE_TYPE (expr);
  unsigned int exp_align = get_object_alignment (expr);

  if (exp_align < TYPE_ALIGN (type))
    {
      disqualify_split_candidate (desc, "Underaligned access.");
      return;
    }

  if (deref)
    {
      if (!desc->by_ref)
	{
	  disqualify_split_candidate (desc, "Dereferencing a non-reference.");
	  return;
	}
      else if (ctx == ISRA_CTX_STORE)
	{
	  disqualify_split_candidate (desc, "Storing to data passed by "
				      "reference.");
	  return;
	}

      if (!aa_walking_limit)
	{
	  disqualify_split_candidate (desc, "Out of alias analysis step "
				      "limit.");
	  return;
	}

      gcc_checking_assert (gimple_vuse (stmt));
      bool maybe_modified = false;
      ao_ref ar;

      ao_ref_init (&ar, expr);
      bitmap visited = BITMAP_ALLOC (NULL);
      int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
				       mark_maybe_modified, &maybe_modified,
				       &visited, NULL, aa_walking_limit);
      BITMAP_FREE (visited);
      if (walked > 0)
	{
	  gcc_assert (aa_walking_limit > walked);
	  aa_walking_limit = aa_walking_limit - walked;
	}
      if (walked < 0)
	aa_walking_limit = 0;
      if (maybe_modified || walked < 0)
	{
	  disqualify_split_candidate (desc, "Data passed by reference possibly "
				      "modified through an alias.");
	  return;
	}
      else
	mark_param_dereference (desc, offset + size, bb);
    }
  else
    /* Pointer parameters with direct uses should have been ruled out by
       analyzing SSA default def when looking at the parameters.  */
    gcc_assert (!desc->by_ref);

  gensum_param_access *access = get_access (desc, offset, size, ctx);
  if (!access)
    return;

  if (ctx == ISRA_CTX_ARG)
    {
      gcc_checking_assert (call_info);

      if (!deref)
	record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
				     size / BITS_PER_UNIT);
      else
	/* This is not a pass-through of a pointer, this is a use like any
	   other.  */
	access->nonarg = true;
    }

  if (!access->type)
    {
      access->type = type;
      access->alias_ptr_type = reference_alias_ptr_type (expr);
      access->reverse = reverse;
    }
  else
    {
      if (exp_align < TYPE_ALIGN (access->type))
	{
	  disqualify_split_candidate (desc, "Reference has lower alignment "
				      "than a previous one.");
	  return;
	}
      if (access->alias_ptr_type != reference_alias_ptr_type (expr))
	{
	  disqualify_split_candidate (desc, "Multiple alias pointer types.");
	  return;
	}
      if (access->reverse != reverse)
	{
	  disqualify_split_candidate (desc, "Both normal and reverse "
				      "scalar storage order.");
	  return;
	}
      if (!deref
	  && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
	  && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
	{
	  /* We need the same aggregate type on all accesses to be able to
	     distinguish transformation spots from pass-through arguments in
	     the transformation phase.  */
	  disqualify_split_candidate (desc, "We do not support aggregate "
				      "type punning.");
	  return;
	}

      if (type_prevails_p (access->type, type))
	access->type = type;
    }
}
/* Scan body function described by NODE and FUN and create access trees for
   parameters.  */

static void
scan_function (cgraph_node *node, struct function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (stmt_can_throw_external (fun, stmt))
	    bitmap_set_bit (final_bbs, bb->index);
	  switch (gimple_code (stmt))
	    {
	    case GIMPLE_RETURN:
	      {
		tree t = gimple_return_retval (as_a <greturn *> (stmt));
		if (t != NULL_TREE)
		  scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASSIGN:
	      if (gimple_assign_single_p (stmt)
		  && !gimple_clobber_p (stmt))
		{
		  tree rhs = gimple_assign_rhs1 (stmt);
		  scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
		  tree lhs = gimple_assign_lhs (stmt);
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		}
	      break;

	    case GIMPLE_CALL:
	      {
		unsigned argument_count = gimple_call_num_args (stmt);
		isra_scan_context ctx = ISRA_CTX_ARG;
		scan_call_info call_info, *call_info_p = &call_info;
		if (gimple_call_internal_p (stmt))
		  {
		    call_info_p = NULL;
		    ctx = ISRA_CTX_LOAD;
		    internal_fn ifn = gimple_call_internal_fn (stmt);
		    if (internal_store_fn_p (ifn))
		      ctx = ISRA_CTX_STORE;
		  }
		else
		  {
		    call_info.cs = node->get_edge (stmt);
		    call_info.argument_count = argument_count;
		  }

		for (unsigned i = 0; i < argument_count; i++)
		  {
		    call_info.arg_idx = i;
		    scan_expr_access (gimple_call_arg (stmt, i), stmt,
				      ctx, bb, call_info_p);
		  }

		tree lhs = gimple_call_lhs (stmt);
		if (lhs)
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		int flags = gimple_call_flags (stmt);
		if (((flags & (ECF_CONST | ECF_PURE)) == 0)
		    || (flags & ECF_LOOPING_CONST_OR_PURE))
		  bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASM:
	      {
		gasm *asm_stmt = as_a <gasm *> (stmt);
		walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
					       asm_visit_addr);
		bitmap_set_bit (final_bbs, bb->index);

		for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		  }
		for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
		  }
	      }
	      break;

	    default:
	      break;
	    }
	}
    }
}
/* Return true if SSA_NAME NAME of function described by FUN is only used in
   return statements, or if results of any operations it is involved in are
   only used in return statements.  ANALYZED is a bitmap that tracks which SSA
   names we have already started investigating.  */

static bool
ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
{
  bool res = true;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
	continue;

      if (gimple_code (stmt) == GIMPLE_RETURN)
	{
	  tree t = gimple_return_retval (as_a <greturn *> (stmt));
	  if (t != name)
	    {
	      res = false;
	      break;
	    }
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  /* TODO: And perhaps for const function calls too?  */
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = false;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
	      && !ssa_name_only_returned_p (fun, lhs, analyzed))
	    {
	      res = false;
	      break;
	    }
	}
      else
	{
	  res = false;
	  break;
	}
    }
  return res;
}
/* Inspect the uses of the return value of the call associated with CS, and if
   it is not used or if it is only used to construct the return value of the
   caller, mark it as such in call or caller summary.  Also check for
   misaligned arguments.  */

static void
isra_analyze_call (cgraph_edge *cs)
{
  gcall *call_stmt = cs->call_stmt;
  unsigned count = gimple_call_num_args (call_stmt);
  isra_call_summary *csum = call_sums->get_create (cs);

  for (unsigned i = 0; i < count; i++)
    {
      tree arg = gimple_call_arg (call_stmt, i);
      if (is_gimple_reg (arg))
	continue;

      tree offset;
      poly_int64 bitsize, bitpos;
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
			   &unsignedp, &reversep, &volatilep);
      if (!multiple_p (bitpos, BITS_PER_UNIT))
	{
	  csum->m_bit_aligned_arg = true;
	  break;
	}
    }

  tree lhs = gimple_call_lhs (call_stmt);
  if (lhs)
    {
      /* TODO: Also detect aggregates on a LHS of a call that are only returned
	 from this function (without being read anywhere).  */
      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  bitmap analyzed = BITMAP_ALLOC (NULL);
	  if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
					lhs, analyzed))
	    csum->m_return_returned = true;
	  BITMAP_FREE (analyzed);
	}
    }
  else
    csum->m_return_ignored = true;
}
/* Look at all calls going out of NODE, described also by IFS and perform all
   analyses necessary for IPA-SRA that are not done at body scan time or done
   even when body is not scanned because the function is not a candidate.  */

static void
isra_analyze_all_outgoing_calls (cgraph_node *node)
{
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    isra_analyze_call (cs);
  for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
    isra_analyze_call (cs);
}
/* Dump a dereferences table with heading STR to file F.  */

static void
dump_dereferences_table (FILE *f, struct function *fun, const char *str)
{
  basic_block bb;

  fprintf (dump_file, "%s", str);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
		  EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
    {
      fprintf (f, "%4i  %i   ", bb->index, bitmap_bit_p (final_bbs, bb->index));
      if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
	{
	  unsigned i;
	  for (i = 0; i < by_ref_count; i++)
	    {
	      int idx = bb->index * by_ref_count + i;
	      fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
	    }
	}
      fprintf (f, "\n");
    }
  fprintf (dump_file, "\n");
}
/* Propagate distances in bb_dereferences in the opposite direction than the
   control flow edges, in each step storing the maximum of the current value
   and the minimum of all successors.  These steps are repeated until the
   table stabilizes.  Note that BBs which might terminate the function
   (according to the final_bbs bitmap) are never updated in this way.  */

static void
propagate_dereference_distances (struct function *fun)
{
  basic_block bb;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_dereferences_table (dump_file, fun,
			     "Dereference table before propagation:\n");

  auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
  queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
  FOR_EACH_BB_FN (bb, fun)
    {
      queue.quick_push (bb);
      bb->aux = bb;
    }

  while (!queue.is_empty ())
    {
      edge_iterator ei;
      edge e;
      bool change = false;
      int i;

      bb = queue.pop ();
      bb->aux = NULL;

      if (bitmap_bit_p (final_bbs, bb->index))
	continue;

      for (i = 0; i < by_ref_count; i++)
	{
	  int idx = bb->index * by_ref_count + i;
	  bool first = true;
	  HOST_WIDE_INT inh = 0;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int succ_idx = e->dest->index * by_ref_count + i;

	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
		continue;

	      if (first)
		{
		  first = false;
		  inh = bb_dereferences[succ_idx];
		}
	      else if (bb_dereferences[succ_idx] < inh)
		inh = bb_dereferences[succ_idx];
	    }

	  if (!first && bb_dereferences[idx] < inh)
	    {
	      bb_dereferences[idx] = inh;
	      change = true;
	    }
	}

      if (change)
	FOR_EACH_EDGE (e, ei, bb->preds)
	  {
	    if (e->src->aux)
	      continue;

	    e->src->aux = e->src;
	    queue.quick_push (e->src);
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_dereferences_table (dump_file, fun,
			     "Dereference table after propagation:\n");
}
/* Perform basic checks on ACCESS to PARM described by DESC and all its
   children, return true if the parameter cannot be split, otherwise return
   false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must be
   the index of the entry BB in the function of PARM.  */

static bool
check_gensum_access (tree parm, gensum_param_desc *desc,
		     gensum_param_access *access,
		     HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
		     int entry_bb_index)
{
  if (access->nonarg)
    {
      *only_calls = false;
      *nonarg_acc_size += access->size;

      if (access->first_child)
	{
	  disqualify_split_candidate (desc, "Overlapping non-call uses.");
	  return true;
	}
    }
  /* Do not decompose a non-BLKmode param in a way that would create
     BLKmode params.  Especially for by-reference passing (thus,
     pointer-type param) this is hardly worthwhile.  */
  if (DECL_MODE (parm) != BLKmode
      && TYPE_MODE (access->type) == BLKmode)
    {
      disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
      return true;
    }

  if (desc->by_ref)
    {
      int idx = (entry_bb_index * by_ref_count + desc->deref_index);
      if ((access->offset + access->size) > bb_dereferences[idx])
	{
	  disqualify_split_candidate (desc, "Would create a possibly "
				      "illegal dereference in a caller.");
	  return true;
	}
    }

  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
			     entry_bb_index))
      return true;

  return false;
}
/* Copy data from FROM and all of its children to a vector of accesses in an
   IPA descriptor DESC.  */

static void
copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
{
  param_access *to = ggc_cleared_alloc<param_access> ();
  gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
  gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
  to->unit_offset = from->offset / BITS_PER_UNIT;
  to->unit_size = from->size / BITS_PER_UNIT;
  to->type = from->type;
  to->alias_ptr_type = from->alias_ptr_type;
  to->certain = from->nonarg;
  to->reverse = from->reverse;
  vec_safe_push (desc->accesses, to);

  for (gensum_param_access *ch = from->first_child;
       ch;
       ch = ch->next_sibling)
    copy_accesses_to_ipa_desc (ch, desc);
}
/* Analyze the function body scan results stored in PARAM_DESCRIPTIONS, detect
   possible transformations and store information about them in the function
   summary.  NODE, FUN and IFS are all various structures describing the
   currently analyzed function.  */

static void
process_scan_results (cgraph_node *node, struct function *fun,
		      isra_func_summary *ifs,
		      vec<gensum_param_desc> *param_descriptions)
{
  bool check_pass_throughs = false;
  bool dereferences_propagated = false;
  tree parm = DECL_ARGUMENTS (node->decl);
  unsigned param_count = param_descriptions->length ();

  for (unsigned desc_index = 0;
       desc_index < param_count;
       desc_index++, parm = DECL_CHAIN (parm))
    {
      gensum_param_desc *desc = &(*param_descriptions)[desc_index];
      if (!desc->split_candidate)
	continue;

      if (flag_checking)
	isra_verify_access_tree (desc->accesses);

      if (!dereferences_propagated
	  && desc->by_ref
	  && desc->accesses)
	{
	  propagate_dereference_distances (fun);
	  dereferences_propagated = true;
	}

      HOST_WIDE_INT nonarg_acc_size = 0;
      bool only_calls = true;
      bool check_failed = false;

      int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
      for (gensum_param_access *acc = desc->accesses;
	   acc;
	   acc = acc->next_sibling)
	if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
				 entry_bb_index))
	  {
	    check_failed = true;
	    break;
	  }
      if (check_failed)
	continue;

      if (only_calls)
	desc->locally_unused = true;

      HOST_WIDE_INT cur_param_size
	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
      HOST_WIDE_INT param_size_limit;
      if (!desc->by_ref || optimize_function_for_size_p (fun))
	param_size_limit = cur_param_size;
      else
	param_size_limit
	  = opt_for_fn (node->decl,
			param_ipa_sra_ptr_growth_factor) * cur_param_size;
      if (nonarg_acc_size > param_size_limit
	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
	disqualify_split_candidate (desc, "Would result into a too big set "
				    "of replacements.");
      else
	{
	  /* create_parameter_descriptors makes sure unit sizes of all
	     candidate parameters fit unsigned integers restricted to
	     ISRA_ARG_SIZE_LIMIT.  */
	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
	  if (desc->split_candidate && desc->ptr_pt_count)
	    {
	      gcc_assert (desc->by_ref);
	      check_pass_throughs = true;
	    }
	}
    }

  /* When a pointer parameter is passed through to a callee in which it is
     only used to read one or a few items, we can attempt to transform it
     into obtaining and passing through the items instead of the pointer.
     But we must take extra care that 1) we do not introduce any segfault by
     moving dereferences above control flow and that 2) the data is not
     modified through an alias in this function.  The IPA analysis must not
     introduce any access candidates unless it can prove both.

     The current solution is very crude as it consists of ensuring that the
     call postdominates the entry BB and that the definition of the VUSE of
     the call is a default definition.  TODO: For non-recursive callees in
     the same compilation unit we could do better by doing the analysis in
     topological order and looking into access candidates of callees, using
     their alias_ptr_types to attempt real AA.  We could also use the maximum
     known dereferenced offset in this function at the IPA level.

     TODO: Measure the overhead and the effect of just being pessimistic.
     Maybe this is only -O3 material?  */

  bool pdoms_calculated = false;
  if (check_pass_throughs)
    for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
      {
	gcall *call_stmt = cs->call_stmt;
	tree vuse = gimple_vuse (call_stmt);

	/* If the callee is a const function, we don't get a VUSE.  In such
	   case there will be no memory accesses in the called function (or the
	   const attribute is wrong) and then we just don't care.  */
	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);

	unsigned count = gimple_call_num_args (call_stmt);
	isra_call_summary *csum = call_sums->get_create (cs);
	csum->init_inputs (count);
	csum->m_before_any_store = uses_memory_as_obtained;
	for (unsigned argidx = 0; argidx < count; argidx++)
	  {
	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
	      continue;
	    unsigned pidx
	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
	    if (!desc->split_candidate)
	      {
		csum->m_arg_flow[argidx].pointer_pass_through = false;
		continue;
	      }
	    if (!uses_memory_as_obtained)
	      continue;

	    /* Post-dominator check placed last, hoping that it usually won't
	       be needed.  */
	    if (!pdoms_calculated)
	      {
		gcc_checking_assert (cfun);
		connect_infinite_loops_to_exit ();
		calculate_dominance_info (CDI_POST_DOMINATORS);
		pdoms_calculated = true;
	      }
	    if (dominated_by_p (CDI_POST_DOMINATORS,
				gimple_bb (call_stmt),
				single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
	  }
      }
  if (pdoms_calculated)
    {
      free_dominance_info (CDI_POST_DOMINATORS);
      remove_fake_exit_edges ();
    }

  /* TODO: Add early exit if we disqualified everything.  This also requires
     that we either relax the restriction that
     ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
     or store the number of parameters to IPA-SRA function summary and use that
     when just removing params.  */

  vec_safe_reserve_exact (ifs->m_parameters, param_count);
  ifs->m_parameters->quick_grow_cleared (param_count);
  for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
    {
      gensum_param_desc *s = &(*param_descriptions)[desc_index];
      isra_param_desc *d = &(*ifs->m_parameters)[desc_index];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->nonarg_acc_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      for (gensum_param_access *acc = s->accesses;
	   acc;
	   acc = acc->next_sibling)
	copy_accesses_to_ipa_desc (acc, d);
    }

  if (dump_file)
    dump_isra_param_descriptors (dump_file, node->decl, ifs);
}
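
/* Example of the size limit computed above, with assumed values: if
   param_ipa_sra_ptr_growth_factor is 2, a by-reference parameter pointing to
   a 16-byte structure may be replaced by at most 32 bytes worth of scalar
   replacements, whereas a 16-byte by-value aggregate must shrink, i.e. its
   replacements must total strictly less than 16 bytes.  */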
/* Return true if there are any overlaps among certain accesses of DESC.  If
   non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
   too.  DESC is assumed to be a split candidate that is not locally
   unused.  */

static bool
overlapping_certain_accesses_p (isra_param_desc *desc,
				bool *certain_access_present_p)
{
  unsigned pclen = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < pclen; i++)
    {
      param_access *a1 = (*desc->accesses)[i];

      if (!a1->certain)
	continue;
      if (certain_access_present_p)
	*certain_access_present_p = true;
      for (unsigned j = i + 1; j < pclen; j++)
	{
	  param_access *a2 = (*desc->accesses)[j];
	  if (a2->certain
	      && a1->unit_offset < a2->unit_offset + a2->unit_size
	      && a1->unit_offset + a1->unit_size > a2->unit_offset)
	    return true;
	}
    }
  return false;
}
/* Check for any overlaps of certain param accesses among splitting candidates
   and signal an ICE if there are any.  If CERTAIN_MUST_EXIST is set, also
   check that used splitting candidates have at least one certain access.  */

static void
verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;
  unsigned param_count = vec_safe_length (ifs->m_parameters);
  for (unsigned pidx = 0; pidx < param_count; pidx++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
      if (!desc->split_candidate || desc->locally_unused)
	continue;

      bool certain_access_present = !certain_must_exist;
      if (overlapping_certain_accesses_p (desc, &certain_access_present))
	internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
			"which overlap", node->dump_name (), pidx);
      if (!certain_access_present)
	internal_error ("function %qs, parameter %u, is used but does not "
			"have any certain IPA-SRA access",
			node->dump_name (), pidx);
    }
}
/* Intraprocedural part of IPA-SRA analysis.  Scan bodies of all functions in
   this compilation unit and create summary structures describing IPA-SRA
   opportunities and constraints in them.  */

static void
ipa_sra_generate_summary (void)
{
  struct cgraph_node *node;

  gcc_checking_assert (!func_sums);
  gcc_checking_assert (!call_sums);
  func_sums
    = (new (ggc_alloc_no_dtor<ipa_sra_function_summaries> ())
       ipa_sra_function_summaries (symtab, true));
  call_sums = new ipa_sra_call_summaries (symtab);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_sra_summarize_function (node);
}
/* Write intraprocedural analysis information about edge E into a stream for
   LTO WPA.  */

static void
isra_write_edge_summary (output_block *ob, cgraph_edge *e)
{
  isra_call_summary *csum = call_sums->get (e);
  unsigned input_count = csum->m_arg_flow.length ();
  streamer_write_uhwi (ob, input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      streamer_write_hwi (ob, ipf->length);
      bitpack_d bp = bitpack_create (ob->main_stream);
      for (int j = 0; j < ipf->length; j++)
	bp_pack_value (&bp, ipf->inputs[j], 8);
      bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
      bp_pack_value (&bp, ipf->pointer_pass_through, 1);
      bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
      streamer_write_bitpack (&bp);
      streamer_write_uhwi (ob, ipf->unit_offset);
      streamer_write_uhwi (ob, ipf->unit_size);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, csum->m_return_ignored, 1);
  bp_pack_value (&bp, csum->m_return_returned, 1);
  bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
  bp_pack_value (&bp, csum->m_before_any_store, 1);
  streamer_write_bitpack (&bp);
}
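
/* The layout written above must be unpacked in exactly the same order by
   isra_read_edge_summary below.  E.g. an argument flow with length 2 is
   streamed as (assumed values):

     uhwi 2 | bitpack { input#0: 8 bits, input#1: 8 bits, 3 flag bits }
	    | uhwi unit_offset | uhwi unit_size

   which is what keeps the WPA round trip of call summaries safe.  */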
/* Write intraprocedural analysis information about NODE and all of its
   outgoing edges into a stream for LTO WPA.  */

static void
isra_write_node_summary (output_block *ob, cgraph_node *node)
{
  isra_func_summary *ifs = func_sums->get (node);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  int node_ref = lto_symtab_encoder_encode (encoder, node);
  streamer_write_uhwi (ob, node_ref);

  unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
  streamer_write_uhwi (ob, param_desc_count);
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = vec_safe_length (desc->accesses);
      streamer_write_uhwi (ob, access_count);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = (*desc->accesses)[j];
	  stream_write_tree (ob, acc->type, true);
	  stream_write_tree (ob, acc->alias_ptr_type, true);
	  streamer_write_uhwi (ob, acc->unit_offset);
	  streamer_write_uhwi (ob, acc->unit_size);
	  bitpack_d bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, acc->certain, 1);
	  bp_pack_value (&bp, acc->reverse, 1);
	  streamer_write_bitpack (&bp);
	}
      streamer_write_uhwi (ob, desc->param_size_limit);
      streamer_write_uhwi (ob, desc->size_reached);
      bitpack_d bp = bitpack_create (ob->main_stream);
      bp_pack_value (&bp, desc->locally_unused, 1);
      bp_pack_value (&bp, desc->split_candidate, 1);
      bp_pack_value (&bp, desc->by_ref, 1);
      streamer_write_bitpack (&bp);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, ifs->m_candidate, 1);
  bp_pack_value (&bp, ifs->m_returns_value, 1);
  bp_pack_value (&bp, ifs->m_return_ignored, 1);
  gcc_assert (!ifs->m_queued);
  streamer_write_bitpack (&bp);

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
}
/* Write intraprocedural analysis information into a stream for LTO WPA.  */

static void
ipa_sra_write_summary (void)
{
  if (!func_sums || !call_sums)
    return;

  struct output_block *ob = create_output_block (LTO_section_ipa_sra);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  ob->symbol = NULL;

  unsigned int count = 0;
  lto_symtab_encoder_iterator lsei;
  for (lsei = lsei_start_function_in_partition (encoder);
       !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->has_gimple_body_p ()
	  && func_sums->get (node) != NULL)
	count++;
    }
  streamer_write_uhwi (ob, count);

  /* Process all of the functions.  */
  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->has_gimple_body_p ()
	  && func_sums->get (node) != NULL)
	isra_write_node_summary (ob, node);
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);
}
/* Read intraprocedural analysis information about edge CS from a stream
   written for LTO WPA.  */

static void
isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
{
  isra_call_summary *csum = call_sums->get_create (cs);
  unsigned input_count = streamer_read_uhwi (ib);
  csum->init_inputs (input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      ipf->length = streamer_read_hwi (ib);
      bitpack_d bp = streamer_read_bitpack (ib);
      for (int j = 0; j < ipf->length; j++)
	ipf->inputs[j] = bp_unpack_value (&bp, 8);
      ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
      ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
      ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
      ipf->unit_offset = streamer_read_uhwi (ib);
      ipf->unit_size = streamer_read_uhwi (ib);
    }
  bitpack_d bp = streamer_read_bitpack (ib);
  csum->m_return_ignored = bp_unpack_value (&bp, 1);
  csum->m_return_returned = bp_unpack_value (&bp, 1);
  csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
  csum->m_before_any_store = bp_unpack_value (&bp, 1);
}
/* Read intraprocedural analysis information about NODE and all of its
   outgoing edges from a stream written for LTO WPA.  */

static void
isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
		     struct data_in *data_in)
{
  isra_func_summary *ifs = func_sums->get_create (node);
  unsigned param_desc_count = streamer_read_uhwi (ib);
  if (param_desc_count > 0)
    {
      vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
      ifs->m_parameters->quick_grow_cleared (param_desc_count);
    }
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = streamer_read_uhwi (ib);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = ggc_cleared_alloc<param_access> ();
	  acc->type = stream_read_tree (ib, data_in);
	  acc->alias_ptr_type = stream_read_tree (ib, data_in);
	  acc->unit_offset = streamer_read_uhwi (ib);
	  acc->unit_size = streamer_read_uhwi (ib);
	  bitpack_d bp = streamer_read_bitpack (ib);
	  acc->certain = bp_unpack_value (&bp, 1);
	  acc->reverse = bp_unpack_value (&bp, 1);
	  vec_safe_push (desc->accesses, acc);
	}
      desc->param_size_limit = streamer_read_uhwi (ib);
      desc->size_reached = streamer_read_uhwi (ib);
      bitpack_d bp = streamer_read_bitpack (ib);
      desc->locally_unused = bp_unpack_value (&bp, 1);
      desc->split_candidate = bp_unpack_value (&bp, 1);
      desc->by_ref = bp_unpack_value (&bp, 1);
    }
  bitpack_d bp = streamer_read_bitpack (ib);
  ifs->m_candidate = bp_unpack_value (&bp, 1);
  ifs->m_returns_value = bp_unpack_value (&bp, 1);
  ifs->m_return_ignored = bp_unpack_value (&bp, 1);
  ifs->m_queued = 0;

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_read_edge_summary (ib, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_read_edge_summary (ib, e);
}
/* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
   data DATA.  TODO: This function was copied almost verbatim from ipa-prop.cc,
   it should be possible to unify them somehow.  */

static void
isra_read_summary_section (struct lto_file_decl_data *file_data,
			   const char *data, size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i;
  unsigned int count;

  lto_input_block ib_main ((const char *) data + main_offset,
			   header->main_size, file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  count = streamer_read_uhwi (&ib_main);

  for (i = 0; i < count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      lto_symtab_encoder_t encoder;

      index = streamer_read_uhwi (&ib_main);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      gcc_assert (node->definition);
      isra_read_node_info (&ib_main, node, data_in);
    }
  lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
/* Read intraprocedural analysis information from a stream written for
   LTO WPA.  */

static void
ipa_sra_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  gcc_checking_assert (!func_sums);
  gcc_checking_assert (!call_sums);
  func_sums
    = (new (ggc_alloc_no_dtor<ipa_sra_function_summaries> ())
       ipa_sra_function_summaries (symtab, true));
  call_sums = new ipa_sra_call_summaries (symtab);

  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data
	= lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
      if (data)
	isra_read_summary_section (file_data, data, len);
    }
}
/* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F.  */

static void
ipa_sra_dump_all_summaries (FILE *f)
{
  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      fprintf (f, "\nSummary for node %s:\n", node->dump_name ());

      isra_func_summary *ifs = func_sums->get (node);
      if (!ifs)
	{
	  fprintf (f, "  Function does not have any associated IPA-SRA "
		   "summary\n");
	  continue;
	}
      if (!ifs->m_candidate)
	{
	  fprintf (f, "  Not a candidate function\n");
	  continue;
	}
      if (ifs->m_returns_value)
	fprintf (f, "  Returns value\n");
      if (vec_safe_is_empty (ifs->m_parameters))
	fprintf (f, "  No parameter information.\n");
      else
	for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
	  {
	    fprintf (f, "  Descriptor for parameter %i:\n", i);
	    dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
	  }
      fprintf (f, "\n");

      struct cgraph_edge *cs;
      for (cs = node->callees; cs; cs = cs->next_callee)
	{
	  fprintf (f, "  Summary for edge %s->%s:\n", cs->caller->dump_name (),
		   cs->callee->dump_name ());
	  isra_call_summary *csum = call_sums->get (cs);
	  if (csum)
	    csum->dump (f);
	  else
	    fprintf (f, "    Call summary is MISSING!\n");
	}
    }
  fprintf (f, "\n\n");
}
/* Perform function-scope viability tests that can only be made at the IPA
   level and return false if the function is deemed unsuitable for IPA-SRA.  */

static bool
ipa_sra_ipa_function_checks (cgraph_node *node)
{
  if (!node->can_be_local_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Function %s disqualified because it cannot be "
		 "made local.\n", node->dump_name ());
      return false;
    }
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  return true;
}
/* Issues found out by check_callers_for_issues.  */

struct caller_issues
{
  /* The candidate being considered.  */
  cgraph_node *candidate;
  /* There is a thunk among callers.  */
  bool thunk;
  /* Call site with no available information.  */
  bool unknown_callsite;
  /* Call from outside the candidate's comdat group.  */
  bool call_from_outside_comdat;
  /* There is a bit-aligned load into one of the non-gimple-typed
     arguments.  */
  bool bit_aligned_aggregate_argument;
};
/* Worker for call_for_symbol_and_aliases, set any flags of the passed
   caller_issues structure that apply.  */

static bool
check_for_caller_issues (struct cgraph_node *node, void *data)
{
  struct caller_issues *issues = (struct caller_issues *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      if (cs->caller->thunk)
	{
	  issues->thunk = true;
	  /* TODO: We should be able to process at least some types of
	     thunks.  */
	  return true;
	}
      if (issues->candidate->calls_comdat_local
	  && issues->candidate->same_comdat_group
	  && !issues->candidate->in_same_comdat_group_p (cs->caller))
	issues->call_from_outside_comdat = true;

      isra_call_summary *csum = call_sums->get (cs);
      if (!csum)
	{
	  issues->unknown_callsite = true;
	  return true;
	}

      if (csum->m_bit_aligned_arg)
	issues->bit_aligned_aggregate_argument = true;
    }
  return false;
}
/* Look at all incoming edges to NODE, including aliases and thunks, and look
   for problems.  Return true if NODE's type should not be modified at all.  */

static bool
check_all_callers_for_issues (cgraph_node *node)
{
  struct caller_issues issues;
  memset (&issues, 0, sizeof (issues));
  issues.candidate = node;

  node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
  if (issues.unknown_callsite)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s has not been analyzed.  Disabling "
		 "all modifications.\n", node->dump_name ());
      return true;
    }
  /* TODO: We should be able to process at least some types of thunks.  */
  if (issues.thunk)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s is through a thunk, which is not"
		 " handled yet.  Disabling all modifications.\n",
		 node->dump_name ());
      return true;
    }
  if (issues.call_from_outside_comdat)
    {
      if (dump_file)
	fprintf (dump_file, "Function would become private comdat called "
		 "outside of its comdat group.\n");
      return true;
    }

  if (issues.bit_aligned_aggregate_argument)
    {
      /* Let's only remove parameters/return values from such functions.
	 TODO: We could only prevent splitting the problematic parameters if
	 anybody thinks it is worth it.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
		 " disabling parameter splitting.\n", node->dump_name ());

      isra_func_summary *ifs = func_sums->get (node);
      gcc_checking_assert (ifs);
      unsigned param_count = vec_safe_length (ifs->m_parameters);
      for (unsigned i = 0; i < param_count; i++)
	(*ifs->m_parameters)[i].split_candidate = false;
    }
  return false;
}
/* Find the access with corresponding OFFSET and SIZE among accesses in
   PARAM_DESC and return it or NULL if such an access is not there.  */

static param_access *
find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
{
  unsigned pclen = vec_safe_length (param_desc->accesses);

  /* The search is linear but the number of stored accesses is bound by
     PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8.  */

  for (unsigned i = 0; i < pclen; i++)
    if ((*param_desc->accesses)[i]->unit_offset == offset
	&& (*param_desc->accesses)[i]->unit_size == size)
      return (*param_desc->accesses)[i];

  return NULL;
}
/* Return true iff the total size of definite replacements SIZE would violate
   the limit set for it in DESC.  */

static bool
size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
{
  unsigned limit = desc->param_size_limit;
  if (size > limit
      || (!desc->by_ref && size == limit))
    return true;
  return false;
}
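
/* Worked example (assumed numbers): with param_size_limit 16, a by-value
   parameter whose replacements total exactly 16 bytes is rejected, because
   replacing the whole aggregate with itself saves nothing; a by-reference
   parameter only violates the limit once the total exceeds 16 bytes.  */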
/* Increase the reached size of DESC by SIZE or disqualify it if it would
   violate the set limit.  IDX is the parameter number which is dumped when
   disqualifying.  */

static void
bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
{
  unsigned after = desc->size_reached + size;
  if (size_would_violate_limit_p (desc, after))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "    ...size limit reached, disqualifying "
		 "candidate parameter %u\n", idx);
      desc->split_candidate = false;
      return;
    }
  desc->size_reached = after;
}
/* Take all actions required to deal with an edge CS that represents a call to
   an unknown or un-analyzed function, for both parameter removal and
   splitting.  */

static void
process_edge_to_unknown_caller (cgraph_edge *cs)
{
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs);
  isra_call_summary *csum = call_sums->get (cs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
	     cs->caller->dump_name ());

  unsigned args_count = csum->m_arg_flow.length ();
  for (unsigned i = 0; i < args_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through)
	{
	  isra_param_desc *param_desc
	    = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
	  param_desc->locally_unused = false;
	  param_desc->split_candidate = false;
	  continue;
	}
      if (ipf->aggregate_pass_through)
	{
	  unsigned idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];

	  param_desc->locally_unused = false;
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);
	  pacc->certain = true;
	  if (overlapping_certain_accesses_p (param_desc, NULL))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "    ...leading to overlap, "
			 "disqualifying candidate parameter %u\n",
			 idx);
	      param_desc->split_candidate = false;
	    }
	  else
	    bump_reached_size (param_desc, pacc->unit_size, idx);
	  ipf->aggregate_pass_through = false;
	  continue;
	}

      for (int j = 0; j < ipf->length; j++)
	{
	  int input_idx = ipf->inputs[j];
	  (*from_ifs->m_parameters)[input_idx].locally_unused = false;
	}
    }
}
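
/* E.g. (hypothetical): for a call through an unknown function pointer there
   is no callee summary to consult, so a pointer that is merely passed
   through must remain a pointer (its split candidacy is cleared above), and
   any aggregate piece fed to the call becomes a certain access that counts
   fully against the parameter's size limit.  */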
/* Propagate parameter removal information through cross-SCC edge CS,
   i.e. decrease the use count in the caller parameter descriptor for each use
   in this call.  */

static void
param_removal_cross_scc_edge (cgraph_edge *cs)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *to_ifs = func_sums->get (callee);
  if (!to_ifs || !to_ifs->m_candidate
      || (availability < AVAIL_AVAILABLE)
      || vec_safe_is_empty (to_ifs->m_parameters))
    {
      process_edge_to_unknown_caller (cs);
      return;
    }
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs);

  isra_call_summary *csum = call_sums->get (cs);
  unsigned args_count = csum->m_arg_flow.length ();
  unsigned param_count = vec_safe_length (to_ifs->m_parameters);

  for (unsigned i = 0; i < args_count; i++)
    {
      bool unused_in_callee;
      if (i < param_count)
	unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
      else
	unused_in_callee = false;

      if (!unused_in_callee)
	{
	  isra_param_flow *ipf = &csum->m_arg_flow[i];
	  for (int j = 0; j < ipf->length; j++)
	    {
	      int input_idx = ipf->inputs[j];
	      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
	    }
	}
    }
}
/* Unless it is already there, push NODE, which is also described by IFS,
   onto STACK and mark it as queued.  */

static void
isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
			 vec<cgraph_node *> *stack)
{
  if (!ifs->m_queued)
    {
      ifs->m_queued = true;
      stack->safe_push (node);
    }
}
/* If the parameter with index INPUT_IDX is marked as locally unused, mark it
   as used and push CALLER on STACK.  */

static void
isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
			     cgraph_node *caller, vec<cgraph_node *> *stack)
{
  if ((*from_ifs->m_parameters)[input_idx].locally_unused)
    {
      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
      isra_push_node_to_stack (caller, from_ifs, stack);
    }
}
/* Propagate information that any parameter is not used only locally within a
   SCC across CS to the caller, which must be in the same SCC as the
   callee.  Push any callers that need to be re-processed to STACK.  */

static void
propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
{
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
    return;

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *to_ifs = func_sums->get (callee);

  unsigned param_count
    = (to_ifs && (availability >= AVAIL_AVAILABLE))
    ? vec_safe_length (to_ifs->m_parameters) : 0;
  for (unsigned i = 0; i < args_count; i++)
    {
      if (i < param_count
	  && (*to_ifs->m_parameters)[i].locally_unused)
	continue;

      /* The argument is needed in the callee, so we must mark the parameter
	 as used also in the caller and its callers within this SCC.  */
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      for (int j = 0; j < ipf->length; j++)
	{
	  int input_idx = ipf->inputs[j];
	  isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
	}
    }
}
/* Propagate information that any parameter is not used only locally within a
   SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
   same SCC.  Push any callers that need to be re-processed to STACK.  */

static bool
propagate_used_to_scc_callers (cgraph_node *node, void *data)
{
  vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
  cgraph_edge *cs;
  for (cs = node->callers; cs; cs = cs->next_caller)
    if (ipa_edge_within_scc (cs))
      propagate_used_across_scc_edge (cs, stack);
  return false;
}
/* Return true iff all certain accesses in ARG_DESC are also present as
   certain accesses in PARAM_DESC.  */

static bool
all_callee_accesses_present_p (isra_param_desc *param_desc,
			       isra_param_desc *arg_desc)
{
  unsigned aclen = vec_safe_length (arg_desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *argacc = (*arg_desc->accesses)[j];
      if (!argacc->certain)
	continue;
      param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
					      argacc->unit_size);
      if (!pacc
	  || !pacc->certain
	  || !types_compatible_p (argacc->type, pacc->type))
	return false;
    }
  return true;
}
/* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
   does not allow instantiating an auto_vec with a type defined within a
   function so it is a global type.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
/* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
   (which belongs to CALLER) if they would not violate some constraint there.
   If successful, return NULL, otherwise return the string reason for failure
   (which can be written to the dump file).  DELTA_OFFSET is the known offset
   of the actual argument within the formal parameter (so of ARG_DESC within
   PARAM_DESC), ARG_SIZE is the size of the actual argument or zero, if not
   known.  In case of success, set *CHANGE_P to true if propagation actually
   changed anything.  */

static const char *
pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
			   isra_param_desc *arg_desc,
			   unsigned delta_offset, unsigned arg_size,
			   bool *change_p)
{
  unsigned pclen = vec_safe_length (param_desc->accesses);
  unsigned aclen = vec_safe_length (arg_desc->accesses);
  unsigned prop_count = 0;
  unsigned prop_size = 0;
  bool change = false;

  auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *argacc = (*arg_desc->accesses)[j];
      prop_kinds.safe_push (ACC_PROP_DONT);

      if (arg_size > 0
	  && argacc->unit_offset + argacc->unit_size > arg_size)
	return "callee access outside size boundary";

      if (!argacc->certain)
	continue;

      unsigned offset = argacc->unit_offset + delta_offset;
      /* Given that accesses are initially stored according to increasing
	 offset and decreasing size in case of equal offsets, the following
	 searches could be written more efficiently if we kept the ordering
	 when copying.  But the number of accesses is capped at
	 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
	 messy quickly, so let's improve on that only if necessary.  */

      bool exact_match = false;
      for (unsigned i = 0; i < pclen; i++)
	{
	  /* Check for overlaps.  */
	  param_access *pacc = (*param_desc->accesses)[i];
	  if (pacc->unit_offset == offset
	      && pacc->unit_size == argacc->unit_size)
	    {
	      if (argacc->alias_ptr_type != pacc->alias_ptr_type
		  || !types_compatible_p (argacc->type, pacc->type)
		  || argacc->reverse != pacc->reverse)
		return "propagated access types would not match existing ones";

	      exact_match = true;
	      if (!pacc->certain)
		{
		  prop_count++;
		  prop_kinds[j] = ACC_PROP_CERTAIN;
		  prop_size += argacc->unit_size;
		  change = true;
		}
	      continue;
	    }

	  if (offset < pacc->unit_offset + pacc->unit_size
	      && offset + argacc->unit_size > pacc->unit_offset)
	    {
	      /* None permissible with load accesses, possible to fit into
		 argument ones.  */
	      if (pacc->certain
		  || offset < pacc->unit_offset
		  || (offset + argacc->unit_size
		      > pacc->unit_offset + pacc->unit_size))
		return "a propagated access would conflict in caller";
	    }
	}

      if (!exact_match)
	{
	  prop_count++;
	  prop_kinds[j] = ACC_PROP_COPY;
	  prop_size += argacc->unit_size;
	  change = true;
	}
    }

  if (!change)
    return NULL;

  if ((prop_count + pclen
       > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
      || size_would_violate_limit_p (param_desc,
				     param_desc->size_reached + prop_size))
    return "propagating accesses would violate the count or size limit";

  *change_p = true;
  for (unsigned j = 0; j < aclen; j++)
    {
      if (prop_kinds[j] == ACC_PROP_COPY)
	{
	  param_access *argacc = (*arg_desc->accesses)[j];

	  param_access *copy = ggc_cleared_alloc<param_access> ();
	  copy->unit_offset = argacc->unit_offset + delta_offset;
	  copy->unit_size = argacc->unit_size;
	  copy->type = argacc->type;
	  copy->alias_ptr_type = argacc->alias_ptr_type;
	  copy->certain = true;
	  copy->reverse = argacc->reverse;
	  vec_safe_push (param_desc->accesses, copy);
	}
      else if (prop_kinds[j] == ACC_PROP_CERTAIN)
	{
	  param_access *argacc = (*arg_desc->accesses)[j];
	  param_access *csp
	    = find_param_access (param_desc,
				 argacc->unit_offset + delta_offset,
				 argacc->unit_size);
	  csp->certain = true;
	}
    }

  param_desc->size_reached += prop_size;

  return NULL;
}
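
/* Illustration with assumed numbers: if the caller passes the piece of its
   parameter at unit offset 8 (DELTA_OFFSET) and the callee has a certain
   access at its local offset 4 of size 4, the access lands in the caller
   descriptor at offset 8 + 4 = 12, provided no overlapping or
   type-incompatible access is already recorded there and the count and size
   limits still hold.  */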
/* Propagate parameter splitting information through call graph edge CS.
   Return true if any changes that might need to be propagated within SCCs
   have been made.  The function also clears the aggregate_pass_through and
   pointer_pass_through flags in call summaries which do not need to be
   processed again if this CS is revisited when iterating while changes are
   propagated within an SCC.  */

static bool
param_splitting_across_edge (cgraph_edge *cs)
{
  bool res = false;
  bool cross_scc = !ipa_edge_within_scc (cs);
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs && from_ifs->m_parameters);

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  isra_func_summary *to_ifs = func_sums->get (callee);
  unsigned param_count
    = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
       ? vec_safe_length (to_ifs->m_parameters)
       : 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Splitting across %s->%s:\n",
	     cs->caller->dump_name (), callee->dump_name ());

  unsigned i;
  for (i = 0; (i < args_count) && (i < param_count); i++)
    {
      isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (arg_desc->locally_unused)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "    ->%u: unused in callee\n", i);
	  ipf->pointer_pass_through = false;
	  continue;
	}

      if (ipf->pointer_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (param_desc->by_ref);

	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  %u->%u: not candidate or not by "
			 "reference in callee\n", idx, i);
	      param_desc->split_candidate = false;
	      ipf->pointer_pass_through = false;
	      res = true;
	    }
	  else if (!ipf->safe_to_import_accesses)
	    {
	      if (!csum->m_before_any_store
		  || !all_callee_accesses_present_p (param_desc, arg_desc))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: cannot import accesses.\n",
			     idx, i);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: verified callee accesses "
			     "present.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     0, 0, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: by_ref access pull "
			     "failed: %s.\n", idx, i, pull_failure);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: by_ref access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	}
      else if (ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);

	  if (pacc->certain)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  %u->%u: already certain\n", idx, i);
	      ipf->aggregate_pass_through = false;
	    }
	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  %u->%u: not candidate or by "
			 "reference in callee\n", idx, i);

	      pacc->certain = true;
	      if (overlapping_certain_accesses_p (param_desc, NULL))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "    ...leading to overlap, "
			     "disqualifying candidate parameter %u\n",
			     idx);
		  param_desc->split_candidate = false;
		}
	      else
		bump_reached_size (param_desc, pacc->unit_size, idx);

	      ipf->aggregate_pass_through = false;
	      res = true;
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     ipf->unit_offset,
					     ipf->unit_size, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: arg access pull "
			     "failed: %s.\n", idx, i, pull_failure);

		  ipf->aggregate_pass_through = false;
		  pacc->certain = true;

		  if (overlapping_certain_accesses_p (param_desc, NULL))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "    ...leading to overlap, "
				 "disqualifying candidate parameter %u\n",
				 idx);
		      param_desc->split_candidate = false;
		    }
		  else
		    bump_reached_size (param_desc, pacc->unit_size, idx);

		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: arg access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->aggregate_pass_through = false;
		}
	    }
	}
    }

  /* Handle argument-parameter count mismatches.  */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  ipf->pointer_pass_through = false;
	  ipf->aggregate_pass_through = false;
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  %u->%u: no corresponding formal parameter\n",
		     idx, i);
	  param_desc->split_candidate = false;
	  res = true;
	}
    }
  return res;
}
/* Worker for call_for_symbol_and_aliases: look at all callers of NODE and if
   all of them ignore the return value, or come from the same SCC and use the
   return value only to compute their own return value, return false,
   otherwise return true.  */

static bool
retval_used_p (cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      isra_call_summary *csum = call_sums->get (cs);
      gcc_checking_assert (csum);
      if (csum->m_return_ignored)
	continue;
      if (!csum->m_return_returned)
	return true;

      isra_func_summary *from_ifs = func_sums->get (cs->caller);
      if (!from_ifs || !from_ifs->m_candidate)
	return true;

      if (!ipa_edge_within_scc (cs)
	  && !from_ifs->m_return_ignored)
	return true;
    }

  return false;
}
/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
   modify the parameter which originally had index BASE_INDEX, which in the
   adjustment vector of the parent clone (if any) had index PREV_CLONE_INDEX
   and was described by PREV_ADJUSTMENT.  If the parent clone is the original
   function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to
   BASE_INDEX.  */

static void
push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
				  unsigned prev_clone_index,
				  ipa_adjusted_param *prev_adjustment,
				  vec<ipa_adjusted_param, va_gc> **new_params)
{
  isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
  if (desc->locally_unused)
    {
      if (dump_file)
	fprintf (dump_file, "  Will remove parameter %u\n", base_index);
      return;
    }

  if (!desc->split_candidate)
    {
      ipa_adjusted_param adj;
      if (prev_adjustment)
	{
	  adj = *prev_adjustment;
	  adj.prev_clone_adjustment = true;
	  adj.prev_clone_index = prev_clone_index;
	}
      else
	{
	  memset (&adj, 0, sizeof (adj));
	  adj.op = IPA_PARAM_OP_COPY;
	  adj.base_index = base_index;
	  adj.prev_clone_index = prev_clone_index;
	}
      vec_safe_push ((*new_params), adj);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "  Will split parameter %u\n", base_index);

  gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
  unsigned aclen = vec_safe_length (desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *pa = (*desc->accesses)[j];
      if (!pa->certain)
	continue;
      if (dump_file)
	fprintf (dump_file, "    - component at byte offset %u, "
		 "size %u\n", pa->unit_offset, pa->unit_size);

      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_SPLIT;
      adj.base_index = base_index;
      adj.prev_clone_index = prev_clone_index;
      adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
      adj.reverse = pa->reverse;
      adj.type = pa->type;
      adj.alias_ptr_type = pa->alias_ptr_type;
      adj.unit_offset = pa->unit_offset;
      vec_safe_push ((*new_params), adj);
    }
}
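
/* Example (assumed layout): splitting a parameter of type
   struct S { int a; long b; } with certain accesses at unit offsets 0
   (size 4) and 8 (size 8) pushes two IPA_PARAM_OP_SPLIT entries with those
   offsets; clone materialization later rewrites callers to pass the two
   components as separate scalar arguments.  */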
/* Worker for call_for_symbol_thunks_and_aliases.  Set the calls_comdat_local
   flag of all callers of NODE.  */

static bool
mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    cs->caller->calls_comdat_local = true;
  return false;
}
/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  CLONE_NUM_SUFFIXES maps assembler names of nodes to the
   counters used to create unique clone name suffixes.  */

static void
process_isra_node_results (cgraph_node *node,
			   hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }

  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
	isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if ((desc->locally_unused || desc->split_candidate)
	    /* Make sure we do not clone just to attempt to remove an already
	       removed unused argument.  */
	    && (!check_surviving
		|| (i < surviving_params.length ()
		    && surviving_params[i])))
	  {
	    will_change_function = true;
	    break;
	  }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
	       node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
	fprintf (dump_file, "  Will remove return value.\n");
    }

  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments
	= cinfo ? cinfo->param_adjustments : NULL)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
					    old_adj, &new_params);
	}
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc<ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
			      ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n  Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  auto_vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
				  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
					     NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, "  Created new node %s\n", new_node->dump_name ());
  callers.release ();
}
/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not be
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.  */

static bool
disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }
  bool dumped_first = false;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
	{
	  desc->locally_unused = false;
	  desc->split_candidate = false;
	}
      else if (check_surviving
	       && (i >= surviving_params.length ()
		   || !surviving_params[i]))
	{
	  /* Even if the parameter was removed by a previous IPA pass, we do
	     not clear locally_unused because if it really is unused, this
	     information might be useful in callers.  */
	  desc->split_candidate = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      if (!dumped_first)
		{
		  fprintf (dump_file,
			   "The following parameters of %s are dead on "
			   "arrival:", node->dump_name ());
		  dumped_first = true;
		}
	      fprintf (dump_file, " %u", i);
	    }
	}
      else if (desc->locally_unused || desc->split_candidate)
	ret = false;
    }

  if (dumped_first)
    fprintf (dump_file, "\n");

  return ret;
}
/* Run the interprocedural part of IPA-SRA.  */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callees to callers for parameter removal and splitting.  */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;
      cgraph_node *v;

      /* Preliminary IPA function level checks and first step of parameter
	 removal.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (!ipa_sra_ipa_function_checks (v)
	      || check_all_callers_for_issues (v))
	    {
	      ifs->zap ();
	      continue;
	    }
	  if (disable_unavailable_parameters (v, ifs))
	    continue;
	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
	    process_edge_to_unknown_caller (cs);
	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
	    if (!ipa_edge_within_scc (cs))
	      param_removal_cross_scc_edge (cs);
	}

      /* Look at edges within the current SCC and propagate used-ness across
	 them, pushing onto the stack all nodes which might need to be
	 revisited.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
					       &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *v = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (v);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
						 &stack, true);
	}

      /* Parameter splitting.  */
      bool repeat_scc_access_propagation;
      do
	{
	  repeat_scc_access_propagation = false;
	  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	    {
	      isra_func_summary *ifs = func_sums->get (v);
	      if (!ifs
		  || !ifs->m_candidate
		  || vec_safe_is_empty (ifs->m_parameters))
		continue;
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		if (param_splitting_across_edge (cs))
		  repeat_scc_access_propagation = true;
	    }
	}
      while (repeat_scc_access_propagation);

      if (flag_checking)
	FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	  verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  /* One sweep from callers to callees for return value removal.  */
  for (int i = node_scc_count - 1; i >= 0 ; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;
      cgraph_node *v;

      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;

	  bool return_needed
	    = (ifs->m_returns_value
	       && (!dbg_cnt (ipa_sra_retvalues)
		   || v->call_for_symbol_and_aliases (retval_used_p,
						      NULL, true)));
	  ifs->m_return_ignored = !return_needed;
	  if (return_needed)
	    isra_push_node_to_stack (v, ifs, &stack);
	}

      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs)
		&& call_sums->get (cs)->m_return_returned)
	      {
		enum availability av;
		cgraph_node *callee = cs->callee->function_symbol (&av);
		isra_func_summary *to_ifs = func_sums->get (callee);
		if (to_ifs && to_ifs->m_return_ignored)
		  {
		    to_ifs->m_return_ignored = false;
		    isra_push_node_to_stack (callee, to_ifs, &stack);
		  }
	      }
	}
      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
	{
	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
		   "==========\n");
	  ipa_sra_dump_all_summaries (dump_file);
	}
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
	     "==========\n\n");

  return 0;
}
const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
		      ipa_sra_generate_summary, /* generate_summary */
		      ipa_sra_write_summary, /* write_summary */
		      ipa_sra_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* TODO: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_sra && optimize);
  }

  virtual unsigned int execute (function *) { return ipa_sra_analysis (); }

}; // class pass_ipa_sra
/* Intraprocedural part of IPA-SRA analysis.  Scan the function body of NODE
   and create a summary structure describing IPA-SRA opportunities and
   constraints in it.  */

static void
ipa_sra_summarize_function (cgraph_node *node)
{
  if (dump_file)
    fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
	     node->order);
  if (!ipa_sra_preliminary_function_checks (node))
    {
      isra_analyze_all_outgoing_calls (node);
      return;
    }
  gcc_obstack_init (&gensum_obstack);
  isra_func_summary *ifs = func_sums->get_create (node);
  ifs->m_candidate = true;
  tree ret = TREE_TYPE (TREE_TYPE (node->decl));
  ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);

  decl2desc = new hash_map<tree, gensum_param_desc *>;
  unsigned count = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
    count++;

  if (count > 0)
    {
      auto_vec<gensum_param_desc, 16> param_descriptions (count);
      param_descriptions.reserve_exact (count);
      param_descriptions.quick_grow_cleared (count);

      bool cfun_pushed = false;
      struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
      if (create_parameter_descriptors (node, &param_descriptions))
	{
	  push_cfun (fun);
	  cfun_pushed = true;
	  final_bbs = BITMAP_ALLOC (NULL);
	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
				      by_ref_count
				      * last_basic_block_for_fn (fun));
	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
	  scan_function (node, fun);

	  if (dump_file)
	    {
	      dump_gensum_param_descriptors (dump_file, node->decl,
					     &param_descriptions);
	      fprintf (dump_file, "----------------------------------------\n");
	    }
	}
      process_scan_results (node, fun, ifs, &param_descriptions);

      if (cfun_pushed)
	pop_cfun ();
      if (bb_dereferences)
	{
	  free (bb_dereferences);
	  bb_dereferences = NULL;
	  BITMAP_FREE (final_bbs);
	}
    }
  isra_analyze_all_outgoing_calls (node);

  delete decl2desc;
  decl2desc = NULL;
  obstack_free (&gensum_obstack, NULL);
  if (dump_file)
    fprintf (dump_file, "\n\n");
  if (flag_checking)
    verify_splitting_accesses (node, false);
}
ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}


#include "gt-ipa-sra.h"