1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2019-2023 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* IPA-SRA is an interprocedural pass that removes unused function return
22 values (turning functions returning a value which is never used into void
23 functions) and removes unused function parameters. It can also replace an
24 aggregate parameter by a set of other parameters representing part of the
25 original, turning those passed by reference into new ones which pass the
28 The pass is a true IPA one, which means that it works in three stages in
29 order to be able to take advantage of LTO. First, summaries about functions
30 and each calls are generated. Function summaries (often called call graph
31 node summaries) contain mainly information about which parameters are
32 potential transformation candidates and which bits of candidates are
33 accessed. We differentiate between accesses done as a part of a call
34 statement (which might be not necessary if the callee is also transformed)
35 and others (which are mandatory). Call summaries (often called call graph
36 edge summaries) contain information about which function formal parameters
37 feed into which actual call arguments so that if two parameters are only
38 used in a sum which is then passed to another function which then however
39 does not use this parameter, all three parameters of the two functions can
40 be eliminated. Edge summaries also have flags whether the return value is
41 used or if it is only returned in the caller too. In LTO mode these
42 summaries are then streamed to the object file in the compilation phase and
43 streamed back in in the WPA analysis stage.
45 The interprocedural analysis phase traverses the graph in topological order
46 in two sweeps, one in each direction. First, from callees to callers for
47 parameter removal and splitting. Each strongly-connected component is
48 processed iteratively until the situation in it stabilizes. The pass from
49 callers to callees is then carried out to remove unused return values in a
52 Because parameter manipulation has big implications for call redirection
53 which is done only after all call graph nodes materialize, the
54 transformation phase is not part of this patch but is carried out by the
55 clone materialization and edge redirection itself, see comments in
56 ipa-param-manipulation.h for more details. */
61 #include "coretypes.h"
66 #include "tree-pass.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
73 #include "gimple-walk.h"
76 #include "alloc-pool.h"
77 #include "symbol-summary.h"
79 #include "tree-inline.h"
80 #include "ipa-utils.h"
83 #include "tree-streamer.h"
84 #include "internal-fn.h"
85 #include "symtab-clones.h"
89 static void ipa_sra_summarize_function (cgraph_node
*);
91 /* Bits used to track size of an aggregate in bytes interprocedurally. */
92 #define ISRA_ARG_SIZE_LIMIT_BITS 16
93 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94 /* How many parameters can feed into a call actual argument and still be
96 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
98 /* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
103 struct GTY(()) param_access
105 /* Type that a potential replacement should have. This field only has
106 meaning in the summary building and transformation phases, when it is
107 reconstructed from the body. Must not be touched in IPA analysis
111 /* Alias reference type to be used in MEM_REFs when adjusting caller
115 /* Values returned by get_ref_base_and_extent but converted to bytes and
116 stored as unsigned ints. */
117 unsigned unit_offset
;
118 unsigned unit_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
120 /* Set once we are sure that the access will really end up in a potentially
121 transformed function - initially not set for portions of formal parameters
122 that are only used as actual function arguments passed to callees. */
123 unsigned certain
: 1;
124 /* Set if the access has reverse scalar storage order. */
125 unsigned reverse
: 1;
128 /* This structure has the same purpose as the one above and additionally it
129 contains some fields that are only necessary in the summary generation
132 struct gensum_param_access
134 /* Values returned by get_ref_base_and_extent. */
135 HOST_WIDE_INT offset
;
138 /* if this access has any children (in terms of the definition above), this
139 points to the first one. */
140 struct gensum_param_access
*first_child
;
141 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
143 struct gensum_param_access
*next_sibling
;
145 /* Type that a potential replacement should have. This field only has
146 meaning in the summary building and transformation phases, when it is
147 reconstructed from the body. Must not be touched in IPA analysis
150 /* Alias reference type to be used in MEM_REFs when adjusting caller
154 /* Cumulative count of all loads. */
155 profile_count load_count
;
156 /* Have there been writes to or reads from this exact location except for as
157 arguments to a function call that can be tracked. */
160 /* Set if the access has reverse scalar storage order. */
164 /* Summary describing a parameter in the IPA stages. */
166 struct GTY(()) isra_param_desc
168 /* List of access representatives to the parameters, sorted according to
170 vec
<param_access
*, va_gc
> *accesses
;
172 /* Unit size limit of total size of all replacements. */
173 unsigned param_size_limit
: ISRA_ARG_SIZE_LIMIT_BITS
;
174 /* Sum of unit sizes of all certain replacements. */
175 unsigned size_reached
: ISRA_ARG_SIZE_LIMIT_BITS
;
176 /* Minimum offset that is known to be safe to dereference because of callers
177 pass pointers to DECLs of at least this size or because of dereferences in
179 unsigned safe_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
181 /* A parameter that is used only in call arguments and can be removed if all
182 concerned actual arguments are removed. */
183 unsigned locally_unused
: 1;
184 /* An aggregate that is a candidate for breaking up or complete removal. */
185 unsigned split_candidate
: 1;
186 /* Is this a parameter passing stuff by reference? */
188 /* If set, this parameter can only be a candidate for removal if the function
189 is going to loose its return value. */
190 unsigned remove_only_when_retval_removed
: 1;
191 /* If set, this parameter can only be a candidate for splitting if the
192 function is going to loose its return value. Can only be meaningfully set
193 for by_ref parameters. */
194 unsigned split_only_when_retval_removed
: 1;
195 /* Parameter hint set during IPA analysis when there is a caller which does
196 not construct the argument just to pass it to calls. Only meaningful for
197 by_ref parameters. */
198 unsigned not_specially_constructed
: 1;
199 /* Only meaningful for by_ref parameters. If set, this parameter can only be
200 a split candidate if all callers pass pointers that are known to point to
201 a chunk of memory large enough to contain all accesses. */
202 unsigned conditionally_dereferenceable
: 1;
203 /* Set when safe_size has been updated from at least one caller. */
204 unsigned safe_size_set
: 1;
207 /* Structure used when generating summaries that describes a parameter. */
209 struct gensum_param_desc
211 /* Roots of param_accesses. */
212 gensum_param_access
*accesses
;
213 /* Number of accesses in the access tree rooted in field accesses. */
214 unsigned access_count
;
216 /* If the below is non-zero, this is the number of uses as actual
219 /* Number of times this parameter has been directly passed to. */
220 unsigned ptr_pt_count
;
222 /* Size limit of total size of all replacements. */
223 unsigned param_size_limit
;
224 /* Sum of sizes of nonarg accesses. */
225 unsigned nonarg_acc_size
;
227 /* A parameter that is used only in call arguments and can be removed if all
228 concerned actual arguments are removed. */
230 /* An aggregate that is a candidate for breaking up or a pointer passing data
231 by reference that is a candidate for being converted to a set of
232 parameters passing those data by value. */
233 bool split_candidate
;
234 /* Is this a parameter passing stuff by reference (either a pointer or a
235 source language reference type)? */
237 /* If this parameter passes stuff by reference, can it be safely dereferenced
238 without performing further checks (for example because it is a
241 /* If set, this parameter can only be a candidate for removal if the function
242 is going to loose its return value. */
243 bool remove_only_when_retval_removed
;
244 /* If set, this parameter can only be a candidate for splitting if the
245 function is going to loose its return value. Can only be meaningfully set
246 for by_ref parameters. */
247 bool split_only_when_retval_removed
;
248 /* Only meaningful for by_ref parameters. If set, this parameter can only be
249 a split candidate if all callers pass pointers that are known to point to
250 a chunk of memory large enough to contain all accesses. */
251 bool conditionally_dereferenceable
;
253 /* The number of this parameter as they are ordered in function decl. */
255 /* For parameters passing data by reference, this is parameter index to
256 compute indices to bb_dereferences. */
260 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
261 allocated in GC memory, this is not necessary and we can consider removing
265 free_param_decl_accesses (isra_param_desc
*desc
)
267 unsigned len
= vec_safe_length (desc
->accesses
);
268 for (unsigned i
= 0; i
< len
; ++i
)
269 ggc_free ((*desc
->accesses
)[i
]);
270 vec_free (desc
->accesses
);
273 /* Class used to convey information about functions from the
274 intra-procedural analysis stage to inter-procedural one. */
276 class GTY((for_user
)) isra_func_summary
279 /* initialize the object. */
282 : m_parameters (NULL
), m_candidate (false), m_returns_value (false),
283 m_return_ignored (false), m_queued (false)
286 /* Destroy m_parameters. */
288 ~isra_func_summary ();
290 /* Mark the function as not a candidate for any IPA-SRA transformation.
291 Return true if it was a candidate until now. */
295 /* Vector of parameter descriptors corresponding to the function being
297 vec
<isra_param_desc
, va_gc
> *m_parameters
;
299 /* Whether the node is even a candidate for any IPA-SRA transformation at
301 unsigned m_candidate
: 1;
303 /* Whether the original function returns any value. */
304 unsigned m_returns_value
: 1;
306 /* Set to true if all call statements do not actually use the returned
309 unsigned m_return_ignored
: 1;
311 /* Whether the node is already queued in IPA SRA stack during processing of
314 unsigned m_queued
: 1;
317 /* Deallocate the memory pointed to by isra_func_summary. TODO: Since this
318 data structure is allocated in GC memory, this is not necessary and we can
319 consider removing the destructor. */
321 isra_func_summary::~isra_func_summary ()
323 unsigned len
= vec_safe_length (m_parameters
);
324 for (unsigned i
= 0; i
< len
; ++i
)
325 free_param_decl_accesses (&(*m_parameters
)[i
]);
326 vec_free (m_parameters
);
329 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
330 true if it was a candidate until now. */
333 isra_func_summary::zap ()
335 bool ret
= m_candidate
;
338 /* TODO: see the destructor above. */
339 unsigned len
= vec_safe_length (m_parameters
);
340 for (unsigned i
= 0; i
< len
; ++i
)
341 free_param_decl_accesses (&(*m_parameters
)[i
]);
342 vec_free (m_parameters
);
347 /* Structure to describe which formal parameters feed into a particular actual
350 struct isra_param_flow
352 /* Number of elements in array inputs that contain valid data. */
354 /* Indices of formal parameters that feed into the described actual argument.
355 If aggregate_pass_through or pointer_pass_through below are true, it must
356 contain exactly one element which is passed through from a formal
357 parameter if the given number. Otherwise, the array contains indices of
358 callee's formal parameters which are used to calculate value of this
360 unsigned char inputs
[IPA_SRA_MAX_PARAM_FLOW_LEN
];
362 /* Offset within the formal parameter. */
363 unsigned unit_offset
;
364 /* When aggregate_pass_through is set, this is the size of the portion of an
365 aggregate formal parameter that is being passed. Otherwise, this is size
366 of pointed to memory that is known to be valid be dereferenced. */
367 unsigned unit_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
369 /* True when the value of this actual argument is a portion of a formal
371 unsigned aggregate_pass_through
: 1;
372 /* True when the value of this actual copy is a verbatim pass through of an
374 unsigned pointer_pass_through
: 1;
375 /* True when it is safe to copy access candidates here from the callee, which
376 would mean introducing dereferences into callers of the caller. */
377 unsigned safe_to_import_accesses
: 1;
378 /* True when the passed value is an address of a structure that has been
379 constructed in the caller just to be passed by reference to functions
380 (i.e. is never read). */
381 unsigned constructed_for_calls
: 1;
384 /* Structure used to convey information about calls from the intra-procedural
385 analysis stage to inter-procedural one. */
387 class isra_call_summary
391 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
392 m_bit_aligned_arg (false), m_before_any_store (false)
395 void init_inputs (unsigned arg_count
);
398 /* Information about what formal parameters of the caller are used to compute
399 individual actual arguments of this call. */
400 auto_vec
<isra_param_flow
> m_arg_flow
;
402 /* Set to true if the call statement does not have a LHS. */
403 unsigned m_return_ignored
: 1;
405 /* Set to true if the LHS of call statement is only used to construct the
406 return value of the caller. */
407 unsigned m_return_returned
: 1;
409 /* Set when any of the call arguments are not byte-aligned. */
410 unsigned m_bit_aligned_arg
: 1;
412 /* Set to true if the call happend before any (other) store to memory in the
414 unsigned m_before_any_store
: 1;
417 /* Class to manage function summaries. */
419 class GTY((user
)) ipa_sra_function_summaries
420 : public function_summary
<isra_func_summary
*>
423 ipa_sra_function_summaries (symbol_table
*table
, bool ggc
):
424 function_summary
<isra_func_summary
*> (table
, ggc
) { }
426 void duplicate (cgraph_node
*, cgraph_node
*,
427 isra_func_summary
*old_sum
,
428 isra_func_summary
*new_sum
) final override
;
429 void insert (cgraph_node
*, isra_func_summary
*) final override
;
432 /* Hook that is called by summary when a node is duplicated. */
435 ipa_sra_function_summaries::duplicate (cgraph_node
*, cgraph_node
*,
436 isra_func_summary
*old_sum
,
437 isra_func_summary
*new_sum
)
439 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
441 new_sum
->m_candidate
= old_sum
->m_candidate
;
442 new_sum
->m_returns_value
= old_sum
->m_returns_value
;
443 new_sum
->m_return_ignored
= old_sum
->m_return_ignored
;
444 gcc_assert (!old_sum
->m_queued
);
445 new_sum
->m_queued
= false;
447 unsigned param_count
= vec_safe_length (old_sum
->m_parameters
);
450 vec_safe_reserve_exact (new_sum
->m_parameters
, param_count
);
451 new_sum
->m_parameters
->quick_grow_cleared (param_count
);
452 for (unsigned i
= 0; i
< param_count
; i
++)
454 isra_param_desc
*s
= &(*old_sum
->m_parameters
)[i
];
455 isra_param_desc
*d
= &(*new_sum
->m_parameters
)[i
];
457 d
->param_size_limit
= s
->param_size_limit
;
458 d
->size_reached
= s
->size_reached
;
459 d
->safe_size
= s
->safe_size
;
460 d
->locally_unused
= s
->locally_unused
;
461 d
->split_candidate
= s
->split_candidate
;
462 d
->by_ref
= s
->by_ref
;
463 d
->remove_only_when_retval_removed
= s
->remove_only_when_retval_removed
;
464 d
->split_only_when_retval_removed
= s
->split_only_when_retval_removed
;
465 d
->not_specially_constructed
= s
->not_specially_constructed
;
466 d
->conditionally_dereferenceable
= s
->conditionally_dereferenceable
;
467 d
->safe_size_set
= s
->safe_size_set
;
469 unsigned acc_count
= vec_safe_length (s
->accesses
);
470 vec_safe_reserve_exact (d
->accesses
, acc_count
);
471 for (unsigned j
= 0; j
< acc_count
; j
++)
473 param_access
*from
= (*s
->accesses
)[j
];
474 param_access
*to
= ggc_cleared_alloc
<param_access
> ();
475 to
->type
= from
->type
;
476 to
->alias_ptr_type
= from
->alias_ptr_type
;
477 to
->unit_offset
= from
->unit_offset
;
478 to
->unit_size
= from
->unit_size
;
479 to
->certain
= from
->certain
;
480 to
->reverse
= from
->reverse
;
481 d
->accesses
->quick_push (to
);
486 /* Pointer to the pass function summary holder. */
488 static GTY(()) ipa_sra_function_summaries
*func_sums
;
490 /* Hook that is called by summary when new node appears. */
493 ipa_sra_function_summaries::insert (cgraph_node
*node
, isra_func_summary
*)
495 if (opt_for_fn (node
->decl
, flag_ipa_sra
))
497 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
498 ipa_sra_summarize_function (node
);
502 func_sums
->remove (node
);
505 /* Class to manage call summaries. */
507 class ipa_sra_call_summaries
: public call_summary
<isra_call_summary
*>
510 ipa_sra_call_summaries (symbol_table
*table
):
511 call_summary
<isra_call_summary
*> (table
) { }
513 /* Duplicate info when an edge is cloned. */
514 void duplicate (cgraph_edge
*, cgraph_edge
*,
515 isra_call_summary
*old_sum
,
516 isra_call_summary
*new_sum
) final override
;
519 static ipa_sra_call_summaries
*call_sums
;
522 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
523 ARG_COUNT is the number of actual arguments passed. */
526 isra_call_summary::init_inputs (unsigned arg_count
)
530 gcc_checking_assert (m_arg_flow
.length () == 0);
533 if (m_arg_flow
.length () == 0)
535 m_arg_flow
.reserve_exact (arg_count
);
536 m_arg_flow
.quick_grow_cleared (arg_count
);
539 gcc_checking_assert (arg_count
== m_arg_flow
.length ());
542 /* Dump all information in call summary to F. */
545 isra_call_summary::dump (FILE *f
)
547 if (m_return_ignored
)
548 fprintf (f
, " return value ignored\n");
549 if (m_return_returned
)
550 fprintf (f
, " return value used only to compute caller return value\n");
551 if (m_before_any_store
)
552 fprintf (f
, " happens before any store to memory\n");
553 for (unsigned i
= 0; i
< m_arg_flow
.length (); i
++)
555 fprintf (f
, " Parameter %u:\n", i
);
556 isra_param_flow
*ipf
= &m_arg_flow
[i
];
561 fprintf (f
, " Scalar param sources: ");
562 for (int j
= 0; j
< ipf
->length
; j
++)
568 fprintf (f
, "%i", (int) ipf
->inputs
[j
]);
572 if (ipf
->aggregate_pass_through
)
573 fprintf (f
, " Aggregate pass through from the param given above, "
574 "unit offset: %u , unit size: %u\n",
575 ipf
->unit_offset
, ipf
->unit_size
);
576 else if (ipf
->unit_size
> 0)
577 fprintf (f
, " Known dereferenceable size: %u\n", ipf
->unit_size
);
578 if (ipf
->pointer_pass_through
)
579 fprintf (f
, " Pointer pass through from the param given above, "
580 "safe_to_import_accesses: %u\n", ipf
->safe_to_import_accesses
);
581 if (ipf
->constructed_for_calls
)
582 fprintf (f
, " Variable constructed just to be passed to "
587 /* Duplicate edge summary when an edge is cloned. */
590 ipa_sra_call_summaries::duplicate (cgraph_edge
*, cgraph_edge
*,
591 isra_call_summary
*old_sum
,
592 isra_call_summary
*new_sum
)
594 unsigned arg_count
= old_sum
->m_arg_flow
.length ();
595 new_sum
->init_inputs (arg_count
);
596 for (unsigned i
= 0; i
< arg_count
; i
++)
597 new_sum
->m_arg_flow
[i
] = old_sum
->m_arg_flow
[i
];
599 new_sum
->m_return_ignored
= old_sum
->m_return_ignored
;
600 new_sum
->m_return_returned
= old_sum
->m_return_returned
;
601 new_sum
->m_bit_aligned_arg
= old_sum
->m_bit_aligned_arg
;
602 new_sum
->m_before_any_store
= old_sum
->m_before_any_store
;
606 /* With all GTY stuff done, we can move to anonymous namespace. */
608 /* Quick mapping from a decl to its param descriptor. */
610 hash_map
<tree
, gensum_param_desc
*> *decl2desc
;
612 /* All local DECLs ever loaded from of and of those that have their address
613 assigned to a variable. */
615 hash_set
<tree
> *loaded_decls
;
617 /* Countdown of allowed Alias Analysis steps during summary building. */
619 int aa_walking_limit
;
621 /* This is a table in which for each basic block and parameter there is a
622 distance (offset + size) in that parameter which is dereferenced and
623 accessed in that BB. */
624 HOST_WIDE_INT
*bb_dereferences
= NULL
;
625 /* How many by-reference parameters there are in the current function. */
626 int unsafe_by_ref_count
;
628 /* Bitmap of BBs that can cause the function to "stop" progressing by
629 returning, throwing externally, looping infinitely or calling a function
630 which might abort etc.. */
633 /* Obstack to allocate various small structures required only when generating
634 summary for a function. */
635 struct obstack gensum_obstack
;
637 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
638 attributes, return true otherwise. NODE is the cgraph node of the current
642 ipa_sra_preliminary_function_checks (cgraph_node
*node
)
644 if (!node
->can_change_signature
)
647 fprintf (dump_file
, "Function cannot change signature.\n");
651 if (!tree_versionable_function_p (node
->decl
))
654 fprintf (dump_file
, "Function is not versionable.\n");
658 if (!opt_for_fn (node
->decl
, optimize
)
659 || !opt_for_fn (node
->decl
, flag_ipa_sra
))
662 fprintf (dump_file
, "Not optimizing or IPA-SRA turned off for this "
667 if (DECL_VIRTUAL_P (node
->decl
))
670 fprintf (dump_file
, "Function is a virtual method.\n");
674 struct function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
678 fprintf (dump_file
, "Function uses stdarg. \n");
682 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
685 fprintf (dump_file
, "Always inline function will be inlined "
693 /* Print access tree starting at ACCESS to F. */
696 dump_gensum_access (FILE *f
, gensum_param_access
*access
, unsigned indent
)
699 for (unsigned i
= 0; i
< indent
; i
++)
701 fprintf (f
, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC
,
703 fprintf (f
, ", size: " HOST_WIDE_INT_PRINT_DEC
, access
->size
);
704 fprintf (f
, ", type: ");
705 print_generic_expr (f
, access
->type
);
706 fprintf (f
, ", alias_ptr_type: ");
707 print_generic_expr (f
, access
->alias_ptr_type
);
708 fprintf (f
, ", load_count: ");
709 access
->load_count
.dump (f
);
710 fprintf (f
, ", nonarg: %u, reverse: %u\n", access
->nonarg
, access
->reverse
);
711 for (gensum_param_access
*ch
= access
->first_child
;
713 ch
= ch
->next_sibling
)
714 dump_gensum_access (f
, ch
, indent
+ 2);
718 /* Print access tree starting at ACCESS to F. */
721 dump_isra_access (FILE *f
, param_access
*access
)
723 fprintf (f
, " * Access to unit offset: %u", access
->unit_offset
);
724 fprintf (f
, ", unit size: %u", access
->unit_size
);
725 fprintf (f
, ", type: ");
726 print_generic_expr (f
, access
->type
);
727 fprintf (f
, ", alias_ptr_type: ");
728 print_generic_expr (f
, access
->alias_ptr_type
);
730 fprintf (f
, ", certain");
732 fprintf (f
, ", not certain");
734 fprintf (f
, ", reverse");
738 /* Dump access tree starting at ACCESS to stderr. */
741 debug_isra_access (param_access
*access
)
743 dump_isra_access (stderr
, access
);
746 /* Dump DESC to F. */
749 dump_gensum_param_descriptor (FILE *f
, gensum_param_desc
*desc
)
751 if (desc
->locally_unused
)
752 fprintf (f
, " unused with %i call_uses%s\n", desc
->call_uses
,
753 desc
->remove_only_when_retval_removed
?
754 " remove_only_when_retval_removed" : "");
755 if (!desc
->split_candidate
)
757 fprintf (f
, " not a candidate\n");
761 fprintf (f
, " %s%s%s by_ref with %u pass throughs\n",
762 desc
->safe_ref
? "safe" : "unsafe",
763 desc
->conditionally_dereferenceable
764 ? " conditionally_dereferenceable" : "",
765 desc
->split_only_when_retval_removed
766 ? " split_only_when_retval_removed" : "",
769 for (gensum_param_access
*acc
= desc
->accesses
; acc
; acc
= acc
->next_sibling
)
770 dump_gensum_access (f
, acc
, 2);
773 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
777 dump_gensum_param_descriptors (FILE *f
, tree fndecl
,
778 vec
<gensum_param_desc
> *param_descriptions
)
780 tree parm
= DECL_ARGUMENTS (fndecl
);
782 i
< param_descriptions
->length ();
783 ++i
, parm
= DECL_CHAIN (parm
))
785 fprintf (f
, " Descriptor for parameter %i ", i
);
786 print_generic_expr (f
, parm
, TDF_UID
);
788 dump_gensum_param_descriptor (f
, &(*param_descriptions
)[i
]);
793 /* Dump DESC to F. If HINTS is true, also dump IPA-analysis computed
797 dump_isra_param_descriptor (FILE *f
, isra_param_desc
*desc
, bool hints
)
799 if (desc
->locally_unused
)
801 fprintf (f
, " (locally) unused\n");
803 if (!desc
->split_candidate
)
805 fprintf (f
, " not a candidate for splitting");
806 if (hints
&& desc
->by_ref
&& desc
->safe_size_set
)
807 fprintf (f
, ", safe_size: %u", (unsigned) desc
->safe_size
);
811 fprintf (f
, " param_size_limit: %u, size_reached: %u%s",
812 desc
->param_size_limit
, desc
->size_reached
,
813 desc
->by_ref
? ", by_ref" : "");
814 if (desc
->remove_only_when_retval_removed
)
815 fprintf (f
, ", remove_only_when_retval_removed");
816 if (desc
->split_only_when_retval_removed
)
817 fprintf (f
, ", split_only_when_retval_removed");
818 if (desc
->by_ref
&& desc
->conditionally_dereferenceable
)
819 fprintf (f
, ", conditionally_dereferenceable");
822 if (desc
->by_ref
&& !desc
->not_specially_constructed
)
823 fprintf (f
, ", args_specially_constructed");
824 if (desc
->by_ref
&& desc
->safe_size_set
)
825 fprintf (f
, ", safe_size: %u", (unsigned) desc
->safe_size
);
829 for (unsigned i
= 0; i
< vec_safe_length (desc
->accesses
); ++i
)
831 param_access
*access
= (*desc
->accesses
)[i
];
832 dump_isra_access (f
, access
);
836 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
837 If HINTS is true, also dump IPA-analysis computed hints. */
840 dump_isra_param_descriptors (FILE *f
, tree fndecl
, isra_func_summary
*ifs
,
843 tree parm
= DECL_ARGUMENTS (fndecl
);
844 if (!ifs
->m_parameters
)
846 fprintf (f
, " parameter descriptors not available\n");
851 i
< ifs
->m_parameters
->length ();
852 ++i
, parm
= DECL_CHAIN (parm
))
854 fprintf (f
, " Descriptor for parameter %i ", i
);
855 print_generic_expr (f
, parm
, TDF_UID
);
857 dump_isra_param_descriptor (f
, &(*ifs
->m_parameters
)[i
], hints
);
861 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
862 function fails return false, otherwise return true. SRC must fit into an
863 unsigned char. Used for purposes of transitive unused parameter
867 add_src_to_param_flow (isra_param_flow
*param_flow
, int src
)
869 gcc_checking_assert (src
>= 0 && src
<= UCHAR_MAX
);
870 if (param_flow
->length
== IPA_SRA_MAX_PARAM_FLOW_LEN
)
873 param_flow
->inputs
[(int) param_flow
->length
] = src
;
874 param_flow
->length
++;
878 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
879 it is the only input. Used for purposes of transitive parameter
883 set_single_param_flow_source (isra_param_flow
*param_flow
, int src
)
885 gcc_checking_assert (src
>= 0 && src
<= UCHAR_MAX
);
886 if (param_flow
->length
== 0)
888 param_flow
->inputs
[0] = src
;
889 param_flow
->length
= 1;
891 else if (param_flow
->length
== 1)
892 gcc_assert (param_flow
->inputs
[0] == src
);
897 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
901 get_single_param_flow_source (const isra_param_flow
*param_flow
)
903 gcc_assert (param_flow
->length
== 1);
904 return param_flow
->inputs
[0];
907 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
908 in FUN represented with NODE and return a negative number if any of them is
909 used for something else than either an actual call argument, simple return,
910 simple arithmetic operation or debug statement. If there are no such uses,
911 return the number of actual arguments that this parameter eventually feeds
912 to (or zero if there is none). If there are any simple return uses, set
913 DESC->remove_only_when_retval_removed. For any such parameter, mark
914 PARM_NUM as one of its sources. ANALYZED is a bitmap that tracks which SSA
915 names we have already started investigating. */
918 isra_track_scalar_value_uses (function
*fun
, cgraph_node
*node
, tree name
,
919 int parm_num
, bitmap analyzed
,
920 gensum_param_desc
*desc
)
923 imm_use_iterator imm_iter
;
926 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, name
)
928 if (is_gimple_debug (stmt
)
929 || gimple_clobber_p (stmt
))
932 /* TODO: We could handle at least const builtin functions like arithmetic
934 if (is_gimple_call (stmt
))
938 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
941 gcall
*call
= as_a
<gcall
*> (stmt
);
943 if (gimple_call_internal_p (call
)
944 || (arg_count
= gimple_call_num_args (call
)) == 0)
950 cgraph_edge
*cs
= node
->get_edge (stmt
);
951 gcc_checking_assert (cs
);
952 isra_call_summary
*csum
= call_sums
->get_create (cs
);
953 csum
->init_inputs (arg_count
);
956 for (unsigned i
= 0; i
< arg_count
; i
++)
957 if (gimple_call_arg (call
, i
) == name
)
959 if (!add_src_to_param_flow (&csum
->m_arg_flow
[i
], parm_num
))
968 || all_uses
!= simple_uses
)
975 else if (!stmt_unremovable_because_of_non_call_eh_p (fun
, stmt
)
976 && ((is_gimple_assign (stmt
) && !gimple_has_volatile_ops (stmt
))
977 || gimple_code (stmt
) == GIMPLE_PHI
))
980 if (gimple_code (stmt
) == GIMPLE_PHI
)
981 lhs
= gimple_phi_result (stmt
);
983 lhs
= gimple_assign_lhs (stmt
);
985 if (TREE_CODE (lhs
) != SSA_NAME
)
990 gcc_assert (!gimple_vdef (stmt
));
991 if (bitmap_set_bit (analyzed
, SSA_NAME_VERSION (lhs
)))
993 int tmp
= isra_track_scalar_value_uses (fun
, node
, lhs
, parm_num
,
1003 else if (greturn
*gr
= dyn_cast
<greturn
*>(stmt
))
1005 tree rv
= gimple_return_retval (gr
);
1011 desc
->remove_only_when_retval_removed
= true;
1022 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
1023 also described by NODE) and simple arithmetic calculations involving PARM
1024 and return false if any of them is used for something else than either an
1025 actual call argument, simple return, simple arithmetic operation or debug
1026 statement. If there are no such uses, return true and store the number of
1027 actual arguments that this parameter eventually feeds to (or zero if there
1028 is none) to DESC->call_uses and set DESC->remove_only_when_retval_removed if
1029 there are any uses in return statemens. For any such parameter, mark
1030 PARM_NUM as one of its sources.
1032 This function is similar to ptr_parm_has_nonarg_uses but its results are
1033 meant for unused parameter removal, as opposed to splitting of parameters
1034 passed by reference or converting them to passed by value. */
1037 isra_track_scalar_param_local_uses (function
*fun
, cgraph_node
*node
, tree parm
,
1038 int parm_num
, gensum_param_desc
*desc
)
1040 gcc_checking_assert (is_gimple_reg (parm
));
1042 tree name
= ssa_default_def (fun
, parm
);
1043 if (!name
|| has_zero_uses (name
))
1045 desc
->call_uses
= 0;
1049 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1050 if (parm_num
> UCHAR_MAX
)
1053 bitmap analyzed
= BITMAP_ALLOC (NULL
);
1054 int call_uses
= isra_track_scalar_value_uses (fun
, node
, name
, parm_num
,
1056 BITMAP_FREE (analyzed
);
1059 desc
->call_uses
= call_uses
;
1063 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
1064 examine whether there are any nonarg uses that are not actual arguments or
1065 otherwise infeasible uses. If so, return true, otherwise return false.
1066 Create pass-through IPA flow records for any direct uses as argument calls
1067 and if returning false, store their number into DESC->ptr_pt_count. If
1068 removal of return value would still allow splitting, return true but set
1069 DESC->split_only_when_retval_removed. NODE and FUN must represent the
1070 function that is currently analyzed, PARM_NUM must be the index of the
1073 This function is similar to isra_track_scalar_param_local_uses but its
1074 results are meant for splitting of parameters passed by reference or turning
1075 them into bits passed by value, as opposed to generic unused parameter
1079 ptr_parm_has_nonarg_uses (cgraph_node
*node
, function
*fun
, tree parm
,
1080 int parm_num
, gensum_param_desc
*desc
)
1082 imm_use_iterator ui
;
1084 tree name
= ssa_default_def (fun
, parm
);
1086 unsigned pt_count
= 0;
1088 if (!name
|| has_zero_uses (name
))
1091 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1092 if (parm_num
> UCHAR_MAX
)
1095 FOR_EACH_IMM_USE_STMT (stmt
, ui
, name
)
1097 unsigned uses_ok
= 0;
1098 use_operand_p use_p
;
1100 if (is_gimple_debug (stmt
)
1101 || gimple_clobber_p (stmt
))
1104 if (gimple_assign_single_p (stmt
))
1106 tree rhs
= gimple_assign_rhs1 (stmt
);
1107 if (!TREE_THIS_VOLATILE (rhs
))
1109 while (handled_component_p (rhs
))
1110 rhs
= TREE_OPERAND (rhs
, 0);
1111 if (TREE_CODE (rhs
) == MEM_REF
1112 && TREE_OPERAND (rhs
, 0) == name
1113 && integer_zerop (TREE_OPERAND (rhs
, 1))
1114 && types_compatible_p (TREE_TYPE (rhs
),
1115 TREE_TYPE (TREE_TYPE (name
))))
1119 else if (is_gimple_call (stmt
))
1121 gcall
*call
= as_a
<gcall
*> (stmt
);
1123 if (gimple_call_internal_p (call
)
1124 || (arg_count
= gimple_call_num_args (call
)) == 0)
1130 cgraph_edge
*cs
= node
->get_edge (stmt
);
1131 gcc_checking_assert (cs
);
1132 isra_call_summary
*csum
= call_sums
->get_create (cs
);
1133 csum
->init_inputs (arg_count
);
1135 for (unsigned i
= 0; i
< arg_count
; ++i
)
1137 tree arg
= gimple_call_arg (stmt
, i
);
1141 /* TODO: Allow &MEM_REF[name + offset] here,
1142 ipa_param_body_adjustments::modify_call_stmt has to be
1144 csum
->m_arg_flow
[i
].pointer_pass_through
= true;
1145 set_single_param_flow_source (&csum
->m_arg_flow
[i
], parm_num
);
1151 if (!TREE_THIS_VOLATILE (arg
))
1153 while (handled_component_p (arg
))
1154 arg
= TREE_OPERAND (arg
, 0);
1155 if (TREE_CODE (arg
) == MEM_REF
1156 && TREE_OPERAND (arg
, 0) == name
1157 && integer_zerop (TREE_OPERAND (arg
, 1))
1158 && types_compatible_p (TREE_TYPE (arg
),
1159 TREE_TYPE (TREE_TYPE (name
))))
1164 else if (greturn
*gr
= dyn_cast
<greturn
*>(stmt
))
1166 tree rv
= gimple_return_retval (gr
);
1170 /* Analysis for feasibility of removal must have already reached
1171 the conclusion that the flag must be set if it completed. */
1172 gcc_assert (!desc
->locally_unused
1173 || desc
->remove_only_when_retval_removed
);
1174 desc
->split_only_when_retval_removed
= true;
1178 /* If the number of valid uses does not match the number of
1179 uses in this stmt there is an unhandled use. */
1180 unsigned all_uses
= 0;
1181 FOR_EACH_IMM_USE_ON_STMT (use_p
, ui
)
1184 gcc_checking_assert (uses_ok
<= all_uses
);
1185 if (uses_ok
!= all_uses
)
1192 desc
->ptr_pt_count
= pt_count
;
1196 /* Initialize vector of parameter descriptors of NODE. Return true if there
1197 are any candidates for splitting or unused aggregate parameter removal (the
1198 function may return false if there are candidates for removal of register
1202 create_parameter_descriptors (cgraph_node
*node
,
1203 vec
<gensum_param_desc
> *param_descriptions
)
1205 function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
1209 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
1211 parm
= DECL_CHAIN (parm
), num
++)
1214 gensum_param_desc
*desc
= &(*param_descriptions
)[num
];
1215 /* param_descriptions vector is grown cleared in the caller. */
1216 desc
->param_number
= num
;
1217 decl2desc
->put (parm
, desc
);
1219 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1220 print_generic_expr (dump_file
, parm
, TDF_UID
);
1222 tree type
= TREE_TYPE (parm
);
1223 if (TREE_THIS_VOLATILE (parm
))
1225 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1226 fprintf (dump_file
, " not a candidate, is volatile\n");
1229 if (!is_gimple_reg_type (type
) && is_va_list_type (type
))
1231 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1232 fprintf (dump_file
, " not a candidate, is a va_list type\n");
1235 if (TREE_ADDRESSABLE (parm
))
1237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1238 fprintf (dump_file
, " not a candidate, is addressable\n");
1241 if (TREE_ADDRESSABLE (type
))
1243 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1244 fprintf (dump_file
, " not a candidate, type cannot be split\n");
1248 if (is_gimple_reg (parm
)
1249 && !isra_track_scalar_param_local_uses (fun
, node
, parm
, num
, desc
))
1251 desc
->locally_unused
= true;
1253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1254 fprintf (dump_file
, " is a scalar with only %i call uses%s\n",
1256 desc
->remove_only_when_retval_removed
1257 ? " and return uses": "");
1260 if (POINTER_TYPE_P (type
))
1262 desc
->by_ref
= true;
1263 if (TREE_CODE (type
) == REFERENCE_TYPE
1265 && TREE_CODE (TREE_TYPE (node
->decl
)) == METHOD_TYPE
))
1266 desc
->safe_ref
= true;
1268 desc
->safe_ref
= false;
1269 type
= TREE_TYPE (type
);
1271 if (TREE_CODE (type
) == FUNCTION_TYPE
1272 || TREE_CODE (type
) == METHOD_TYPE
)
1274 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1275 fprintf (dump_file
, " not a candidate, reference to "
1279 if (TYPE_VOLATILE (type
))
1281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1282 fprintf (dump_file
, " not a candidate, reference to "
1283 "a volatile type\n");
1286 if (TREE_CODE (type
) == ARRAY_TYPE
1287 && TYPE_NONALIASED_COMPONENT (type
))
1289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1290 fprintf (dump_file
, " not a candidate, reference to "
1291 "a nonaliased component array\n");
1294 if (!is_gimple_reg (parm
))
1296 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1297 fprintf (dump_file
, " not a candidate, a reference which is "
1298 "not a gimple register (probably addressable)\n");
1301 if (is_va_list_type (type
))
1303 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1304 fprintf (dump_file
, " not a candidate, reference to "
1308 if (ptr_parm_has_nonarg_uses (node
, fun
, parm
, num
, desc
))
1310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1311 fprintf (dump_file
, " not a candidate, reference has "
1316 else if (!AGGREGATE_TYPE_P (type
))
1318 /* This is in an else branch because scalars passed by reference are
1319 still candidates to be passed by value. */
1320 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1321 fprintf (dump_file
, " not a candidate, not an aggregate\n");
1325 if (!COMPLETE_TYPE_P (type
))
1327 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1328 fprintf (dump_file
, " not a candidate, not a complete type\n");
1331 if (!tree_fits_uhwi_p (TYPE_SIZE (type
)))
1333 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1334 fprintf (dump_file
, " not a candidate, size not representable\n");
1337 unsigned HOST_WIDE_INT type_size
1338 = tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
1340 || type_size
>= ISRA_ARG_SIZE_LIMIT
)
1342 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1343 fprintf (dump_file
, " not a candidate, has zero or huge size\n");
1346 if (type_internals_preclude_sra_p (type
, &msg
))
1348 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1349 fprintf (dump_file
, " not a candidate, %s\n", msg
);
1353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1354 fprintf (dump_file
, " is a candidate\n");
1357 desc
->split_candidate
= true;
1358 if (desc
->by_ref
&& !desc
->safe_ref
)
1359 desc
->deref_index
= unsafe_by_ref_count
++;
1364 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1365 found, which happens if DECL is for a static chain. */
1367 static gensum_param_desc
*
1368 get_gensum_param_desc (tree decl
)
1372 gcc_checking_assert (TREE_CODE (decl
) == PARM_DECL
);
1373 gensum_param_desc
**slot
= decl2desc
->get (decl
);
1375 /* This can happen for static chains which we cannot handle so far. */
1377 gcc_checking_assert (*slot
);
1382 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1383 write REASON to the dump file if there is one. */
1386 disqualify_split_candidate (gensum_param_desc
*desc
, const char *reason
)
1388 if (!desc
->split_candidate
)
1391 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1392 fprintf (dump_file
, "! Disqualifying parameter number %i - %s\n",
1393 desc
->param_number
, reason
);
1395 desc
->split_candidate
= false;
1398 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1402 disqualify_split_candidate (tree decl
, const char *reason
)
1404 gensum_param_desc
*desc
= get_gensum_param_desc (decl
);
1406 disqualify_split_candidate (desc
, reason
);
1409 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1410 first, check that there are not too many of them already. If so, do not
1411 allocate anything and return NULL. */
1413 static gensum_param_access
*
1414 allocate_access (gensum_param_desc
*desc
,
1415 HOST_WIDE_INT offset
, HOST_WIDE_INT size
)
1417 if (desc
->access_count
1418 == (unsigned) param_ipa_sra_max_replacements
)
1420 disqualify_split_candidate (desc
, "Too many replacement candidates");
1424 gensum_param_access
*access
1425 = (gensum_param_access
*) obstack_alloc (&gensum_obstack
,
1426 sizeof (gensum_param_access
));
1427 memset (access
, 0, sizeof (*access
));
1428 access
->offset
= offset
;
1429 access
->size
= size
;
1430 access
->load_count
= profile_count::zero ();
1434 /* In what context scan_expr_access has been called, whether it deals with a
1435 load, a function argument, or a store. Please note that in rare
1436 circumstances when it is not clear if the access is a load or store,
1437 ISRA_CTX_STORE is used too. */
1439 enum isra_scan_context
{ISRA_CTX_LOAD
, ISRA_CTX_ARG
, ISRA_CTX_STORE
};
1441 /* Return an access describing memory access to the variable described by DESC
1442 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1443 at a certain tree level FIRST. Attempt to create it and put into the
1444 appropriate place in the access tree if does not exist, but fail and return
1445 NULL if there are already too many accesses, if it would create a partially
1446 overlapping access or if an access would end up within a pre-existing
1449 static gensum_param_access
*
1450 get_access_1 (gensum_param_desc
*desc
, gensum_param_access
**first
,
1451 HOST_WIDE_INT offset
, HOST_WIDE_INT size
, isra_scan_context ctx
)
1453 gensum_param_access
*access
= *first
, **ptr
= first
;
1457 /* No pre-existing access at this level, just create it. */
1458 gensum_param_access
*a
= allocate_access (desc
, offset
, size
);
1465 if (access
->offset
>= offset
+ size
)
1467 /* We want to squeeze it in front of the very first access, just do
1469 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1472 r
->next_sibling
= access
;
1477 /* Skip all accesses that have to come before us until the next sibling is
1479 while (offset
>= access
->offset
+ access
->size
1480 && access
->next_sibling
1481 && access
->next_sibling
->offset
< offset
+ size
)
1483 ptr
= &access
->next_sibling
;
1484 access
= access
->next_sibling
;
1487 /* At this point we know we do not belong before access. */
1488 gcc_assert (access
->offset
< offset
+ size
);
1490 if (access
->offset
== offset
&& access
->size
== size
)
1491 /* We found what we were looking for. */
1494 if (access
->offset
<= offset
1495 && access
->offset
+ access
->size
>= offset
+ size
)
1497 /* We fit into access which is larger than us. We need to find/create
1498 something below access. But we only allow nesting in call
1503 return get_access_1 (desc
, &access
->first_child
, offset
, size
, ctx
);
1506 if (offset
<= access
->offset
1507 && offset
+ size
>= access
->offset
+ access
->size
)
1508 /* We are actually bigger than access, which fully fits into us, take its
1509 place and make all accesses fitting into it its children. */
1511 /* But first, we only allow nesting in call arguments so check if that is
1512 what we are trying to represent. */
1513 if (ctx
!= ISRA_CTX_ARG
)
1516 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1519 r
->first_child
= access
;
1521 while (access
->next_sibling
1522 && access
->next_sibling
->offset
< offset
+ size
)
1523 access
= access
->next_sibling
;
1524 if (access
->offset
+ access
->size
> offset
+ size
)
1526 /* This must be a different access, which are sorted, so the
1527 following must be true and this signals a partial overlap. */
1528 gcc_assert (access
->offset
> offset
);
1532 r
->next_sibling
= access
->next_sibling
;
1533 access
->next_sibling
= NULL
;
1538 if (offset
>= access
->offset
+ access
->size
)
1540 /* We belong after access. */
1541 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1544 r
->next_sibling
= access
->next_sibling
;
1545 access
->next_sibling
= r
;
1549 if (offset
< access
->offset
)
1551 /* We know the following, otherwise we would have created a
1553 gcc_checking_assert (offset
+ size
< access
->offset
+ access
->size
);
1557 if (offset
+ size
> access
->offset
+ access
->size
)
1560 gcc_checking_assert (offset
> access
->offset
);
1567 /* Return an access describing memory access to the variable described by DESC
1568 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1569 to create if it does not exist, but fail and return NULL if there are
1570 already too many accesses, if it would create a partially overlapping access
1571 or if an access would end up in a non-call access. */
1573 static gensum_param_access
*
1574 get_access (gensum_param_desc
*desc
, HOST_WIDE_INT offset
, HOST_WIDE_INT size
,
1575 isra_scan_context ctx
)
1577 gcc_checking_assert (desc
->split_candidate
);
1579 gensum_param_access
*access
= get_access_1 (desc
, &desc
->accesses
, offset
,
1583 disqualify_split_candidate (desc
,
1584 "Bad access overlap or too many accesses");
1590 case ISRA_CTX_STORE
:
1591 gcc_assert (!desc
->by_ref
);
1594 access
->nonarg
= true;
1603 /* Verify that parameter access tree starting with ACCESS is in good shape.
1604 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1605 ACCESS or zero if there is none. */
1608 verify_access_tree_1 (gensum_param_access
*access
, HOST_WIDE_INT parent_offset
,
1609 HOST_WIDE_INT parent_size
)
1613 gcc_assert (access
->offset
>= 0 && access
->size
>= 0);
1615 if (parent_size
!= 0)
1617 if (access
->offset
< parent_offset
)
1619 error ("Access offset before parent offset");
1622 if (access
->size
>= parent_size
)
1624 error ("Access size greater or equal to its parent size");
1627 if (access
->offset
+ access
->size
> parent_offset
+ parent_size
)
1629 error ("Access terminates outside of its parent");
1634 if (verify_access_tree_1 (access
->first_child
, access
->offset
,
1638 if (access
->next_sibling
1639 && (access
->next_sibling
->offset
< access
->offset
+ access
->size
))
1641 error ("Access overlaps with its sibling");
1645 access
= access
->next_sibling
;
1650 /* Verify that parameter access tree starting with ACCESS is in good shape,
1651 halt compilation and dump the tree to stderr if not. */
1654 isra_verify_access_tree (gensum_param_access
*access
)
1656 if (verify_access_tree_1 (access
, 0, 0))
1658 for (; access
; access
= access
->next_sibling
)
1659 dump_gensum_access (stderr
, access
, 2);
1660 internal_error ("IPA-SRA access verification failed");
1665 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1666 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1669 asm_visit_addr (gimple
*, tree op
, tree
, void *)
1671 op
= get_base_address (op
);
1673 && TREE_CODE (op
) == PARM_DECL
)
1674 disqualify_split_candidate (op
, "Non-scalarizable GIMPLE_ASM operand.");
1679 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1680 basic block BB, unless the BB has already been marked as a potentially
1684 mark_param_dereference (gensum_param_desc
*desc
, HOST_WIDE_INT dist
,
1687 gcc_assert (desc
->by_ref
);
1688 gcc_checking_assert (desc
->split_candidate
);
1691 || bitmap_bit_p (final_bbs
, bb
->index
))
1694 int idx
= bb
->index
* unsafe_by_ref_count
+ desc
->deref_index
;
1695 if (bb_dereferences
[idx
] < dist
)
1696 bb_dereferences
[idx
] = dist
;
1699 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1700 previously recorded OLD_TYPE. */
1703 type_prevails_p (tree old_type
, tree new_type
)
1705 if (old_type
== new_type
)
1708 /* Non-aggregates are always better. */
1709 if (!is_gimple_reg_type (old_type
)
1710 && is_gimple_reg_type (new_type
))
1712 if (is_gimple_reg_type (old_type
)
1713 && !is_gimple_reg_type (new_type
))
1716 /* Prefer any complex or vector type over any other scalar type. */
1717 if (TREE_CODE (old_type
) != COMPLEX_TYPE
1718 && TREE_CODE (old_type
) != VECTOR_TYPE
1719 && (TREE_CODE (new_type
) == COMPLEX_TYPE
1720 || VECTOR_TYPE_P (new_type
)))
1722 if ((TREE_CODE (old_type
) == COMPLEX_TYPE
1723 || VECTOR_TYPE_P (old_type
))
1724 && TREE_CODE (new_type
) != COMPLEX_TYPE
1725 && TREE_CODE (new_type
) != VECTOR_TYPE
)
1728 /* Use the integral type with the bigger precision. */
1729 if (INTEGRAL_TYPE_P (old_type
)
1730 && INTEGRAL_TYPE_P (new_type
))
1731 return (TYPE_PRECISION (new_type
) > TYPE_PRECISION (old_type
));
1733 /* Attempt to disregard any integral type with non-full precision. */
1734 if (INTEGRAL_TYPE_P (old_type
)
1735 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type
))
1736 != TYPE_PRECISION (old_type
)))
1738 if (INTEGRAL_TYPE_P (new_type
)
1739 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type
))
1740 != TYPE_PRECISION (new_type
)))
1742 /* Stabilize the selection. */
1743 return TYPE_UID (old_type
) < TYPE_UID (new_type
);
1746 /* When scanning an expression which is a call argument, this structure
1747 specifies the call and the position of the argument. */
1749 struct scan_call_info
1751 /* Call graph edge representing the call. */
1753 /* Total number of arguments in the call. */
1754 unsigned argument_count
;
1755 /* Number of the actual argument being scanned. */
1759 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1760 call argument described by CALL_INFO. */
1763 record_nonregister_call_use (gensum_param_desc
*desc
,
1764 scan_call_info
*call_info
,
1765 unsigned unit_offset
, unsigned unit_size
)
1767 isra_call_summary
*csum
= call_sums
->get_create (call_info
->cs
);
1768 csum
->init_inputs (call_info
->argument_count
);
1770 isra_param_flow
*param_flow
= &csum
->m_arg_flow
[call_info
->arg_idx
];
1771 param_flow
->aggregate_pass_through
= true;
1772 set_single_param_flow_source (param_flow
, desc
->param_number
);
1773 param_flow
->unit_offset
= unit_offset
;
1774 param_flow
->unit_size
= unit_size
;
1778 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1782 mark_maybe_modified (ao_ref
*, tree
, void *data
)
1784 bool *maybe_modified
= (bool *) data
;
1785 *maybe_modified
= true;
1789 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1790 specifies whether EXPR is used in a load, store or as an argument call. BB
1791 must be the basic block in which expr resides. If CTX specifies call
1792 argument context, CALL_INFO must describe that call and argument position,
1793 otherwise it is ignored. */
1796 scan_expr_access (tree expr
, gimple
*stmt
, isra_scan_context ctx
,
1797 basic_block bb
, scan_call_info
*call_info
= NULL
)
1799 poly_int64 poffset
, psize
, pmax_size
;
1800 HOST_WIDE_INT offset
, size
, max_size
;
1805 if (TREE_CODE (expr
) == ADDR_EXPR
)
1807 if (ctx
== ISRA_CTX_ARG
)
1809 tree t
= get_base_address (TREE_OPERAND (expr
, 0));
1810 if (VAR_P (t
) && !TREE_STATIC (t
))
1811 loaded_decls
->add (t
);
1814 if (TREE_CODE (expr
) == SSA_NAME
1815 || CONSTANT_CLASS_P (expr
))
1818 if (TREE_CODE (expr
) == BIT_FIELD_REF
1819 || TREE_CODE (expr
) == IMAGPART_EXPR
1820 || TREE_CODE (expr
) == REALPART_EXPR
)
1821 expr
= TREE_OPERAND (expr
, 0);
1823 base
= get_ref_base_and_extent (expr
, &poffset
, &psize
, &pmax_size
, &reverse
);
1825 if (TREE_CODE (base
) == MEM_REF
)
1827 tree op
= TREE_OPERAND (base
, 0);
1828 if (TREE_CODE (op
) != SSA_NAME
1829 || !SSA_NAME_IS_DEFAULT_DEF (op
))
1831 base
= SSA_NAME_VAR (op
);
1836 else if (VAR_P (base
)
1837 && !TREE_STATIC (base
)
1838 && (ctx
== ISRA_CTX_ARG
1839 || ctx
== ISRA_CTX_LOAD
))
1840 loaded_decls
->add (base
);
1842 if (TREE_CODE (base
) != PARM_DECL
)
1845 gensum_param_desc
*desc
= get_gensum_param_desc (base
);
1846 if (!desc
|| !desc
->split_candidate
)
1849 if (!poffset
.is_constant (&offset
)
1850 || !psize
.is_constant (&size
)
1851 || !pmax_size
.is_constant (&max_size
))
1853 disqualify_split_candidate (desc
, "Encountered a polynomial-sized "
1857 if (size
< 0 || size
!= max_size
)
1859 disqualify_split_candidate (desc
, "Encountered a variable sized access.");
1862 if (TREE_CODE (expr
) == COMPONENT_REF
1863 && DECL_BIT_FIELD (TREE_OPERAND (expr
, 1)))
1865 disqualify_split_candidate (desc
, "Encountered a bit-field access.");
1870 disqualify_split_candidate (desc
, "Encountered an access at a "
1871 "negative offset.");
1874 gcc_assert ((offset
% BITS_PER_UNIT
) == 0);
1875 gcc_assert ((size
% BITS_PER_UNIT
) == 0);
1876 if ((offset
/ BITS_PER_UNIT
) >= (UINT_MAX
- ISRA_ARG_SIZE_LIMIT
)
1877 || (size
/ BITS_PER_UNIT
) >= ISRA_ARG_SIZE_LIMIT
)
1879 disqualify_split_candidate (desc
, "Encountered an access with too big "
1884 tree type
= TREE_TYPE (expr
);
1885 unsigned int exp_align
= get_object_alignment (expr
);
1887 if (exp_align
< TYPE_ALIGN (type
))
1889 disqualify_split_candidate (desc
, "Underaligned access.");
1897 disqualify_split_candidate (desc
, "Dereferencing a non-reference.");
1900 else if (ctx
== ISRA_CTX_STORE
)
1902 disqualify_split_candidate (desc
, "Storing to data passed by "
1907 if (!aa_walking_limit
)
1909 disqualify_split_candidate (desc
, "Out of alias analysis step "
1914 gcc_checking_assert (gimple_vuse (stmt
));
1915 bool maybe_modified
= false;
1918 ao_ref_init (&ar
, expr
);
1919 bitmap visited
= BITMAP_ALLOC (NULL
);
1920 int walked
= walk_aliased_vdefs (&ar
, gimple_vuse (stmt
),
1921 mark_maybe_modified
, &maybe_modified
,
1922 &visited
, NULL
, aa_walking_limit
);
1923 BITMAP_FREE (visited
);
1926 gcc_assert (aa_walking_limit
> walked
);
1927 aa_walking_limit
= aa_walking_limit
- walked
;
1930 aa_walking_limit
= 0;
1931 if (maybe_modified
|| walked
< 0)
1933 disqualify_split_candidate (desc
, "Data passed by reference possibly "
1934 "modified through an alias.");
1938 mark_param_dereference (desc
, offset
+ size
, bb
);
1941 /* Pointer parameters with direct uses should have been ruled out by
1942 analyzing SSA default def when looking at the parameters. */
1943 gcc_assert (!desc
->by_ref
);
1945 gensum_param_access
*access
= get_access (desc
, offset
, size
, ctx
);
1949 if (ctx
== ISRA_CTX_ARG
)
1951 gcc_checking_assert (call_info
);
1954 record_nonregister_call_use (desc
, call_info
, offset
/ BITS_PER_UNIT
,
1955 size
/ BITS_PER_UNIT
);
1957 /* This is not a pass-through of a pointer, this is a use like any
1959 access
->nonarg
= true;
1961 else if (ctx
== ISRA_CTX_LOAD
&& bb
->count
.initialized_p ())
1962 access
->load_count
+= bb
->count
;
1966 access
->type
= type
;
1967 access
->alias_ptr_type
= reference_alias_ptr_type (expr
);
1968 access
->reverse
= reverse
;
1972 if (exp_align
< TYPE_ALIGN (access
->type
))
1974 disqualify_split_candidate (desc
, "Reference has lower alignment "
1975 "than a previous one.");
1978 if (access
->alias_ptr_type
!= reference_alias_ptr_type (expr
))
1980 disqualify_split_candidate (desc
, "Multiple alias pointer types.");
1983 if (access
->reverse
!= reverse
)
1985 disqualify_split_candidate (desc
, "Both normal and reverse "
1986 "scalar storage order.");
1990 && (AGGREGATE_TYPE_P (type
) || AGGREGATE_TYPE_P (access
->type
))
1991 && (TYPE_MAIN_VARIANT (access
->type
) != TYPE_MAIN_VARIANT (type
)))
1993 /* We need the same aggregate type on all accesses to be able to
1994 distinguish transformation spots from pass-through arguments in
1995 the transformation phase. */
1996 disqualify_split_candidate (desc
, "We do not support aggregate "
2001 if (type_prevails_p (access
->type
, type
))
2002 access
->type
= type
;
2006 /* Scan body function described by NODE and FUN and create access trees for
2010 scan_function (cgraph_node
*node
, struct function
*fun
)
2014 FOR_EACH_BB_FN (bb
, fun
)
2016 gimple_stmt_iterator gsi
;
2017 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2019 gimple
*stmt
= gsi_stmt (gsi
);
2021 if (final_bbs
&& stmt_can_throw_external (fun
, stmt
))
2022 bitmap_set_bit (final_bbs
, bb
->index
);
2023 switch (gimple_code (stmt
))
2027 tree t
= gimple_return_retval (as_a
<greturn
*> (stmt
));
2029 scan_expr_access (t
, stmt
, ISRA_CTX_LOAD
, bb
);
2031 bitmap_set_bit (final_bbs
, bb
->index
);
2036 if (gimple_assign_single_p (stmt
)
2037 && !gimple_clobber_p (stmt
))
2039 tree rhs
= gimple_assign_rhs1 (stmt
);
2040 scan_expr_access (rhs
, stmt
, ISRA_CTX_LOAD
, bb
);
2041 tree lhs
= gimple_assign_lhs (stmt
);
2042 scan_expr_access (lhs
, stmt
, ISRA_CTX_STORE
, bb
);
2048 unsigned argument_count
= gimple_call_num_args (stmt
);
2049 isra_scan_context ctx
= ISRA_CTX_ARG
;
2050 scan_call_info call_info
, *call_info_p
= &call_info
;
2051 if (gimple_call_internal_p (stmt
))
2054 ctx
= ISRA_CTX_LOAD
;
2055 internal_fn ifn
= gimple_call_internal_fn (stmt
);
2056 if (internal_store_fn_p (ifn
))
2057 ctx
= ISRA_CTX_STORE
;
2061 call_info
.cs
= node
->get_edge (stmt
);
2062 call_info
.argument_count
= argument_count
;
2065 for (unsigned i
= 0; i
< argument_count
; i
++)
2067 call_info
.arg_idx
= i
;
2068 scan_expr_access (gimple_call_arg (stmt
, i
), stmt
,
2069 ctx
, bb
, call_info_p
);
2072 tree lhs
= gimple_call_lhs (stmt
);
2074 scan_expr_access (lhs
, stmt
, ISRA_CTX_STORE
, bb
);
2075 int flags
= gimple_call_flags (stmt
);
2077 && (((flags
& (ECF_CONST
| ECF_PURE
)) == 0)
2078 || (flags
& ECF_LOOPING_CONST_OR_PURE
)))
2079 bitmap_set_bit (final_bbs
, bb
->index
);
2085 gasm
*asm_stmt
= as_a
<gasm
*> (stmt
);
2086 walk_stmt_load_store_addr_ops (asm_stmt
, NULL
, NULL
, NULL
,
2089 bitmap_set_bit (final_bbs
, bb
->index
);
2091 for (unsigned i
= 0; i
< gimple_asm_ninputs (asm_stmt
); i
++)
2093 tree t
= TREE_VALUE (gimple_asm_input_op (asm_stmt
, i
));
2094 scan_expr_access (t
, stmt
, ISRA_CTX_LOAD
, bb
);
2096 for (unsigned i
= 0; i
< gimple_asm_noutputs (asm_stmt
); i
++)
2098 tree t
= TREE_VALUE (gimple_asm_output_op (asm_stmt
, i
));
2099 scan_expr_access (t
, stmt
, ISRA_CTX_STORE
, bb
);
2111 /* Return true if SSA_NAME NAME of function described by FUN is only used in
2112 return statements, or if results of any operations it is involved in are
2113 only used in return statements. ANALYZED is a bitmap that tracks which SSA
2114 names we have already started investigating. */
2117 ssa_name_only_returned_p (function
*fun
, tree name
, bitmap analyzed
)
2120 imm_use_iterator imm_iter
;
2123 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, name
)
2125 if (is_gimple_debug (stmt
))
2128 if (gimple_code (stmt
) == GIMPLE_RETURN
)
2130 tree t
= gimple_return_retval (as_a
<greturn
*> (stmt
));
2137 else if (!stmt_unremovable_because_of_non_call_eh_p (fun
, stmt
)
2138 && ((is_gimple_assign (stmt
) && !gimple_has_volatile_ops (stmt
))
2139 || gimple_code (stmt
) == GIMPLE_PHI
))
2141 /* TODO: And perhaps for const function calls too? */
2143 if (gimple_code (stmt
) == GIMPLE_PHI
)
2144 lhs
= gimple_phi_result (stmt
);
2146 lhs
= gimple_assign_lhs (stmt
);
2148 if (TREE_CODE (lhs
) != SSA_NAME
)
2153 gcc_assert (!gimple_vdef (stmt
));
2154 if (bitmap_set_bit (analyzed
, SSA_NAME_VERSION (lhs
))
2155 && !ssa_name_only_returned_p (fun
, lhs
, analyzed
))
2170 /* Inspect the uses of the return value of the call associated with CS, and if
2171 it is not used or if it is only used to construct the return value of the
2172 caller, mark it as such in call or caller summary. Also check for
2173 misaligned arguments. */
2176 isra_analyze_call (cgraph_edge
*cs
)
2178 gcall
*call_stmt
= cs
->call_stmt
;
2179 unsigned count
= gimple_call_num_args (call_stmt
);
2180 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2182 for (unsigned i
= 0; i
< count
; i
++)
2184 tree arg
= gimple_call_arg (call_stmt
, i
);
2185 if (TREE_CODE (arg
) == ADDR_EXPR
)
2187 poly_int64 poffset
, psize
, pmax_size
;
2189 tree base
= get_ref_base_and_extent (TREE_OPERAND (arg
, 0), &poffset
,
2190 &psize
, &pmax_size
, &reverse
);
2191 HOST_WIDE_INT offset
;
2192 unsigned HOST_WIDE_INT ds
;
2194 && (poffset
.is_constant (&offset
))
2195 && tree_fits_uhwi_p (DECL_SIZE (base
))
2196 && ((ds
= tree_to_uhwi (DECL_SIZE (base
)) - offset
)
2197 < ISRA_ARG_SIZE_LIMIT
* BITS_PER_UNIT
))
2199 csum
->init_inputs (count
);
2200 gcc_assert (!csum
->m_arg_flow
[i
].aggregate_pass_through
);
2201 csum
->m_arg_flow
[i
].unit_size
= ds
/ BITS_PER_UNIT
;
2204 if (TREE_CODE (base
) == VAR_DECL
2205 && !TREE_STATIC (base
)
2206 && !loaded_decls
->contains (base
))
2208 csum
->init_inputs (count
);
2209 csum
->m_arg_flow
[i
].constructed_for_calls
= true;
2213 if (is_gimple_reg (arg
))
2217 poly_int64 bitsize
, bitpos
;
2219 int unsignedp
, reversep
, volatilep
= 0;
2220 get_inner_reference (arg
, &bitsize
, &bitpos
, &offset
, &mode
,
2221 &unsignedp
, &reversep
, &volatilep
);
2222 if (!multiple_p (bitpos
, BITS_PER_UNIT
))
2224 csum
->m_bit_aligned_arg
= true;
2229 tree lhs
= gimple_call_lhs (call_stmt
);
2232 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2233 from this function (without being read anywhere). */
2234 if (TREE_CODE (lhs
) == SSA_NAME
)
2236 bitmap analyzed
= BITMAP_ALLOC (NULL
);
2237 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs
->caller
->decl
),
2239 csum
->m_return_returned
= true;
2240 BITMAP_FREE (analyzed
);
2244 csum
->m_return_ignored
= true;
2247 /* Look at all calls going out of NODE, described also by IFS and perform all
2248 analyses necessary for IPA-SRA that are not done at body scan time or done
2249 even when body is not scanned because the function is not a candidate. */
2252 isra_analyze_all_outgoing_calls (cgraph_node
*node
)
2254 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2255 isra_analyze_call (cs
);
2256 for (cgraph_edge
*cs
= node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
2257 isra_analyze_call (cs
);
2260 /* Dump a dereferences table with heading STR to file F. */
2263 dump_dereferences_table (FILE *f
, struct function
*fun
, const char *str
)
2267 fprintf (dump_file
, "%s", str
);
2268 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (fun
),
2269 EXIT_BLOCK_PTR_FOR_FN (fun
), next_bb
)
2271 fprintf (f
, "%4i %i ", bb
->index
, bitmap_bit_p (final_bbs
, bb
->index
));
2272 if (bb
!= EXIT_BLOCK_PTR_FOR_FN (fun
))
2275 for (i
= 0; i
< unsafe_by_ref_count
; i
++)
2277 int idx
= bb
->index
* unsafe_by_ref_count
+ i
;
2278 fprintf (f
, " %4" HOST_WIDE_INT_PRINT
"d", bb_dereferences
[idx
]);
2283 fprintf (dump_file
, "\n");
2286 /* Propagate distances in bb_dereferences in the opposite direction than the
2287 control flow edges, in each step storing the maximum of the current value
2288 and the minimum of all successors. These steps are repeated until the table
2289 stabilizes. Note that BBs which might terminate the functions (according to
2290 final_bbs bitmap) never updated in this way. */
2293 propagate_dereference_distances (struct function
*fun
)
2297 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2298 dump_dereferences_table (dump_file
, fun
,
2299 "Dereference table before propagation:\n");
2301 auto_vec
<basic_block
> queue (last_basic_block_for_fn (fun
));
2302 queue
.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun
));
2303 FOR_EACH_BB_FN (bb
, fun
)
2305 queue
.quick_push (bb
);
2309 while (!queue
.is_empty ())
2313 bool change
= false;
2319 if (bitmap_bit_p (final_bbs
, bb
->index
))
2322 for (i
= 0; i
< unsafe_by_ref_count
; i
++)
2324 int idx
= bb
->index
* unsafe_by_ref_count
+ i
;
2326 HOST_WIDE_INT inh
= 0;
2328 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2330 int succ_idx
= e
->dest
->index
* unsafe_by_ref_count
+ i
;
2332 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (fun
))
2338 inh
= bb_dereferences
[succ_idx
];
2340 else if (bb_dereferences
[succ_idx
] < inh
)
2341 inh
= bb_dereferences
[succ_idx
];
2344 if (!first
&& bb_dereferences
[idx
] < inh
)
2346 bb_dereferences
[idx
] = inh
;
2352 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2357 e
->src
->aux
= e
->src
;
2358 queue
.quick_push (e
->src
);
2362 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2363 dump_dereferences_table (dump_file
, fun
,
2364 "Dereference table after propagation:\n");
2367 /* Return true if the ACCESS loads happen frequently enough in FUN to risk
2368 moving them to the caller and only pass the result. */
2371 dereference_probable_p (struct function
*fun
, gensum_param_access
*access
)
2373 int threshold
= opt_for_fn (fun
->decl
, param_ipa_sra_deref_prob_threshold
);
2374 return access
->load_count
2375 >= ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
.apply_scale (threshold
, 100);
2378 /* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
2379 its children, return true if the parameter cannot be split, otherwise return
2380 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2381 the index of the entry BB in the function of PARM. */
2384 check_gensum_access (struct function
*fun
, tree parm
, gensum_param_desc
*desc
,
2385 gensum_param_access
*access
,
2386 HOST_WIDE_INT
*nonarg_acc_size
, bool *only_calls
,
2391 *only_calls
= false;
2392 *nonarg_acc_size
+= access
->size
;
2394 if (access
->first_child
)
2396 disqualify_split_candidate (desc
, "Overlapping non-call uses.");
2400 /* Do not decompose a non-BLKmode param in a way that would create
2401 BLKmode params. Especially for by-reference passing (thus,
2402 pointer-type param) this is hardly worthwhile. */
2403 if (DECL_MODE (parm
) != BLKmode
2404 && TYPE_MODE (access
->type
) == BLKmode
)
2406 disqualify_split_candidate (desc
, "Would convert a non-BLK to a BLK.");
2414 if (!dereference_probable_p (fun
, access
))
2416 disqualify_split_candidate (desc
, "Dereferences in callers "
2417 "would happen much more frequently.");
2423 int idx
= (entry_bb_index
* unsafe_by_ref_count
+ desc
->deref_index
);
2424 if ((access
->offset
+ access
->size
) > bb_dereferences
[idx
])
2426 if (!dereference_probable_p (fun
, access
))
2428 disqualify_split_candidate (desc
, "Would create a possibly "
2429 "illegal dereference in a "
2433 desc
->conditionally_dereferenceable
= true;
2438 for (gensum_param_access
*ch
= access
->first_child
;
2440 ch
= ch
->next_sibling
)
2441 if (check_gensum_access (fun
, parm
, desc
, ch
, nonarg_acc_size
, only_calls
,
2448 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2452 copy_accesses_to_ipa_desc (gensum_param_access
*from
, isra_param_desc
*desc
)
2454 param_access
*to
= ggc_cleared_alloc
<param_access
> ();
2455 gcc_checking_assert ((from
->offset
% BITS_PER_UNIT
) == 0);
2456 gcc_checking_assert ((from
->size
% BITS_PER_UNIT
) == 0);
2457 to
->unit_offset
= from
->offset
/ BITS_PER_UNIT
;
2458 to
->unit_size
= from
->size
/ BITS_PER_UNIT
;
2459 to
->type
= from
->type
;
2460 to
->alias_ptr_type
= from
->alias_ptr_type
;
2461 to
->certain
= from
->nonarg
;
2462 to
->reverse
= from
->reverse
;
2463 vec_safe_push (desc
->accesses
, to
);
2465 for (gensum_param_access
*ch
= from
->first_child
;
2467 ch
= ch
->next_sibling
)
2468 copy_accesses_to_ipa_desc (ch
, desc
);
2471 /* Analyze function body scan results stored in param_accesses and
2472 param_accesses, detect possible transformations and store information of
2473 those in function summary. NODE, FUN and IFS are all various structures
2474 describing the currently analyzed function. */
2477 process_scan_results (cgraph_node
*node
, struct function
*fun
,
2478 isra_func_summary
*ifs
,
2479 vec
<gensum_param_desc
> *param_descriptions
)
2481 bool check_pass_throughs
= false;
2482 bool dereferences_propagated
= false;
2483 tree parm
= DECL_ARGUMENTS (node
->decl
);
2484 unsigned param_count
= param_descriptions
->length();
2486 for (unsigned desc_index
= 0;
2487 desc_index
< param_count
;
2488 desc_index
++, parm
= DECL_CHAIN (parm
))
2490 gensum_param_desc
*desc
= &(*param_descriptions
)[desc_index
];
2491 if (!desc
->split_candidate
)
2495 isra_verify_access_tree (desc
->accesses
);
2497 if (!dereferences_propagated
2502 propagate_dereference_distances (fun
);
2503 dereferences_propagated
= true;
2506 HOST_WIDE_INT nonarg_acc_size
= 0;
2507 bool only_calls
= true;
2508 bool check_failed
= false;
2510 int entry_bb_index
= ENTRY_BLOCK_PTR_FOR_FN (fun
)->index
;
2511 for (gensum_param_access
*acc
= desc
->accesses
;
2513 acc
= acc
->next_sibling
)
2514 if (check_gensum_access (fun
, parm
, desc
, acc
, &nonarg_acc_size
,
2515 &only_calls
, entry_bb_index
))
2517 check_failed
= true;
2524 desc
->locally_unused
= true;
2526 HOST_WIDE_INT cur_param_size
2527 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm
)));
2528 HOST_WIDE_INT param_size_limit
, optimistic_limit
;
2529 if (!desc
->by_ref
|| optimize_function_for_size_p (fun
))
2531 param_size_limit
= cur_param_size
;
2532 optimistic_limit
= cur_param_size
;
2537 = opt_for_fn (node
->decl
,
2538 param_ipa_sra_ptr_growth_factor
) * cur_param_size
;
2540 = (opt_for_fn (node
->decl
, param_ipa_sra_ptrwrap_growth_factor
)
2541 * param_size_limit
);
2544 if (nonarg_acc_size
> optimistic_limit
2545 || (!desc
->by_ref
&& nonarg_acc_size
== param_size_limit
))
2547 disqualify_split_candidate (desc
, "Would result into a too big set "
2548 "of replacements even in best "
2553 /* create_parameter_descriptors makes sure unit sizes of all
2554 candidate parameters fit unsigned integers restricted to
2555 ISRA_ARG_SIZE_LIMIT. */
2556 desc
->param_size_limit
= param_size_limit
/ BITS_PER_UNIT
;
2557 desc
->nonarg_acc_size
= nonarg_acc_size
/ BITS_PER_UNIT
;
2558 if (desc
->split_candidate
&& desc
->ptr_pt_count
)
2560 gcc_assert (desc
->by_ref
);
2561 check_pass_throughs
= true;
2566 /* When a pointer parameter is passed-through to a callee, in which it is
2567 only used to read only one or a few items, we can attempt to transform it
2568 to obtaining and passing through the items instead of the pointer. But we
2569 must take extra care that 1) we do not introduce any segfault by moving
2570 dereferences above control flow and that 2) the data is not modified
2571 through an alias in this function. The IPA analysis must not introduce
2572 any accesses candidates unless it can prove both.
2574 The current solution is very crude as it consists of ensuring that the
2575 call postdominates entry BB and that the definition of VUSE of the call is
2576 default definition. TODO: For non-recursive callees in the same
2577 compilation unit we could do better by doing analysis in topological order
2578 an looking into access candidates of callees, using their alias_ptr_types
2579 to attempt real AA. We could also use the maximum known dereferenced
2580 offset in this function at IPA level.
2582 TODO: Measure the overhead and the effect of just being pessimistic.
2583 Maybe this is only -O3 material? */
2585 hash_map
<gimple
*, bool> analyzed_stmts
;
2586 bitmap always_executed_bbs
= NULL
;
2587 if (check_pass_throughs
)
2588 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2590 gcall
*call_stmt
= cs
->call_stmt
;
2591 tree vuse
= gimple_vuse (call_stmt
);
2593 /* If the callee is a const function, we don't get a VUSE. In such
2594 case there will be no memory accesses in the called function (or the
2595 const attribute is wrong) and then we just don't care. */
2596 bool uses_memory_as_obtained
= vuse
&& SSA_NAME_IS_DEFAULT_DEF (vuse
);
2598 unsigned count
= gimple_call_num_args (call_stmt
);
2599 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2600 csum
->init_inputs (count
);
2601 csum
->m_before_any_store
= uses_memory_as_obtained
;
2602 for (unsigned argidx
= 0; argidx
< count
; argidx
++)
2604 if (!csum
->m_arg_flow
[argidx
].pointer_pass_through
)
2607 = get_single_param_flow_source (&csum
->m_arg_flow
[argidx
]);
2608 gensum_param_desc
*desc
= &(*param_descriptions
)[pidx
];
2609 if (!desc
->split_candidate
)
2611 csum
->m_arg_flow
[argidx
].pointer_pass_through
= false;
2614 if (!uses_memory_as_obtained
)
2619 csum
->m_arg_flow
[argidx
].safe_to_import_accesses
= true;
2623 /* Walk basic block and see if its execution can terminate earlier.
2624 Keep the info for later re-use to avoid quadratic behavoiur here. */
2625 gimple_stmt_iterator gsi
= gsi_for_stmt (call_stmt
);
2628 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2630 bool *b
= analyzed_stmts
.get (gsi_stmt (gsi
));
2638 if (stmt_may_terminate_function_p (fun
, gsi_stmt (gsi
), false))
2646 if (gsi_end_p (gsi
))
2647 gsi
= gsi_start_bb (gimple_bb (call_stmt
));
2648 for (; gsi_stmt (gsi
) != call_stmt
; gsi_next (&gsi
))
2649 analyzed_stmts
.get_or_insert (gsi_stmt (gsi
)) = safe
;
2652 if (safe
&& !always_executed_bbs
)
2654 mark_dfs_back_edges ();
2655 always_executed_bbs
= find_always_executed_bbs (fun
, false);
2657 if (safe
&& bitmap_bit_p (always_executed_bbs
, gimple_bb (call_stmt
)->index
))
2658 csum
->m_arg_flow
[argidx
].safe_to_import_accesses
= true;
2662 BITMAP_FREE (always_executed_bbs
);
2664 /* TODO: Add early exit if we disqualified everything. This also requires
2665 that we either relax the restriction that
2666 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2667 or store the number of parameters to IPA-SRA function summary and use that
2668 when just removing params. */
2670 vec_safe_reserve_exact (ifs
->m_parameters
, param_count
);
2671 ifs
->m_parameters
->quick_grow_cleared (param_count
);
2672 for (unsigned desc_index
= 0; desc_index
< param_count
; desc_index
++)
2674 gensum_param_desc
*s
= &(*param_descriptions
)[desc_index
];
2675 isra_param_desc
*d
= &(*ifs
->m_parameters
)[desc_index
];
2677 d
->param_size_limit
= s
->param_size_limit
;
2678 d
->size_reached
= s
->nonarg_acc_size
;
2679 d
->locally_unused
= s
->locally_unused
;
2680 d
->split_candidate
= s
->split_candidate
;
2681 d
->by_ref
= s
->by_ref
;
2682 d
->remove_only_when_retval_removed
= s
->remove_only_when_retval_removed
;
2683 d
->split_only_when_retval_removed
= s
->split_only_when_retval_removed
;
2684 d
->conditionally_dereferenceable
= s
->conditionally_dereferenceable
;
2686 for (gensum_param_access
*acc
= s
->accesses
;
2688 acc
= acc
->next_sibling
)
2689 copy_accesses_to_ipa_desc (acc
, d
);
2693 dump_isra_param_descriptors (dump_file
, node
->decl
, ifs
, false);
2696 /* Return true if there are any overlaps among certain accesses of DESC. If
2697 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2698 too. DESC is assumed to be a split candidate that is not locally
2702 overlapping_certain_accesses_p (isra_param_desc
*desc
,
2703 bool *certain_access_present_p
)
2705 unsigned pclen
= vec_safe_length (desc
->accesses
);
2706 for (unsigned i
= 0; i
< pclen
; i
++)
2708 param_access
*a1
= (*desc
->accesses
)[i
];
2712 if (certain_access_present_p
)
2713 *certain_access_present_p
= true;
2714 for (unsigned j
= i
+ 1; j
< pclen
; j
++)
2716 param_access
*a2
= (*desc
->accesses
)[j
];
2718 && a1
->unit_offset
< a2
->unit_offset
+ a2
->unit_size
2719 && a1
->unit_offset
+ a1
->unit_size
> a2
->unit_offset
)
2726 /* Check for any overlaps of certain param accesses among splitting candidates
2727 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2728 check that used splitting candidates have at least one certain access. */
2731 verify_splitting_accesses (cgraph_node
*node
, bool certain_must_exist
)
2733 isra_func_summary
*ifs
= func_sums
->get (node
);
2734 if (!ifs
|| !ifs
->m_candidate
)
2736 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
2737 for (unsigned pidx
= 0; pidx
< param_count
; pidx
++)
2739 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[pidx
];
2740 if (!desc
->split_candidate
|| desc
->locally_unused
)
2743 bool certain_access_present
= !certain_must_exist
;
2744 if (overlapping_certain_accesses_p (desc
, &certain_access_present
))
2745 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2746 "which overlap", node
->dump_name (), pidx
);
2747 if (!certain_access_present
)
2748 internal_error ("function %qs, parameter %u, is used but does not "
2749 "have any certain IPA-SRA access",
2750 node
->dump_name (), pidx
);
2754 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2755 this compilation unit and create summary structures describing IPA-SRA
2756 opportunities and constraints in them. */
2759 ipa_sra_generate_summary (void)
2761 struct cgraph_node
*node
;
2763 gcc_checking_assert (!func_sums
);
2764 gcc_checking_assert (!call_sums
);
2766 = (new (ggc_alloc_no_dtor
<ipa_sra_function_summaries
> ())
2767 ipa_sra_function_summaries (symtab
, true));
2768 call_sums
= new ipa_sra_call_summaries (symtab
);
2770 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
2771 ipa_sra_summarize_function (node
);
2775 /* Write intraprocedural analysis information about E and all of its outgoing
2776 edges into a stream for LTO WPA. */
2779 isra_write_edge_summary (output_block
*ob
, cgraph_edge
*e
)
2781 isra_call_summary
*csum
= call_sums
->get (e
);
2782 unsigned input_count
= csum
->m_arg_flow
.length ();
2783 streamer_write_uhwi (ob
, input_count
);
2784 for (unsigned i
= 0; i
< input_count
; i
++)
2786 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
2787 streamer_write_hwi (ob
, ipf
->length
);
2788 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2789 for (int j
= 0; j
< ipf
->length
; j
++)
2790 bp_pack_value (&bp
, ipf
->inputs
[j
], 8);
2791 bp_pack_value (&bp
, ipf
->aggregate_pass_through
, 1);
2792 bp_pack_value (&bp
, ipf
->pointer_pass_through
, 1);
2793 bp_pack_value (&bp
, ipf
->safe_to_import_accesses
, 1);
2794 bp_pack_value (&bp
, ipf
->constructed_for_calls
, 1);
2795 streamer_write_bitpack (&bp
);
2796 streamer_write_uhwi (ob
, ipf
->unit_offset
);
2797 streamer_write_uhwi (ob
, ipf
->unit_size
);
2799 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2800 bp_pack_value (&bp
, csum
->m_return_ignored
, 1);
2801 bp_pack_value (&bp
, csum
->m_return_returned
, 1);
2802 bp_pack_value (&bp
, csum
->m_bit_aligned_arg
, 1);
2803 bp_pack_value (&bp
, csum
->m_before_any_store
, 1);
2804 streamer_write_bitpack (&bp
);
2807 /* Write intraprocedural analysis information about NODE and all of its outgoing
2808 edges into a stream for LTO WPA. */
2811 isra_write_node_summary (output_block
*ob
, cgraph_node
*node
)
2813 isra_func_summary
*ifs
= func_sums
->get (node
);
2814 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
2815 int node_ref
= lto_symtab_encoder_encode (encoder
, node
);
2816 streamer_write_uhwi (ob
, node_ref
);
2818 unsigned param_desc_count
= vec_safe_length (ifs
->m_parameters
);
2819 streamer_write_uhwi (ob
, param_desc_count
);
2820 for (unsigned i
= 0; i
< param_desc_count
; i
++)
2822 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
2823 unsigned access_count
= vec_safe_length (desc
->accesses
);
2824 streamer_write_uhwi (ob
, access_count
);
2825 for (unsigned j
= 0; j
< access_count
; j
++)
2827 param_access
*acc
= (*desc
->accesses
)[j
];
2828 stream_write_tree (ob
, acc
->type
, true);
2829 stream_write_tree (ob
, acc
->alias_ptr_type
, true);
2830 streamer_write_uhwi (ob
, acc
->unit_offset
);
2831 streamer_write_uhwi (ob
, acc
->unit_size
);
2832 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2833 bp_pack_value (&bp
, acc
->certain
, 1);
2834 bp_pack_value (&bp
, acc
->reverse
, 1);
2835 streamer_write_bitpack (&bp
);
2837 streamer_write_uhwi (ob
, desc
->param_size_limit
);
2838 streamer_write_uhwi (ob
, desc
->size_reached
);
2839 gcc_assert (desc
->safe_size
== 0);
2840 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2841 bp_pack_value (&bp
, desc
->locally_unused
, 1);
2842 bp_pack_value (&bp
, desc
->split_candidate
, 1);
2843 bp_pack_value (&bp
, desc
->by_ref
, 1);
2844 gcc_assert (!desc
->not_specially_constructed
);
2845 bp_pack_value (&bp
, desc
->remove_only_when_retval_removed
, 1);
2846 bp_pack_value (&bp
, desc
->split_only_when_retval_removed
, 1);
2847 bp_pack_value (&bp
, desc
->conditionally_dereferenceable
, 1);
2848 gcc_assert (!desc
->safe_size_set
);
2849 streamer_write_bitpack (&bp
);
2851 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2852 bp_pack_value (&bp
, ifs
->m_candidate
, 1);
2853 bp_pack_value (&bp
, ifs
->m_returns_value
, 1);
2854 bp_pack_value (&bp
, ifs
->m_return_ignored
, 1);
2855 gcc_assert (!ifs
->m_queued
);
2856 streamer_write_bitpack (&bp
);
2858 for (cgraph_edge
*e
= node
->callees
; e
; e
= e
->next_callee
)
2859 isra_write_edge_summary (ob
, e
);
2860 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2861 isra_write_edge_summary (ob
, e
);
2864 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2867 ipa_sra_write_summary (void)
2869 if (!func_sums
|| !call_sums
)
2872 struct output_block
*ob
= create_output_block (LTO_section_ipa_sra
);
2873 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
2876 unsigned int count
= 0;
2877 lto_symtab_encoder_iterator lsei
;
2878 for (lsei
= lsei_start_function_in_partition (encoder
);
2880 lsei_next_function_in_partition (&lsei
))
2882 cgraph_node
*node
= lsei_cgraph_node (lsei
);
2883 if (node
->has_gimple_body_p ()
2884 && func_sums
->get (node
) != NULL
)
2887 streamer_write_uhwi (ob
, count
);
2889 /* Process all of the functions. */
2890 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
2891 lsei_next_function_in_partition (&lsei
))
2893 cgraph_node
*node
= lsei_cgraph_node (lsei
);
2894 if (node
->has_gimple_body_p ()
2895 && func_sums
->get (node
) != NULL
)
2896 isra_write_node_summary (ob
, node
);
2898 streamer_write_char_stream (ob
->main_stream
, 0);
2899 produce_asm (ob
, NULL
);
2900 destroy_output_block (ob
);
2903 /* Read intraprocedural analysis information about E and all of its outgoing
2904 edges into a stream for LTO WPA. */
2907 isra_read_edge_summary (struct lto_input_block
*ib
, cgraph_edge
*cs
)
2909 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2910 unsigned input_count
= streamer_read_uhwi (ib
);
2911 csum
->init_inputs (input_count
);
2912 for (unsigned i
= 0; i
< input_count
; i
++)
2914 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
2915 ipf
->length
= streamer_read_hwi (ib
);
2916 bitpack_d bp
= streamer_read_bitpack (ib
);
2917 for (int j
= 0; j
< ipf
->length
; j
++)
2918 ipf
->inputs
[j
] = bp_unpack_value (&bp
, 8);
2919 ipf
->aggregate_pass_through
= bp_unpack_value (&bp
, 1);
2920 ipf
->pointer_pass_through
= bp_unpack_value (&bp
, 1);
2921 ipf
->safe_to_import_accesses
= bp_unpack_value (&bp
, 1);
2922 ipf
->constructed_for_calls
= bp_unpack_value (&bp
, 1);
2923 ipf
->unit_offset
= streamer_read_uhwi (ib
);
2924 ipf
->unit_size
= streamer_read_uhwi (ib
);
2926 bitpack_d bp
= streamer_read_bitpack (ib
);
2927 csum
->m_return_ignored
= bp_unpack_value (&bp
, 1);
2928 csum
->m_return_returned
= bp_unpack_value (&bp
, 1);
2929 csum
->m_bit_aligned_arg
= bp_unpack_value (&bp
, 1);
2930 csum
->m_before_any_store
= bp_unpack_value (&bp
, 1);
2933 /* Read intraprocedural analysis information about NODE and all of its outgoing
2934 edges into a stream for LTO WPA. */
2937 isra_read_node_info (struct lto_input_block
*ib
, cgraph_node
*node
,
2938 struct data_in
*data_in
)
2940 isra_func_summary
*ifs
= func_sums
->get_create (node
);
2941 unsigned param_desc_count
= streamer_read_uhwi (ib
);
2942 if (param_desc_count
> 0)
2944 vec_safe_reserve_exact (ifs
->m_parameters
, param_desc_count
);
2945 ifs
->m_parameters
->quick_grow_cleared (param_desc_count
);
2947 for (unsigned i
= 0; i
< param_desc_count
; i
++)
2949 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
2950 unsigned access_count
= streamer_read_uhwi (ib
);
2951 for (unsigned j
= 0; j
< access_count
; j
++)
2953 param_access
*acc
= ggc_cleared_alloc
<param_access
> ();
2954 acc
->type
= stream_read_tree (ib
, data_in
);
2955 acc
->alias_ptr_type
= stream_read_tree (ib
, data_in
);
2956 acc
->unit_offset
= streamer_read_uhwi (ib
);
2957 acc
->unit_size
= streamer_read_uhwi (ib
);
2958 bitpack_d bp
= streamer_read_bitpack (ib
);
2959 acc
->certain
= bp_unpack_value (&bp
, 1);
2960 acc
->reverse
= bp_unpack_value (&bp
, 1);
2961 vec_safe_push (desc
->accesses
, acc
);
2963 desc
->param_size_limit
= streamer_read_uhwi (ib
);
2964 desc
->size_reached
= streamer_read_uhwi (ib
);
2965 desc
->safe_size
= 0;
2966 bitpack_d bp
= streamer_read_bitpack (ib
);
2967 desc
->locally_unused
= bp_unpack_value (&bp
, 1);
2968 desc
->split_candidate
= bp_unpack_value (&bp
, 1);
2969 desc
->by_ref
= bp_unpack_value (&bp
, 1);
2970 desc
->not_specially_constructed
= 0;
2971 desc
->remove_only_when_retval_removed
= bp_unpack_value (&bp
, 1);
2972 desc
->split_only_when_retval_removed
= bp_unpack_value (&bp
, 1);
2973 desc
->conditionally_dereferenceable
= bp_unpack_value (&bp
, 1);
2974 desc
->safe_size_set
= 0;
2976 bitpack_d bp
= streamer_read_bitpack (ib
);
2977 ifs
->m_candidate
= bp_unpack_value (&bp
, 1);
2978 ifs
->m_returns_value
= bp_unpack_value (&bp
, 1);
2979 ifs
->m_return_ignored
= bp_unpack_value (&bp
, 1);
2982 for (cgraph_edge
*e
= node
->callees
; e
; e
= e
->next_callee
)
2983 isra_read_edge_summary (ib
, e
);
2984 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2985 isra_read_edge_summary (ib
, e
);
2988 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2989 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
2990 it should be possible to unify them somehow. */
2993 isra_read_summary_section (struct lto_file_decl_data
*file_data
,
2994 const char *data
, size_t len
)
2996 const struct lto_function_header
*header
=
2997 (const struct lto_function_header
*) data
;
2998 const int cfg_offset
= sizeof (struct lto_function_header
);
2999 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3000 const int string_offset
= main_offset
+ header
->main_size
;
3001 struct data_in
*data_in
;
3005 lto_input_block
ib_main ((const char *) data
+ main_offset
,
3006 header
->main_size
, file_data
);
3009 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3010 header
->string_size
, vNULL
);
3011 count
= streamer_read_uhwi (&ib_main
);
3013 for (i
= 0; i
< count
; i
++)
3016 struct cgraph_node
*node
;
3017 lto_symtab_encoder_t encoder
;
3019 index
= streamer_read_uhwi (&ib_main
);
3020 encoder
= file_data
->symtab_node_encoder
;
3021 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3023 gcc_assert (node
->definition
);
3024 isra_read_node_info (&ib_main
, node
, data_in
);
3026 lto_free_section_data (file_data
, LTO_section_ipa_sra
, NULL
, data
,
3028 lto_data_in_delete (data_in
);
3031 /* Read intraprocedural analysis information into a stream for LTO WPA. */
3034 ipa_sra_read_summary (void)
3036 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3037 struct lto_file_decl_data
*file_data
;
3040 gcc_checking_assert (!func_sums
);
3041 gcc_checking_assert (!call_sums
);
3043 = (new (ggc_alloc_no_dtor
<ipa_sra_function_summaries
> ())
3044 ipa_sra_function_summaries (symtab
, true));
3045 call_sums
= new ipa_sra_call_summaries (symtab
);
3047 while ((file_data
= file_data_vec
[j
++]))
3051 = lto_get_summary_section_data (file_data
, LTO_section_ipa_sra
, &len
);
3053 isra_read_summary_section (file_data
, data
, len
);
3057 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. If
3058 HINTS is true, also dump IPA-analysis computed hints. */
3061 ipa_sra_dump_all_summaries (FILE *f
, bool hints
)
3064 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
3066 fprintf (f
, "\nSummary for node %s:\n", node
->dump_name ());
3068 isra_func_summary
*ifs
= func_sums
->get (node
);
3070 fprintf (f
, " Function does not have any associated IPA-SRA "
3072 else if (!ifs
->m_candidate
)
3073 fprintf (f
, " Not a candidate function\n");
3076 if (ifs
->m_returns_value
)
3077 fprintf (f
, " Returns value\n");
3078 if (vec_safe_is_empty (ifs
->m_parameters
))
3079 fprintf (f
, " No parameter information. \n");
3081 for (unsigned i
= 0; i
< ifs
->m_parameters
->length (); ++i
)
3083 fprintf (f
, " Descriptor for parameter %i:\n", i
);
3084 dump_isra_param_descriptor (f
, &(*ifs
->m_parameters
)[i
], hints
);
3089 struct cgraph_edge
*cs
;
3090 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
3092 fprintf (f
, " Summary for edge %s->%s:\n", cs
->caller
->dump_name (),
3093 cs
->callee
->dump_name ());
3094 isra_call_summary
*csum
= call_sums
->get (cs
);
3098 fprintf (f
, " Call summary is MISSING!\n");
3102 fprintf (f
, "\n\n");
3105 /* Perform function-scope viability tests that can be only made at IPA level
3106 and return false if the function is deemed unsuitable for IPA-SRA. */
3109 ipa_sra_ipa_function_checks (cgraph_node
*node
)
3111 if (!node
->can_be_local_p ())
3114 fprintf (dump_file
, "Function %s disqualified because it cannot be "
3115 "made local.\n", node
->dump_name ());
3118 if (!node
->can_change_signature
)
3121 fprintf (dump_file
, "Function can not change signature.\n");
3128 /* Issues found out by check_callers_for_issues. */
3130 struct caller_issues
3132 /* The candidate being considered. */
3133 cgraph_node
*candidate
;
3134 /* There is a thunk among callers. */
3136 /* Set if there is at least one caller that is OK. */
3138 /* Call site with no available information. */
3139 bool unknown_callsite
;
3140 /* Call from outside the candidate's comdat group. */
3141 bool call_from_outside_comdat
;
3142 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
3143 bool bit_aligned_aggregate_argument
;
3146 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
3150 check_for_caller_issues (struct cgraph_node
*node
, void *data
)
3152 struct caller_issues
*issues
= (struct caller_issues
*) data
;
3154 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3156 if (cs
->caller
->thunk
)
3158 issues
->thunk
= true;
3159 /* TODO: We should be able to process at least some types of
3163 if (issues
->candidate
->calls_comdat_local
3164 && issues
->candidate
->same_comdat_group
3165 && !issues
->candidate
->in_same_comdat_group_p (cs
->caller
))
3167 issues
->call_from_outside_comdat
= true;
3171 isra_call_summary
*csum
= call_sums
->get (cs
);
3174 issues
->unknown_callsite
= true;
3178 if (csum
->m_bit_aligned_arg
)
3179 issues
->bit_aligned_aggregate_argument
= true;
3181 issues
->there_is_one
= true;
3186 /* Look at all incoming edges to NODE, including aliases and thunks and look
3187 for problems. Return true if NODE type should not be modified at all. */
3190 check_all_callers_for_issues (cgraph_node
*node
)
3192 struct caller_issues issues
;
3193 memset (&issues
, 0, sizeof (issues
));
3194 issues
.candidate
= node
;
3196 node
->call_for_symbol_and_aliases (check_for_caller_issues
, &issues
, true);
3197 if (issues
.unknown_callsite
)
3199 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3200 fprintf (dump_file
, "A call of %s has not been analyzed. Disabling "
3201 "all modifications.\n", node
->dump_name ());
3204 /* TODO: We should be able to process at least some types of thunks. */
3207 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3208 fprintf (dump_file
, "A call of %s is through thunk, which are not"
3209 " handled yet. Disabling all modifications.\n",
3210 node
->dump_name ());
3213 if (issues
.call_from_outside_comdat
)
3216 fprintf (dump_file
, "Function would become private comdat called "
3217 "outside of its comdat group.\n");
3221 if (issues
.bit_aligned_aggregate_argument
)
3223 /* Let's only remove parameters/return values from such functions.
3224 TODO: We could only prevent splitting the problematic parameters if
3225 anybody thinks it is worth it. */
3226 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3227 fprintf (dump_file
, "A call of %s has bit-aligned aggregate argument,"
3228 " disabling parameter splitting.\n", node
->dump_name ());
3230 isra_func_summary
*ifs
= func_sums
->get (node
);
3231 gcc_checking_assert (ifs
);
3232 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
3233 for (unsigned i
= 0; i
< param_count
; i
++)
3234 (*ifs
->m_parameters
)[i
].split_candidate
= false;
3236 if (!issues
.there_is_one
)
3238 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3239 fprintf (dump_file
, "There is no call to %s that we can modify. "
3240 "Disabling all modifications.\n", node
->dump_name ());
3246 /* Find the access with corresponding OFFSET and SIZE among accesses in
3247 PARAM_DESC and return it or NULL if such an access is not there. */
3249 static param_access
*
3250 find_param_access (isra_param_desc
*param_desc
, unsigned offset
, unsigned size
)
3252 unsigned pclen
= vec_safe_length (param_desc
->accesses
);
3254 /* The search is linear but the number of stored accesses is bound by
3255 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3257 for (unsigned i
= 0; i
< pclen
; i
++)
3258 if ((*param_desc
->accesses
)[i
]->unit_offset
== offset
3259 && (*param_desc
->accesses
)[i
]->unit_size
== size
)
3260 return (*param_desc
->accesses
)[i
];
3265 /* Return iff the total size of definite replacement SIZE would violate the
3266 limit set for it in PARAM. */
3269 size_would_violate_limit_p (isra_param_desc
*desc
, unsigned size
)
3271 unsigned limit
= desc
->param_size_limit
;
3273 || (!desc
->by_ref
&& size
== limit
))
3278 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3279 the set limit. IDX is the parameter number which is dumped when
3283 bump_reached_size (isra_param_desc
*desc
, unsigned size
, unsigned idx
)
3285 unsigned after
= desc
->size_reached
+ size
;
3286 if (size_would_violate_limit_p (desc
, after
))
3288 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3289 fprintf (dump_file
, " ...size limit reached, disqualifying "
3290 "candidate parameter %u\n", idx
);
3291 desc
->split_candidate
= false;
3294 desc
->size_reached
= after
;
3297 /* Take all actions required to deal with an edge CS that represents a call to
3298 an unknown or un-analyzed function, for both parameter removal and
3302 process_edge_to_unknown_caller (cgraph_edge
*cs
)
3304 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3305 gcc_checking_assert (from_ifs
);
3306 isra_call_summary
*csum
= call_sums
->get (cs
);
3308 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3309 fprintf (dump_file
, "Processing an edge to an unknown caller from %s:\n",
3310 cs
->caller
->dump_name ());
3312 unsigned args_count
= csum
->m_arg_flow
.length ();
3313 for (unsigned i
= 0; i
< args_count
; i
++)
3315 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3317 if (ipf
->pointer_pass_through
)
3319 isra_param_desc
*param_desc
3320 = &(*from_ifs
->m_parameters
)[get_single_param_flow_source (ipf
)];
3321 param_desc
->locally_unused
= false;
3322 param_desc
->split_candidate
= false;
3325 if (ipf
->aggregate_pass_through
)
3327 unsigned idx
= get_single_param_flow_source (ipf
);
3328 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3330 param_desc
->locally_unused
= false;
3331 if (!param_desc
->split_candidate
)
3333 gcc_assert (!param_desc
->by_ref
);
3334 param_access
*pacc
= find_param_access (param_desc
, ipf
->unit_offset
,
3336 gcc_checking_assert (pacc
);
3337 pacc
->certain
= true;
3338 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3340 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3341 fprintf (dump_file
, " ...leading to overlap, "
3342 " disqualifying candidate parameter %u\n",
3344 param_desc
->split_candidate
= false;
3347 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3348 ipf
->aggregate_pass_through
= false;
3352 for (int j
= 0; j
< ipf
->length
; j
++)
3354 int input_idx
= ipf
->inputs
[j
];
3355 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3360 /* Propagate parameter removal information through cross-SCC edge CS,
3361 i.e. decrease the use count in the caller parameter descriptor for each use
3365 param_removal_cross_scc_edge (cgraph_edge
*cs
)
3367 enum availability availability
;
3368 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3369 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3370 if (!to_ifs
|| !to_ifs
->m_candidate
3371 || (availability
< AVAIL_AVAILABLE
)
3372 || vec_safe_is_empty (to_ifs
->m_parameters
))
3374 process_edge_to_unknown_caller (cs
);
3377 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3378 gcc_checking_assert (from_ifs
);
3380 isra_call_summary
*csum
= call_sums
->get (cs
);
3381 unsigned args_count
= csum
->m_arg_flow
.length ();
3382 unsigned param_count
= vec_safe_length (to_ifs
->m_parameters
);
3384 for (unsigned i
= 0; i
< args_count
; i
++)
3386 bool unused_in_callee
;
3387 if (i
< param_count
)
3388 unused_in_callee
= (*to_ifs
->m_parameters
)[i
].locally_unused
;
3390 unused_in_callee
= false;
3392 if (!unused_in_callee
)
3394 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3395 for (int j
= 0; j
< ipf
->length
; j
++)
3397 int input_idx
= ipf
->inputs
[j
];
3398 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3404 /* Unless it is already there, push NODE which is also described by IFS to
3408 isra_push_node_to_stack (cgraph_node
*node
, isra_func_summary
*ifs
,
3409 vec
<cgraph_node
*> *stack
)
3413 ifs
->m_queued
= true;
3414 stack
->safe_push (node
);
3418 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3419 used and push CALLER on STACK. */
3422 isra_mark_caller_param_used (isra_func_summary
*from_ifs
, int input_idx
,
3423 cgraph_node
*caller
, vec
<cgraph_node
*> *stack
)
3425 if ((*from_ifs
->m_parameters
)[input_idx
].locally_unused
)
3427 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3428 isra_push_node_to_stack (caller
, from_ifs
, stack
);
3432 /* Combine safe_size of DESC with SIZE and return true if it has changed. */
3435 update_safe_size (isra_param_desc
*desc
, unsigned size
)
3437 if (!desc
->safe_size_set
)
3439 desc
->safe_size_set
= 1;
3440 desc
->safe_size
= size
;
3443 if (desc
->safe_size
<= size
)
3445 desc
->safe_size
= size
;
3449 /* Set all param hints in DESC to the pessimistic values. Return true if any
3450 hints that need to potentially trigger further propagation have changed. */
3453 flip_all_hints_pessimistic (isra_param_desc
*desc
)
3455 desc
->not_specially_constructed
= true;
3456 return update_safe_size (desc
, 0);
3459 /* Because we have not analyzed or otherwise problematic caller, go over all
3460 parameter int flags of IFS describing a call graph node of a calllee and
3461 turn them pessimistic. Return true if any hints that need to potentially
3462 trigger further propagation have changed. */
3465 flip_all_param_hints_pessimistic (isra_func_summary
*ifs
)
3467 if (!ifs
|| !ifs
->m_candidate
)
3471 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
3473 for (unsigned i
= 0; i
< param_count
; i
++)
3474 ret
|= flip_all_hints_pessimistic (&(*ifs
->m_parameters
)[i
]);
3479 /* Propagate hints accross edge CS which ultimately leads to a node described
3480 by TO_IFS. Return true if any hints of the callee which should potentially
3481 trigger further propagation have changed. */
3484 propagate_param_hints_accross_call (cgraph_edge
*cs
, isra_func_summary
*to_ifs
)
3486 if (!to_ifs
|| !to_ifs
->m_candidate
)
3489 isra_call_summary
*csum
= call_sums
->get (cs
);
3491 unsigned args_count
= csum
->m_arg_flow
.length ();
3492 unsigned param_count
= vec_safe_length (to_ifs
->m_parameters
);
3494 for (unsigned i
= 0; i
< param_count
; i
++)
3496 isra_param_desc
*desc
= &(*to_ifs
->m_parameters
)[i
];
3497 if (i
>= args_count
)
3499 ret
|= flip_all_hints_pessimistic (desc
);
3505 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3507 if (!ipf
->constructed_for_calls
)
3508 desc
->not_specially_constructed
= true;
3510 if (ipf
->pointer_pass_through
)
3512 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3513 int srcidx
= get_single_param_flow_source (ipf
);
3514 if (vec_safe_length (from_ifs
->m_parameters
) > (unsigned) srcidx
)
3516 isra_param_desc
*src_d
= &(*from_ifs
->m_parameters
)[srcidx
];
3517 if (src_d
->safe_size_set
)
3518 ret
|= update_safe_size (desc
, src_d
->safe_size
);
3521 ret
|= update_safe_size (desc
, 0);
3523 else if (!ipf
->aggregate_pass_through
)
3524 ret
|= update_safe_size (desc
, ipf
->unit_size
);
3526 /* LTOing type-mismatched programs can end up here. */
3527 ret
|= update_safe_size (desc
, 0);
3533 /* Propagate hints from NODE described by FROM_IFS to all its (dorect) callees,
3534 push those that may need re-visiting onto STACK. */
3537 propagate_hints_to_all_callees (cgraph_node
*node
, isra_func_summary
*from_ifs
,
3538 vec
<cgraph_node
*> *stack
)
3540 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
3542 enum availability availability
;
3543 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3544 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3547 if (flip_all_param_hints_pessimistic (to_ifs
)
3548 && ipa_edge_within_scc (cs
))
3549 isra_push_node_to_stack (callee
, to_ifs
, stack
);
3551 else if (propagate_param_hints_accross_call (cs
, to_ifs
)
3552 && ipa_edge_within_scc (cs
))
3553 isra_push_node_to_stack (callee
, to_ifs
, stack
);
3557 /* Propagate information that any parameter is not used only locally within a
3558 SCC across CS to the caller, which must be in the same SCC as the
3559 callee. Push any callers that need to be re-processed to STACK. */
3562 propagate_used_across_scc_edge (cgraph_edge
*cs
, vec
<cgraph_node
*> *stack
)
3564 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3565 if (!from_ifs
|| vec_safe_is_empty (from_ifs
->m_parameters
))
3568 isra_call_summary
*csum
= call_sums
->get (cs
);
3569 gcc_checking_assert (csum
);
3570 unsigned args_count
= csum
->m_arg_flow
.length ();
3571 enum availability availability
;
3572 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3573 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3575 unsigned param_count
3576 = (to_ifs
&& (availability
>= AVAIL_AVAILABLE
))
3577 ? vec_safe_length (to_ifs
->m_parameters
) : 0;
3578 for (unsigned i
= 0; i
< args_count
; i
++)
3581 && (*to_ifs
->m_parameters
)[i
].locally_unused
)
3584 /* The argument is needed in the callee it, we must mark the parameter as
3585 used also in the caller and its callers within this SCC. */
3586 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3587 for (int j
= 0; j
< ipf
->length
; j
++)
3589 int input_idx
= ipf
->inputs
[j
];
3590 isra_mark_caller_param_used (from_ifs
, input_idx
, cs
->caller
, stack
);
3595 /* Propagate information that any parameter is not used only locally within a
3596 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3597 same SCC. Push any callers that need to be re-processed to STACK. */
3600 propagate_used_to_scc_callers (cgraph_node
*node
, void *data
)
3602 vec
<cgraph_node
*> *stack
= (vec
<cgraph_node
*> *) data
;
3604 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3605 if (ipa_edge_within_scc (cs
))
3606 propagate_used_across_scc_edge (cs
, stack
);
3610 /* Return true iff all certain accesses in ARG_DESC are also present as
3611 certain accesses in PARAM_DESC. */
3614 all_callee_accesses_present_p (isra_param_desc
*param_desc
,
3615 isra_param_desc
*arg_desc
)
3617 unsigned aclen
= vec_safe_length (arg_desc
->accesses
);
3618 for (unsigned j
= 0; j
< aclen
; j
++)
3620 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3621 if (!argacc
->certain
)
3623 param_access
*pacc
= find_param_access (param_desc
, argacc
->unit_offset
,
3627 || !types_compatible_p (argacc
->type
, pacc
->type
))
3633 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3634 does not allow instantiating an auto_vec with a type defined within a
3635 function so it is a global type. */
3636 enum acc_prop_kind
{ACC_PROP_DONT
, ACC_PROP_COPY
, ACC_PROP_CERTAIN
};
3639 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3640 (which belongs to CALLER) if they would not violate some constraint there.
3641 If successful, return NULL, otherwise return the string reason for failure
3642 (which can be written to the dump file). DELTA_OFFSET is the known offset
3643 of the actual argument withing the formal parameter (so of ARG_DESCS within
3644 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3645 known. In case of success, set *CHANGE_P to true if propagation actually
3646 changed anything. */
3649 pull_accesses_from_callee (cgraph_node
*caller
, isra_param_desc
*param_desc
,
3650 isra_param_desc
*arg_desc
,
3651 unsigned delta_offset
, unsigned arg_size
,
3654 unsigned pclen
= vec_safe_length (param_desc
->accesses
);
3655 unsigned aclen
= vec_safe_length (arg_desc
->accesses
);
3656 unsigned prop_count
= 0;
3657 unsigned prop_size
= 0;
3658 bool change
= false;
3660 auto_vec
<enum acc_prop_kind
, 8> prop_kinds (aclen
);
3661 for (unsigned j
= 0; j
< aclen
; j
++)
3663 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3664 prop_kinds
.safe_push (ACC_PROP_DONT
);
3667 && argacc
->unit_offset
+ argacc
->unit_size
> arg_size
)
3668 return "callee access outsize size boundary";
3670 if (!argacc
->certain
)
3673 unsigned offset
= argacc
->unit_offset
+ delta_offset
;
3674 /* Given that accesses are initially stored according to increasing
3675 offset and decreasing size in case of equal offsets, the following
3676 searches could be written more efficiently if we kept the ordering
3677 when copying. But the number of accesses is capped at
3678 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3679 messy quickly, so let's improve on that only if necessary. */
3681 bool exact_match
= false;
3682 for (unsigned i
= 0; i
< pclen
; i
++)
3684 /* Check for overlaps. */
3685 param_access
*pacc
= (*param_desc
->accesses
)[i
];
3686 if (pacc
->unit_offset
== offset
3687 && pacc
->unit_size
== argacc
->unit_size
)
3689 if (argacc
->alias_ptr_type
!= pacc
->alias_ptr_type
3690 || !types_compatible_p (argacc
->type
, pacc
->type
)
3691 || argacc
->reverse
!= pacc
->reverse
)
3692 return "propagated access types would not match existing ones";
3697 prop_kinds
[j
] = ACC_PROP_CERTAIN
;
3698 prop_size
+= argacc
->unit_size
;
3704 if (offset
< pacc
->unit_offset
+ pacc
->unit_size
3705 && offset
+ argacc
->unit_size
> pacc
->unit_offset
)
3707 /* None permissible with load accesses, possible to fit into
3710 || offset
< pacc
->unit_offset
3711 || (offset
+ argacc
->unit_size
3712 > pacc
->unit_offset
+ pacc
->unit_size
))
3713 return "a propagated access would conflict in caller";
3719 prop_kinds
[j
] = ACC_PROP_COPY
;
3721 prop_size
+= argacc
->unit_size
;
3729 if ((prop_count
+ pclen
3730 > (unsigned) opt_for_fn (caller
->decl
, param_ipa_sra_max_replacements
))
3731 || size_would_violate_limit_p (param_desc
,
3732 param_desc
->size_reached
+ prop_size
))
3733 return "propagating accesses would violate the count or size limit";
3736 for (unsigned j
= 0; j
< aclen
; j
++)
3738 if (prop_kinds
[j
] == ACC_PROP_COPY
)
3740 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3742 param_access
*copy
= ggc_cleared_alloc
<param_access
> ();
3743 copy
->unit_offset
= argacc
->unit_offset
+ delta_offset
;
3744 copy
->unit_size
= argacc
->unit_size
;
3745 copy
->type
= argacc
->type
;
3746 copy
->alias_ptr_type
= argacc
->alias_ptr_type
;
3747 copy
->certain
= true;
3748 copy
->reverse
= argacc
->reverse
;
3749 vec_safe_push (param_desc
->accesses
, copy
);
3751 else if (prop_kinds
[j
] == ACC_PROP_CERTAIN
)
3753 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3755 = find_param_access (param_desc
, argacc
->unit_offset
+ delta_offset
,
3757 csp
->certain
= true;
3761 param_desc
->size_reached
+= prop_size
;
3766 /* Propagate parameter splitting information through call graph edge CS.
3767 Return true if any changes that might need to be propagated within SCCs have
3768 been made. The function also clears the aggregate_pass_through and
3769 pointer_pass_through in call summaries which do not need to be processed
3770 again if this CS is revisited when iterating while changes are propagated
3774 param_splitting_across_edge (cgraph_edge
*cs
)
3777 bool cross_scc
= !ipa_edge_within_scc (cs
);
3778 enum availability availability
;
3779 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3780 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3781 gcc_checking_assert (from_ifs
&& from_ifs
->m_parameters
);
3783 isra_call_summary
*csum
= call_sums
->get (cs
);
3784 gcc_checking_assert (csum
);
3785 unsigned args_count
= csum
->m_arg_flow
.length ();
3786 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3787 unsigned param_count
3788 = ((to_ifs
&& to_ifs
->m_candidate
&& (availability
>= AVAIL_AVAILABLE
))
3789 ? vec_safe_length (to_ifs
->m_parameters
)
3792 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3793 fprintf (dump_file
, "Splitting across %s->%s:\n",
3794 cs
->caller
->dump_name (), callee
->dump_name ());
3797 for (i
= 0; (i
< args_count
) && (i
< param_count
); i
++)
3799 isra_param_desc
*arg_desc
= &(*to_ifs
->m_parameters
)[i
];
3800 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3802 if (arg_desc
->locally_unused
)
3804 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3805 fprintf (dump_file
, " ->%u: unused in callee\n", i
);
3806 ipf
->pointer_pass_through
= false;
3810 if (ipf
->pointer_pass_through
)
3812 int idx
= get_single_param_flow_source (ipf
);
3813 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3814 if (!param_desc
->split_candidate
)
3816 gcc_assert (param_desc
->by_ref
);
3818 if (!arg_desc
->split_candidate
|| !arg_desc
->by_ref
)
3820 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3821 fprintf (dump_file
, " %u->%u: not candidate or not by "
3822 "reference in callee\n", idx
, i
);
3823 param_desc
->split_candidate
= false;
3824 ipf
->pointer_pass_through
= false;
3827 else if (!ipf
->safe_to_import_accesses
)
3829 if (!csum
->m_before_any_store
3830 || !all_callee_accesses_present_p (param_desc
, arg_desc
))
3832 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3833 fprintf (dump_file
, " %u->%u: cannot import accesses.\n",
3835 param_desc
->split_candidate
= false;
3836 ipf
->pointer_pass_through
= false;
3842 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3843 fprintf (dump_file
, " %u->%u: verified callee accesses "
3844 "present.\n", idx
, i
);
3846 ipf
->pointer_pass_through
= false;
3851 const char *pull_failure
3852 = pull_accesses_from_callee (cs
->caller
, param_desc
, arg_desc
,
3856 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3857 fprintf (dump_file
, " %u->%u: by_ref access pull "
3858 "failed: %s.\n", idx
, i
, pull_failure
);
3859 param_desc
->split_candidate
= false;
3860 ipf
->pointer_pass_through
= false;
3865 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3866 fprintf (dump_file
, " %u->%u: by_ref access pull "
3867 "succeeded.\n", idx
, i
);
3869 ipf
->pointer_pass_through
= false;
3873 else if (ipf
->aggregate_pass_through
)
3875 int idx
= get_single_param_flow_source (ipf
);
3876 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3877 if (!param_desc
->split_candidate
)
3879 gcc_assert (!param_desc
->by_ref
);
3880 param_access
*pacc
= find_param_access (param_desc
, ipf
->unit_offset
,
3882 gcc_checking_assert (pacc
);
3886 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3887 fprintf (dump_file
, " %u->%u: already certain\n", idx
, i
);
3888 ipf
->aggregate_pass_through
= false;
3890 else if (!arg_desc
->split_candidate
|| arg_desc
->by_ref
)
3892 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3893 fprintf (dump_file
, " %u->%u: not candidate or by "
3894 "reference in callee\n", idx
, i
);
3896 pacc
->certain
= true;
3897 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3899 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3900 fprintf (dump_file
, " ...leading to overlap, "
3901 " disqualifying candidate parameter %u\n",
3903 param_desc
->split_candidate
= false;
3906 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3908 ipf
->aggregate_pass_through
= false;
3913 const char *pull_failure
3914 = pull_accesses_from_callee (cs
->caller
, param_desc
, arg_desc
,
3916 ipf
->unit_size
, &res
);
3919 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3920 fprintf (dump_file
, " %u->%u: arg access pull "
3921 "failed: %s.\n", idx
, i
, pull_failure
);
3923 ipf
->aggregate_pass_through
= false;
3924 pacc
->certain
= true;
3926 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3928 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3929 fprintf (dump_file
, " ...leading to overlap, "
3930 " disqualifying candidate parameter %u\n",
3932 param_desc
->split_candidate
= false;
3935 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3941 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3942 fprintf (dump_file
, " %u->%u: arg access pull "
3943 "succeeded.\n", idx
, i
);
3945 ipf
->aggregate_pass_through
= false;
3951 /* Handle argument-parameter count mismatches. */
3952 for (; (i
< args_count
); i
++)
3954 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3956 if (ipf
->pointer_pass_through
|| ipf
->aggregate_pass_through
)
3958 int idx
= get_single_param_flow_source (ipf
);
3959 ipf
->pointer_pass_through
= false;
3960 ipf
->aggregate_pass_through
= false;
3961 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3962 if (!param_desc
->split_candidate
)
3965 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3966 fprintf (dump_file
, " %u->%u: no corresponding formal parameter\n",
3968 param_desc
->split_candidate
= false;
3975 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3976 callers ignore the return value, or come from the same SCC and use the
3977 return value only to compute their return value, return false, otherwise
3981 retval_used_p (cgraph_node
*node
, void *)
3983 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3985 isra_call_summary
*csum
= call_sums
->get (cs
);
3986 gcc_checking_assert (csum
);
3987 if (csum
->m_return_ignored
)
3989 if (!csum
->m_return_returned
)
3992 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3993 if (!from_ifs
|| !from_ifs
->m_candidate
)
3996 if (!ipa_edge_within_scc (cs
)
3997 && !from_ifs
->m_return_ignored
)
4004 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
4005 modify parameter which originally had index BASE_INDEX, in the adjustment
4006 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
4007 PREV_ADJUSTMENT. If IPA-CP has created a transformation summary for the
4008 original node, it needs to be passed in IPCP_TS, otherwise it should be
4009 NULL. If the parent clone is the original function, PREV_ADJUSTMENT is NULL
4010 and PREV_CLONE_INDEX is equal to BASE_INDEX. */
4013 push_param_adjustments_for_index (isra_func_summary
*ifs
, unsigned base_index
,
4014 unsigned prev_clone_index
,
4015 ipa_adjusted_param
*prev_adjustment
,
4016 ipcp_transformation
*ipcp_ts
,
4017 vec
<ipa_adjusted_param
, va_gc
> **new_params
)
4019 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[base_index
];
4020 if (desc
->locally_unused
)
4023 fprintf (dump_file
, " Will remove parameter %u\n", base_index
);
4027 if (!desc
->split_candidate
)
4029 ipa_adjusted_param adj
;
4030 if (prev_adjustment
)
4032 adj
= *prev_adjustment
;
4033 adj
.prev_clone_adjustment
= true;
4034 adj
.prev_clone_index
= prev_clone_index
;
4038 memset (&adj
, 0, sizeof (adj
));
4039 adj
.op
= IPA_PARAM_OP_COPY
;
4040 adj
.base_index
= base_index
;
4041 adj
.prev_clone_index
= prev_clone_index
;
4043 vec_safe_push ((*new_params
), adj
);
4048 fprintf (dump_file
, " Will split parameter %u\n", base_index
);
4050 gcc_assert (!prev_adjustment
|| prev_adjustment
->op
== IPA_PARAM_OP_COPY
);
4051 unsigned aclen
= vec_safe_length (desc
->accesses
);
4052 for (unsigned j
= 0; j
< aclen
; j
++)
4054 param_access
*pa
= (*desc
->accesses
)[j
];
4060 ipa_argagg_value_list
avl (ipcp_ts
);
4061 tree value
= avl
.get_value (base_index
, pa
->unit_offset
);
4062 if (value
&& !AGGREGATE_TYPE_P (pa
->type
))
4065 fprintf (dump_file
, " - omitting component at byte "
4066 "offset %u which is known to have a constant value\n ",
4073 fprintf (dump_file
, " - component at byte offset %u, "
4074 "size %u\n", pa
->unit_offset
, pa
->unit_size
);
4076 ipa_adjusted_param adj
;
4077 memset (&adj
, 0, sizeof (adj
));
4078 adj
.op
= IPA_PARAM_OP_SPLIT
;
4079 adj
.base_index
= base_index
;
4080 adj
.prev_clone_index
= prev_clone_index
;
4081 adj
.param_prefix_index
= IPA_PARAM_PREFIX_ISRA
;
4082 adj
.reverse
= pa
->reverse
;
4083 adj
.type
= pa
->type
;
4084 adj
.alias_ptr_type
= pa
->alias_ptr_type
;
4085 adj
.unit_offset
= pa
->unit_offset
;
4086 vec_safe_push ((*new_params
), adj
);
4090 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
4091 flag of all callers of NODE. */
4094 mark_callers_calls_comdat_local (struct cgraph_node
*node
, void *)
4096 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4097 cs
->caller
->calls_comdat_local
= true;
4101 /* Remove any IPA-CP results stored in TS that are associated with removed
4102 parameters as marked in IFS. */
4105 zap_useless_ipcp_results (const isra_func_summary
*ifs
, ipcp_transformation
*ts
)
4107 ts
->remove_argaggs_if ([ifs
](const ipa_argagg_value
&v
)
4109 return (*ifs
->m_parameters
)[v
.index
].locally_unused
;
4112 bool useful_vr
= false;
4113 unsigned count
= vec_safe_length (ts
->m_vr
);
4114 for (unsigned i
= 0; i
< count
; i
++)
4115 if ((*ts
->m_vr
)[i
].known_p ())
4117 const isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4118 if (desc
->locally_unused
)
4119 (*ts
->m_vr
)[i
].set_unknown ();
4127 /* Do final processing of results of IPA propagation regarding NODE, clone it
4131 process_isra_node_results (cgraph_node
*node
,
4132 hash_map
<const char *, unsigned> *clone_num_suffixes
)
4134 isra_func_summary
*ifs
= func_sums
->get (node
);
4135 if (!ifs
|| !ifs
->m_candidate
)
4138 auto_vec
<bool, 16> surviving_params
;
4139 bool check_surviving
= false;
4140 clone_info
*cinfo
= clone_info::get (node
);
4141 if (cinfo
&& cinfo
->param_adjustments
)
4143 check_surviving
= true;
4144 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
4147 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
4148 bool will_change_function
= false;
4149 if (ifs
->m_returns_value
&& ifs
->m_return_ignored
)
4150 will_change_function
= true;
4152 for (unsigned i
= 0; i
< param_count
; i
++)
4154 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4155 if ((desc
->locally_unused
|| desc
->split_candidate
)
4156 /* Make sure we do not clone just to attempt to remove an already
4157 removed unused argument. */
4158 && (!check_surviving
4159 || (i
< surviving_params
.length ()
4160 && surviving_params
[i
])))
4162 will_change_function
= true;
4166 if (!will_change_function
)
4171 fprintf (dump_file
, "\nEvaluating analysis results for %s\n",
4172 node
->dump_name ());
4173 if (ifs
->m_returns_value
&& ifs
->m_return_ignored
)
4174 fprintf (dump_file
, " Will remove return value.\n");
4177 ipcp_transformation
*ipcp_ts
= ipcp_get_transformation_summary (node
);
4179 zap_useless_ipcp_results (ifs
, ipcp_ts
);
4180 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
4181 if (ipa_param_adjustments
*old_adjustments
4182 = cinfo
? cinfo
->param_adjustments
: NULL
)
4184 unsigned old_adj_len
= vec_safe_length (old_adjustments
->m_adj_params
);
4185 for (unsigned i
= 0; i
< old_adj_len
; i
++)
4187 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
4188 push_param_adjustments_for_index (ifs
, old_adj
->base_index
, i
,
4189 old_adj
, ipcp_ts
, &new_params
);
4193 for (unsigned i
= 0; i
< param_count
; i
++)
4194 push_param_adjustments_for_index (ifs
, i
, i
, NULL
, ipcp_ts
, &new_params
);
4196 ipa_param_adjustments
*new_adjustments
4197 = (new (ggc_alloc
<ipa_param_adjustments
> ())
4198 ipa_param_adjustments (new_params
, param_count
,
4199 ifs
->m_returns_value
&& ifs
->m_return_ignored
));
4201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4203 fprintf (dump_file
, "\n Created adjustments:\n");
4204 new_adjustments
->dump (dump_file
);
4207 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
4208 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4210 auto_vec
<cgraph_edge
*> callers
= node
->collect_callers ();
4211 cgraph_node
*new_node
4212 = node
->create_virtual_clone (callers
, NULL
, new_adjustments
, "isra",
4215 if (node
->calls_comdat_local
&& node
->same_comdat_group
)
4217 new_node
->add_to_same_comdat_group (node
);
4218 new_node
->call_for_symbol_and_aliases (mark_callers_calls_comdat_local
,
4221 new_node
->calls_comdat_local
= node
->calls_comdat_local
;
4224 fprintf (dump_file
, " Created new node %s\n", new_node
->dump_name ());
4228 /* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
4229 followed by the list of numbers in INDICES. */
4232 dump_list_of_param_indices (const cgraph_node
*node
, const char* msg
,
4233 const vec
<unsigned> &indices
)
4235 if (indices
.is_empty ())
4237 fprintf (dump_file
, "The following parameters of %s %s:", node
->dump_name (),
4239 for (unsigned i
: indices
)
4240 fprintf (dump_file
, " %u", i
);
4241 fprintf (dump_file
, "\n");
4244 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
4245 and disable transformations for those which have not or which should not
4246 transformed because the associated debug counter reached its limit. Return
4247 true if none survived or if there were no candidates to begin with.
4248 Additionally, also adjust parameter descriptions based on debug counters and
4249 hints propagated earlier. */
4252 adjust_parameter_descriptions (cgraph_node
*node
, isra_func_summary
*ifs
)
4255 unsigned len
= vec_safe_length (ifs
->m_parameters
);
4259 auto_vec
<bool, 16> surviving_params
;
4260 bool check_surviving
= false;
4261 clone_info
*cinfo
= clone_info::get (node
);
4262 if (cinfo
&& cinfo
->param_adjustments
)
4264 check_surviving
= true;
4265 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
4267 ipcp_transformation
*ipcp_ts
= ipcp_get_transformation_summary (node
);
4268 auto_vec
<unsigned> dump_dead_indices
;
4269 auto_vec
<unsigned> dump_bad_cond_indices
;
4270 for (unsigned i
= 0; i
< len
; i
++)
4272 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4273 if (!dbg_cnt (ipa_sra_params
))
4275 desc
->locally_unused
= false;
4276 desc
->split_candidate
= false;
4280 if (desc
->split_only_when_retval_removed
4281 && !ifs
->m_return_ignored
)
4283 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
4284 && (desc
->locally_unused
|| desc
->split_candidate
))
4285 dump_bad_cond_indices
.safe_push (i
);
4287 gcc_checking_assert (!desc
->locally_unused
4288 || desc
->remove_only_when_retval_removed
);
4289 desc
->locally_unused
= false;
4290 desc
->split_candidate
= false;
4293 if (desc
->remove_only_when_retval_removed
4294 && !ifs
->m_return_ignored
)
4296 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
4297 && (desc
->locally_unused
|| desc
->split_candidate
))
4298 dump_bad_cond_indices
.safe_push (i
);
4300 desc
->locally_unused
= false;
4303 && (i
>= surviving_params
.length ()
4304 || !surviving_params
[i
]))
4306 /* Even if the parameter was removed by a previous IPA pass, we do
4307 not clear locally_unused because if it really is unused, this
4308 information might be useful in callers. */
4309 desc
->split_candidate
= false;
4311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4312 dump_dead_indices
.safe_push (i
);
4315 if (desc
->split_candidate
&& desc
->conditionally_dereferenceable
)
4317 gcc_assert (desc
->safe_size_set
);
4318 for (param_access
*pa
: *desc
->accesses
)
4319 if ((pa
->unit_offset
+ pa
->unit_size
) > desc
->safe_size
)
4321 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4322 dump_bad_cond_indices
.safe_push (i
);
4323 desc
->split_candidate
= false;
4328 if (desc
->split_candidate
)
4330 if (desc
->by_ref
&& !desc
->not_specially_constructed
)
4333 = opt_for_fn (node
->decl
,
4334 param_ipa_sra_ptrwrap_growth_factor
);
4335 desc
->param_size_limit
= extra_factor
* desc
->param_size_limit
;
4337 if (size_would_violate_limit_p (desc
, desc
->size_reached
))
4338 desc
->split_candidate
= false;
4341 /* Avoid ICEs on size-mismatched VIEW_CONVERT_EXPRs when callers and
4342 callees don't agree on types in aggregates and we try to do both
4343 IPA-CP and IPA-SRA. */
4344 if (ipcp_ts
&& desc
->split_candidate
)
4346 ipa_argagg_value_list
avl (ipcp_ts
);
4347 for (const param_access
*pa
: desc
->accesses
)
4351 tree value
= avl
.get_value (i
, pa
->unit_offset
);
4353 && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value
)))
4357 desc
->split_candidate
= false;
4358 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4359 dump_dead_indices
.safe_push (i
);
4365 if (desc
->locally_unused
|| desc
->split_candidate
)
4369 dump_list_of_param_indices (node
, "are dead on arrival or have a type "
4370 "mismatch with IPA-CP", dump_dead_indices
);
4371 dump_list_of_param_indices (node
, "fail additional requirements ",
4372 dump_bad_cond_indices
);
4378 /* Run the interprocedural part of IPA-SRA. */
4381 ipa_sra_analysis (void)
4385 fprintf (dump_file
, "\n========== IPA-SRA IPA stage ==========\n");
4386 ipa_sra_dump_all_summaries (dump_file
, false);
4389 gcc_checking_assert (func_sums
);
4390 gcc_checking_assert (call_sums
);
4391 cgraph_node
**order
= XCNEWVEC (cgraph_node
*, symtab
->cgraph_count
);
4392 auto_vec
<cgraph_node
*, 16> stack
;
4393 int node_scc_count
= ipa_reduced_postorder (order
, true, NULL
);
4395 /* One sweep from callers to callees for return value removal. */
4396 for (int i
= node_scc_count
- 1; i
>= 0 ; i
--)
4398 cgraph_node
*scc_rep
= order
[i
];
4399 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (scc_rep
);
4401 /* Preliminary IPA function level checks. */
4402 for (cgraph_node
*v
: cycle_nodes
)
4404 isra_func_summary
*ifs
= func_sums
->get (v
);
4405 if (!ifs
|| !ifs
->m_candidate
)
4407 if (!ipa_sra_ipa_function_checks (v
)
4408 || check_all_callers_for_issues (v
))
4412 for (cgraph_node
*v
: cycle_nodes
)
4414 isra_func_summary
*ifs
= func_sums
->get (v
);
4415 if (!ifs
|| !ifs
->m_candidate
)
4418 = (ifs
->m_returns_value
4419 && (!dbg_cnt (ipa_sra_retvalues
)
4420 || v
->call_for_symbol_and_aliases (retval_used_p
,
4422 ifs
->m_return_ignored
= !return_needed
;
4424 isra_push_node_to_stack (v
, ifs
, &stack
);
4427 while (!stack
.is_empty ())
4429 cgraph_node
*node
= stack
.pop ();
4430 isra_func_summary
*ifs
= func_sums
->get (node
);
4431 gcc_checking_assert (ifs
&& ifs
->m_queued
);
4432 ifs
->m_queued
= false;
4434 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4435 if (ipa_edge_within_scc (cs
)
4436 && call_sums
->get (cs
)->m_return_returned
)
4438 enum availability av
;
4439 cgraph_node
*callee
= cs
->callee
->function_symbol (&av
);
4440 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
4441 if (to_ifs
&& to_ifs
->m_return_ignored
)
4443 to_ifs
->m_return_ignored
= false;
4444 isra_push_node_to_stack (callee
, to_ifs
, &stack
);
4449 /* Parameter hint propagation. */
4450 for (cgraph_node
*v
: cycle_nodes
)
4452 isra_func_summary
*ifs
= func_sums
->get (v
);
4453 propagate_hints_to_all_callees (v
, ifs
, &stack
);
4456 while (!stack
.is_empty ())
4458 cgraph_node
*node
= stack
.pop ();
4459 isra_func_summary
*ifs
= func_sums
->get (node
);
4460 gcc_checking_assert (ifs
&& ifs
->m_queued
);
4461 ifs
->m_queued
= false;
4462 propagate_hints_to_all_callees (node
, ifs
, &stack
);
4465 cycle_nodes
.release ();
4468 /* One sweep from callees to callers for parameter removal and splitting. */
4469 for (int i
= 0; i
< node_scc_count
; i
++)
4471 cgraph_node
*scc_rep
= order
[i
];
4472 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (scc_rep
);
4474 /* First step of parameter removal. */
4475 for (cgraph_node
*v
: cycle_nodes
)
4477 isra_func_summary
*ifs
= func_sums
->get (v
);
4478 if (!ifs
|| !ifs
->m_candidate
)
4480 if (adjust_parameter_descriptions (v
, ifs
))
4482 for (cgraph_edge
*cs
= v
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4483 process_edge_to_unknown_caller (cs
);
4484 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4485 if (!ipa_edge_within_scc (cs
))
4486 param_removal_cross_scc_edge (cs
);
4489 /* Look at edges within the current SCC and propagate used-ness across
4490 them, pushing onto the stack all notes which might need to be
4492 for (cgraph_node
*v
: cycle_nodes
)
4493 v
->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers
,
4496 /* Keep revisiting and pushing until nothing changes. */
4497 while (!stack
.is_empty ())
4499 cgraph_node
*v
= stack
.pop ();
4500 isra_func_summary
*ifs
= func_sums
->get (v
);
4501 gcc_checking_assert (ifs
&& ifs
->m_queued
);
4502 ifs
->m_queued
= false;
4504 v
->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers
,
4508 /* Parameter splitting. */
4509 bool repeat_scc_access_propagation
;
4512 repeat_scc_access_propagation
= false;
4513 for (cgraph_node
*v
: cycle_nodes
)
4515 isra_func_summary
*ifs
= func_sums
->get (v
);
4517 || !ifs
->m_candidate
4518 || vec_safe_is_empty (ifs
->m_parameters
))
4520 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4521 if (param_splitting_across_edge (cs
))
4522 repeat_scc_access_propagation
= true;
4525 while (repeat_scc_access_propagation
);
4528 for (cgraph_node
*v
: cycle_nodes
)
4529 verify_splitting_accesses (v
, true);
4531 cycle_nodes
.release ();
4534 ipa_free_postorder_info ();
4539 if (dump_flags
& TDF_DETAILS
)
4541 fprintf (dump_file
, "\n========== IPA-SRA propagation final state "
4543 ipa_sra_dump_all_summaries (dump_file
, true);
4545 fprintf (dump_file
, "\n========== IPA-SRA decisions ==========\n");
4548 hash_map
<const char *, unsigned> *clone_num_suffixes
4549 = new hash_map
<const char *, unsigned>;
4552 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
4553 process_isra_node_results (node
, clone_num_suffixes
);
4555 delete clone_num_suffixes
;
4556 ggc_delete (func_sums
);
4562 fprintf (dump_file
, "\n========== IPA SRA IPA analysis done "
4568 const pass_data pass_data_ipa_sra
=
4570 IPA_PASS
, /* type */
4572 OPTGROUP_NONE
, /* optinfo_flags */
4573 TV_IPA_SRA
, /* tv_id */
4574 0, /* properties_required */
4575 0, /* properties_provided */
4576 0, /* properties_destroyed */
4577 0, /* todo_flags_start */
4578 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
4581 class pass_ipa_sra
: public ipa_opt_pass_d
4584 pass_ipa_sra (gcc::context
*ctxt
)
4585 : ipa_opt_pass_d (pass_data_ipa_sra
, ctxt
,
4586 ipa_sra_generate_summary
, /* generate_summary */
4587 ipa_sra_write_summary
, /* write_summary */
4588 ipa_sra_read_summary
, /* read_summary */
4589 NULL
, /* write_optimization_summary */
4590 NULL
, /* read_optimization_summary */
4591 NULL
, /* stmt_fixup */
4592 0, /* function_transform_todo_flags_start */
4593 NULL
, /* function_transform */
4594 NULL
) /* variable_transform */
4597 /* opt_pass methods: */
4598 bool gate (function
*) final override
4600 /* TODO: We should remove the optimize check after we ensure we never run
4601 IPA passes when not optimizing. */
4602 return (flag_ipa_sra
&& optimize
);
4605 unsigned int execute (function
*) final override
4607 return ipa_sra_analysis ();
4610 }; // class pass_ipa_sra
4614 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4615 create a summary structure describing IPA-SRA opportunities and constraints
4619 ipa_sra_summarize_function (cgraph_node
*node
)
4622 fprintf (dump_file
, "Creating summary for %s/%i:\n", node
->name (),
4624 gcc_obstack_init (&gensum_obstack
);
4625 loaded_decls
= new hash_set
<tree
>;
4627 isra_func_summary
*ifs
= NULL
;
4629 if (ipa_sra_preliminary_function_checks (node
))
4631 ifs
= func_sums
->get_create (node
);
4632 ifs
->m_candidate
= true;
4633 tree ret
= TREE_TYPE (TREE_TYPE (node
->decl
));
4634 ifs
->m_returns_value
= (TREE_CODE (ret
) != VOID_TYPE
);
4635 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
4637 parm
= DECL_CHAIN (parm
))
4640 auto_vec
<gensum_param_desc
, 16> param_descriptions (count
);
4642 struct function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
4643 bool cfun_pushed
= false;
4646 decl2desc
= new hash_map
<tree
, gensum_param_desc
*>;
4647 param_descriptions
.reserve_exact (count
);
4648 param_descriptions
.quick_grow_cleared (count
);
4650 if (create_parameter_descriptors (node
, ¶m_descriptions
))
4654 final_bbs
= BITMAP_ALLOC (NULL
);
4655 bb_dereferences
= XCNEWVEC (HOST_WIDE_INT
,
4657 * last_basic_block_for_fn (fun
));
4658 aa_walking_limit
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
4661 /* Scan function is run even when there are no removal or splitting
4662 candidates so that we can calculate hints on call edges which can be
4663 useful in callees. */
4664 scan_function (node
, fun
);
4670 dump_gensum_param_descriptors (dump_file
, node
->decl
,
4671 ¶m_descriptions
);
4672 fprintf (dump_file
, "----------------------------------------\n");
4675 process_scan_results (node
, fun
, ifs
, ¶m_descriptions
);
4679 if (bb_dereferences
)
4681 free (bb_dereferences
);
4682 bb_dereferences
= NULL
;
4683 BITMAP_FREE (final_bbs
);
4687 isra_analyze_all_outgoing_calls (node
);
4689 delete loaded_decls
;
4690 loaded_decls
= NULL
;
4696 obstack_free (&gensum_obstack
, NULL
);
4698 fprintf (dump_file
, "\n\n");
4700 verify_splitting_accesses (node
, false);
4705 make_pass_ipa_sra (gcc::context
*ctxt
)
4707 return new pass_ipa_sra (ctxt
);
4711 #include "gt-ipa-sra.h"