1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2019-2023 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* IPA-SRA is an interprocedural pass that removes unused function return
22 values (turning functions returning a value which is never used into void
23 functions) and removes unused function parameters. It can also replace an
24 aggregate parameter by a set of other parameters representing part of the
25 original, turning those passed by reference into new ones which pass the
28 The pass is a true IPA one, which means that it works in three stages in
29 order to be able to take advantage of LTO. First, summaries about functions
30 and each calls are generated. Function summaries (often called call graph
31 node summaries) contain mainly information about which parameters are
32 potential transformation candidates and which bits of candidates are
33 accessed. We differentiate between accesses done as a part of a call
34 statement (which might be not necessary if the callee is also transformed)
35 and others (which are mandatory). Call summaries (often called call graph
36 edge summaries) contain information about which function formal parameters
37 feed into which actual call arguments so that if two parameters are only
38 used in a sum which is then passed to another function which then however
39 does not use this parameter, all three parameters of the two functions can
40 be eliminated. Edge summaries also have flags whether the return value is
41 used or if it is only returned in the caller too. In LTO mode these
42 summaries are then streamed to the object file in the compilation phase and
43 streamed back in in the WPA analysis stage.
45 The interprocedural analysis phase traverses the graph in topological order
46 in two sweeps, one in each direction. First, from callees to callers for
47 parameter removal and splitting. Each strongly-connected component is
48 processed iteratively until the situation in it stabilizes. The pass from
49 callers to callees is then carried out to remove unused return values in a
52 Because parameter manipulation has big implications for call redirection
53 which is done only after all call graph nodes materialize, the
54 transformation phase is not part of this patch but is carried out by the
55 clone materialization and edge redirection itself, see comments in
56 ipa-param-manipulation.h for more details. */
61 #include "coretypes.h"
66 #include "tree-pass.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
73 #include "gimple-walk.h"
76 #include "alloc-pool.h"
77 #include "symbol-summary.h"
79 #include "tree-inline.h"
80 #include "ipa-utils.h"
83 #include "tree-streamer.h"
84 #include "internal-fn.h"
85 #include "symtab-clones.h"
89 static void ipa_sra_summarize_function (cgraph_node
*);
91 /* Bits used to track size of an aggregate in bytes interprocedurally. */
92 #define ISRA_ARG_SIZE_LIMIT_BITS 16
93 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94 /* How many parameters can feed into a call actual argument and still be
96 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
98 /* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
103 struct GTY(()) param_access
105 /* Type that a potential replacement should have. This field only has
106 meaning in the summary building and transformation phases, when it is
107 reconstructed from the body. Must not be touched in IPA analysis
111 /* Alias reference type to be used in MEM_REFs when adjusting caller
115 /* Values returned by get_ref_base_and_extent but converted to bytes and
116 stored as unsigned ints. */
117 unsigned unit_offset
;
118 unsigned unit_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
120 /* Set once we are sure that the access will really end up in a potentially
121 transformed function - initially not set for portions of formal parameters
122 that are only used as actual function arguments passed to callees. */
123 unsigned certain
: 1;
124 /* Set if the access has reverse scalar storage order. */
125 unsigned reverse
: 1;
128 /* This structure has the same purpose as the one above and additionally it
129 contains some fields that are only necessary in the summary generation
132 struct gensum_param_access
134 /* Values returned by get_ref_base_and_extent. */
135 HOST_WIDE_INT offset
;
138 /* if this access has any children (in terms of the definition above), this
139 points to the first one. */
140 struct gensum_param_access
*first_child
;
141 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
143 struct gensum_param_access
*next_sibling
;
145 /* Type that a potential replacement should have. This field only has
146 meaning in the summary building and transformation phases, when it is
147 reconstructed from the body. Must not be touched in IPA analysis
150 /* Alias reference type to be used in MEM_REFs when adjusting caller
154 /* Cumulative count of all loads. */
155 profile_count load_count
;
156 /* Have there been writes to or reads from this exact location except for as
157 arguments to a function call that can be tracked. */
160 /* Set if the access has reverse scalar storage order. */
164 /* Summary describing a parameter in the IPA stages. */
166 struct GTY(()) isra_param_desc
168 /* List of access representatives to the parameters, sorted according to
170 vec
<param_access
*, va_gc
> *accesses
;
172 /* Unit size limit of total size of all replacements. */
173 unsigned param_size_limit
: ISRA_ARG_SIZE_LIMIT_BITS
;
174 /* Sum of unit sizes of all certain replacements. */
175 unsigned size_reached
: ISRA_ARG_SIZE_LIMIT_BITS
;
176 /* Minimum offset that is known to be safe to dereference because of callers
177 pass pointers to DECLs of at least this size or because of dereferences in
179 unsigned safe_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
181 /* A parameter that is used only in call arguments and can be removed if all
182 concerned actual arguments are removed. */
183 unsigned locally_unused
: 1;
184 /* An aggregate that is a candidate for breaking up or complete removal. */
185 unsigned split_candidate
: 1;
186 /* Is this a parameter passing stuff by reference? */
188 /* Parameter hint set during IPA analysis when there is a caller which does
189 not construct the argument just to pass it to calls. Only meaningful for
190 by_ref parameters. */
191 unsigned not_specially_constructed
: 1;
192 /* Only meaningful for by_ref parameters. If set, this parameter can only be
193 a split candidate if all callers pass pointers that are known to point to
194 a chunk of memory large enough to contain all accesses. */
195 unsigned conditionally_dereferenceable
: 1;
196 /* Set when safe_size has been updated from at least one caller. */
197 unsigned safe_size_set
: 1;
200 /* Structure used when generating summaries that describes a parameter. */
202 struct gensum_param_desc
204 /* Roots of param_accesses. */
205 gensum_param_access
*accesses
;
206 /* Number of accesses in the access tree rooted in field accesses. */
207 unsigned access_count
;
209 /* If the below is non-zero, this is the number of uses as actual arguments. */
211 /* Number of times this parameter has been directly passed to. */
212 unsigned ptr_pt_count
;
214 /* Size limit of total size of all replacements. */
215 unsigned param_size_limit
;
216 /* Sum of sizes of nonarg accesses. */
217 unsigned nonarg_acc_size
;
219 /* A parameter that is used only in call arguments and can be removed if all
220 concerned actual arguments are removed. */
222 /* An aggregate that is a candidate for breaking up or a pointer passing data
223 by reference that is a candidate for being converted to a set of
224 parameters passing those data by value. */
225 bool split_candidate
;
226 /* Is this a parameter passing stuff by reference (either a pointer or a
227 source language reference type)? */
229 /* If this parameter passes stuff by reference, can it be safely dereferenced
230 without performing further checks (for example because it is a
233 /* Only meaningful for by_ref parameters. If set, this parameter can only be
234 a split candidate if all callers pass pointers that are known to point to
235 a chunk of memory large enough to contain all accesses. */
236 bool conditionally_dereferenceable
;
238 /* The number of this parameter as they are ordered in function decl. */
240 /* For parameters passing data by reference, this is parameter index to
241 compute indices to bb_dereferences. */
245 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
246 allocated in GC memory, this is not necessary and we can consider removing
250 free_param_decl_accesses (isra_param_desc
*desc
)
252 unsigned len
= vec_safe_length (desc
->accesses
);
253 for (unsigned i
= 0; i
< len
; ++i
)
254 ggc_free ((*desc
->accesses
)[i
]);
255 vec_free (desc
->accesses
);
258 /* Class used to convey information about functions from the
259 intra-procedural analysis stage to inter-procedural one. */
261 class GTY((for_user
)) isra_func_summary
264 /* initialize the object. */
267 : m_parameters (NULL
), m_candidate (false), m_returns_value (false),
268 m_return_ignored (false), m_queued (false)
271 /* Destroy m_parameters. */
273 ~isra_func_summary ();
275 /* Mark the function as not a candidate for any IPA-SRA transformation.
276 Return true if it was a candidate until now. */
280 /* Vector of parameter descriptors corresponding to the function being
282 vec
<isra_param_desc
, va_gc
> *m_parameters
;
284 /* Whether the node is even a candidate for any IPA-SRA transformation at
286 unsigned m_candidate
: 1;
288 /* Whether the original function returns any value. */
289 unsigned m_returns_value
: 1;
291 /* Set to true if all call statements do not actually use the returned
294 unsigned m_return_ignored
: 1;
296 /* Whether the node is already queued in IPA SRA stack during processing of
299 unsigned m_queued
: 1;
302 /* Deallocate the memory pointed to by isra_func_summary. TODO: Since this
303 data structure is allocated in GC memory, this is not necessary and we can
304 consider removing the destructor. */
306 isra_func_summary::~isra_func_summary ()
308 unsigned len
= vec_safe_length (m_parameters
);
309 for (unsigned i
= 0; i
< len
; ++i
)
310 free_param_decl_accesses (&(*m_parameters
)[i
]);
311 vec_free (m_parameters
);
314 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
315 true if it was a candidate until now. */
318 isra_func_summary::zap ()
320 bool ret
= m_candidate
;
323 /* TODO: see the destructor above. */
324 unsigned len
= vec_safe_length (m_parameters
);
325 for (unsigned i
= 0; i
< len
; ++i
)
326 free_param_decl_accesses (&(*m_parameters
)[i
]);
327 vec_free (m_parameters
);
332 /* Structure to describe which formal parameters feed into a particular actual
335 struct isra_param_flow
337 /* Number of elements in array inputs that contain valid data. */
339 /* Indices of formal parameters that feed into the described actual argument.
340 If aggregate_pass_through or pointer_pass_through below are true, it must
341 contain exactly one element which is passed through from a formal
342 parameter if the given number. Otherwise, the array contains indices of
343 callee's formal parameters which are used to calculate value of this
345 unsigned char inputs
[IPA_SRA_MAX_PARAM_FLOW_LEN
];
347 /* Offset within the formal parameter. */
348 unsigned unit_offset
;
349 /* When aggregate_pass_through is set, this is the size of the portion of an
350 aggregate formal parameter that is being passed. Otherwise, this is size
351 of pointed to memory that is known to be valid be dereferenced. */
352 unsigned unit_size
: ISRA_ARG_SIZE_LIMIT_BITS
;
354 /* True when the value of this actual argument is a portion of a formal
356 unsigned aggregate_pass_through
: 1;
357 /* True when the value of this actual copy is a verbatim pass through of an
359 unsigned pointer_pass_through
: 1;
360 /* True when it is safe to copy access candidates here from the callee, which
361 would mean introducing dereferences into callers of the caller. */
362 unsigned safe_to_import_accesses
: 1;
363 /* True when the passed value is an address of a structure that has been
364 constructed in the caller just to be passed by reference to functions
365 (i.e. is never read). */
366 unsigned constructed_for_calls
: 1;
369 /* Structure used to convey information about calls from the intra-procedural
370 analysis stage to inter-procedural one. */
372 class isra_call_summary
376 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
377 m_bit_aligned_arg (false), m_before_any_store (false)
380 void init_inputs (unsigned arg_count
);
383 /* Information about what formal parameters of the caller are used to compute
384 individual actual arguments of this call. */
385 auto_vec
<isra_param_flow
> m_arg_flow
;
387 /* Set to true if the call statement does not have a LHS. */
388 unsigned m_return_ignored
: 1;
390 /* Set to true if the LHS of call statement is only used to construct the
391 return value of the caller. */
392 unsigned m_return_returned
: 1;
394 /* Set when any of the call arguments are not byte-aligned. */
395 unsigned m_bit_aligned_arg
: 1;
397 /* Set to true if the call happend before any (other) store to memory in the
399 unsigned m_before_any_store
: 1;
402 /* Class to manage function summaries. */
404 class GTY((user
)) ipa_sra_function_summaries
405 : public function_summary
<isra_func_summary
*>
408 ipa_sra_function_summaries (symbol_table
*table
, bool ggc
):
409 function_summary
<isra_func_summary
*> (table
, ggc
) { }
411 void duplicate (cgraph_node
*, cgraph_node
*,
412 isra_func_summary
*old_sum
,
413 isra_func_summary
*new_sum
) final override
;
414 void insert (cgraph_node
*, isra_func_summary
*) final override
;
417 /* Hook that is called by summary when a node is duplicated. */
420 ipa_sra_function_summaries::duplicate (cgraph_node
*, cgraph_node
*,
421 isra_func_summary
*old_sum
,
422 isra_func_summary
*new_sum
)
424 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
426 new_sum
->m_candidate
= old_sum
->m_candidate
;
427 new_sum
->m_returns_value
= old_sum
->m_returns_value
;
428 new_sum
->m_return_ignored
= old_sum
->m_return_ignored
;
429 gcc_assert (!old_sum
->m_queued
);
430 new_sum
->m_queued
= false;
432 unsigned param_count
= vec_safe_length (old_sum
->m_parameters
);
435 vec_safe_reserve_exact (new_sum
->m_parameters
, param_count
);
436 new_sum
->m_parameters
->quick_grow_cleared (param_count
);
437 for (unsigned i
= 0; i
< param_count
; i
++)
439 isra_param_desc
*s
= &(*old_sum
->m_parameters
)[i
];
440 isra_param_desc
*d
= &(*new_sum
->m_parameters
)[i
];
442 d
->param_size_limit
= s
->param_size_limit
;
443 d
->size_reached
= s
->size_reached
;
444 d
->safe_size
= s
->safe_size
;
445 d
->locally_unused
= s
->locally_unused
;
446 d
->split_candidate
= s
->split_candidate
;
447 d
->by_ref
= s
->by_ref
;
448 d
->not_specially_constructed
= s
->not_specially_constructed
;
449 d
->conditionally_dereferenceable
= s
->conditionally_dereferenceable
;
450 d
->safe_size_set
= s
->safe_size_set
;
452 unsigned acc_count
= vec_safe_length (s
->accesses
);
453 vec_safe_reserve_exact (d
->accesses
, acc_count
);
454 for (unsigned j
= 0; j
< acc_count
; j
++)
456 param_access
*from
= (*s
->accesses
)[j
];
457 param_access
*to
= ggc_cleared_alloc
<param_access
> ();
458 to
->type
= from
->type
;
459 to
->alias_ptr_type
= from
->alias_ptr_type
;
460 to
->unit_offset
= from
->unit_offset
;
461 to
->unit_size
= from
->unit_size
;
462 to
->certain
= from
->certain
;
463 to
->reverse
= from
->reverse
;
464 d
->accesses
->quick_push (to
);
469 /* Pointer to the pass function summary holder. */
471 static GTY(()) ipa_sra_function_summaries
*func_sums
;
473 /* Hook that is called by summary when new node appears. */
476 ipa_sra_function_summaries::insert (cgraph_node
*node
, isra_func_summary
*)
478 if (opt_for_fn (node
->decl
, flag_ipa_sra
))
480 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
481 ipa_sra_summarize_function (node
);
485 func_sums
->remove (node
);
488 /* Class to manage call summaries. */
490 class ipa_sra_call_summaries
: public call_summary
<isra_call_summary
*>
493 ipa_sra_call_summaries (symbol_table
*table
):
494 call_summary
<isra_call_summary
*> (table
) { }
496 /* Duplicate info when an edge is cloned. */
497 void duplicate (cgraph_edge
*, cgraph_edge
*,
498 isra_call_summary
*old_sum
,
499 isra_call_summary
*new_sum
) final override
;
502 static ipa_sra_call_summaries
*call_sums
;
505 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
506 ARG_COUNT is the number of actual arguments passed. */
509 isra_call_summary::init_inputs (unsigned arg_count
)
513 gcc_checking_assert (m_arg_flow
.length () == 0);
516 if (m_arg_flow
.length () == 0)
518 m_arg_flow
.reserve_exact (arg_count
);
519 m_arg_flow
.quick_grow_cleared (arg_count
);
522 gcc_checking_assert (arg_count
== m_arg_flow
.length ());
525 /* Dump all information in call summary to F. */
528 isra_call_summary::dump (FILE *f
)
530 if (m_return_ignored
)
531 fprintf (f
, " return value ignored\n");
532 if (m_return_returned
)
533 fprintf (f
, " return value used only to compute caller return value\n");
534 if (m_before_any_store
)
535 fprintf (f
, " happens before any store to memory\n");
536 for (unsigned i
= 0; i
< m_arg_flow
.length (); i
++)
538 fprintf (f
, " Parameter %u:\n", i
);
539 isra_param_flow
*ipf
= &m_arg_flow
[i
];
544 fprintf (f
, " Scalar param sources: ");
545 for (int j
= 0; j
< ipf
->length
; j
++)
551 fprintf (f
, "%i", (int) ipf
->inputs
[j
]);
555 if (ipf
->aggregate_pass_through
)
556 fprintf (f
, " Aggregate pass through from the param given above, "
557 "unit offset: %u , unit size: %u\n",
558 ipf
->unit_offset
, ipf
->unit_size
);
559 else if (ipf
->unit_size
> 0)
560 fprintf (f
, " Known dereferenceable size: %u\n", ipf
->unit_size
);
561 if (ipf
->pointer_pass_through
)
562 fprintf (f
, " Pointer pass through from the param given above, "
563 "safe_to_import_accesses: %u\n", ipf
->safe_to_import_accesses
);
564 if (ipf
->constructed_for_calls
)
565 fprintf (f
, " Variable constructed just to be passed to "
570 /* Duplicate edge summary when an edge is cloned. */
573 ipa_sra_call_summaries::duplicate (cgraph_edge
*, cgraph_edge
*,
574 isra_call_summary
*old_sum
,
575 isra_call_summary
*new_sum
)
577 unsigned arg_count
= old_sum
->m_arg_flow
.length ();
578 new_sum
->init_inputs (arg_count
);
579 for (unsigned i
= 0; i
< arg_count
; i
++)
580 new_sum
->m_arg_flow
[i
] = old_sum
->m_arg_flow
[i
];
582 new_sum
->m_return_ignored
= old_sum
->m_return_ignored
;
583 new_sum
->m_return_returned
= old_sum
->m_return_returned
;
584 new_sum
->m_bit_aligned_arg
= old_sum
->m_bit_aligned_arg
;
585 new_sum
->m_before_any_store
= old_sum
->m_before_any_store
;
589 /* With all GTY stuff done, we can move to anonymous namespace. */
591 /* Quick mapping from a decl to its param descriptor. */
593 hash_map
<tree
, gensum_param_desc
*> *decl2desc
;
595 /* All local DECLs ever loaded from of and of those that have their address
596 assigned to a variable. */
598 hash_set
<tree
> *loaded_decls
;
600 /* Countdown of allowed Alias Analysis steps during summary building. */
602 int aa_walking_limit
;
604 /* This is a table in which for each basic block and parameter there is a
605 distance (offset + size) in that parameter which is dereferenced and
606 accessed in that BB. */
607 HOST_WIDE_INT
*bb_dereferences
= NULL
;
608 /* How many by-reference parameters there are in the current function. */
609 int unsafe_by_ref_count
;
611 /* Bitmap of BBs that can cause the function to "stop" progressing by
612 returning, throwing externally, looping infinitely or calling a function
613 which might abort etc.. */
616 /* Obstack to allocate various small structures required only when generating
617 summary for a function. */
618 struct obstack gensum_obstack
;
620 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
621 attributes, return true otherwise. NODE is the cgraph node of the current
625 ipa_sra_preliminary_function_checks (cgraph_node
*node
)
627 if (!node
->can_change_signature
)
630 fprintf (dump_file
, "Function cannot change signature.\n");
634 if (!tree_versionable_function_p (node
->decl
))
637 fprintf (dump_file
, "Function is not versionable.\n");
641 if (!opt_for_fn (node
->decl
, optimize
)
642 || !opt_for_fn (node
->decl
, flag_ipa_sra
))
645 fprintf (dump_file
, "Not optimizing or IPA-SRA turned off for this "
650 if (DECL_VIRTUAL_P (node
->decl
))
653 fprintf (dump_file
, "Function is a virtual method.\n");
657 struct function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
661 fprintf (dump_file
, "Function uses stdarg. \n");
665 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
668 fprintf (dump_file
, "Always inline function will be inlined "
676 /* Print access tree starting at ACCESS to F. */
679 dump_gensum_access (FILE *f
, gensum_param_access
*access
, unsigned indent
)
682 for (unsigned i
= 0; i
< indent
; i
++)
684 fprintf (f
, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC
,
686 fprintf (f
, ", size: " HOST_WIDE_INT_PRINT_DEC
, access
->size
);
687 fprintf (f
, ", type: ");
688 print_generic_expr (f
, access
->type
);
689 fprintf (f
, ", alias_ptr_type: ");
690 print_generic_expr (f
, access
->alias_ptr_type
);
691 fprintf (f
, ", load_count: ");
692 access
->load_count
.dump (f
);
693 fprintf (f
, ", nonarg: %u, reverse: %u\n", access
->nonarg
, access
->reverse
);
694 for (gensum_param_access
*ch
= access
->first_child
;
696 ch
= ch
->next_sibling
)
697 dump_gensum_access (f
, ch
, indent
+ 2);
701 /* Print access tree starting at ACCESS to F. */
704 dump_isra_access (FILE *f
, param_access
*access
)
706 fprintf (f
, " * Access to unit offset: %u", access
->unit_offset
);
707 fprintf (f
, ", unit size: %u", access
->unit_size
);
708 fprintf (f
, ", type: ");
709 print_generic_expr (f
, access
->type
);
710 fprintf (f
, ", alias_ptr_type: ");
711 print_generic_expr (f
, access
->alias_ptr_type
);
713 fprintf (f
, ", certain");
715 fprintf (f
, ", not certain");
717 fprintf (f
, ", reverse");
721 /* Dump access tree starting at ACCESS to stderr. */
724 debug_isra_access (param_access
*access
)
726 dump_isra_access (stderr
, access
);
729 /* Dump DESC to F. */
732 dump_gensum_param_descriptor (FILE *f
, gensum_param_desc
*desc
)
734 if (desc
->locally_unused
)
735 fprintf (f
, " unused with %i call_uses\n", desc
->call_uses
);
736 if (!desc
->split_candidate
)
738 fprintf (f
, " not a candidate\n");
742 fprintf (f
, " %s%s by_ref with %u pass throughs\n",
743 desc
->safe_ref
? "safe" : "unsafe",
744 desc
->conditionally_dereferenceable
745 ? " conditionally_dereferenceable" : " ok",
748 for (gensum_param_access
*acc
= desc
->accesses
; acc
; acc
= acc
->next_sibling
)
749 dump_gensum_access (f
, acc
, 2);
752 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
756 dump_gensum_param_descriptors (FILE *f
, tree fndecl
,
757 vec
<gensum_param_desc
> *param_descriptions
)
759 tree parm
= DECL_ARGUMENTS (fndecl
);
761 i
< param_descriptions
->length ();
762 ++i
, parm
= DECL_CHAIN (parm
))
764 fprintf (f
, " Descriptor for parameter %i ", i
);
765 print_generic_expr (f
, parm
, TDF_UID
);
767 dump_gensum_param_descriptor (f
, &(*param_descriptions
)[i
]);
772 /* Dump DESC to F. If HINTS is true, also dump IPA-analysis computed
776 dump_isra_param_descriptor (FILE *f
, isra_param_desc
*desc
, bool hints
)
778 if (desc
->locally_unused
)
780 fprintf (f
, " (locally) unused\n");
782 if (!desc
->split_candidate
)
784 fprintf (f
, " not a candidate for splitting");
785 if (hints
&& desc
->by_ref
&& desc
->safe_size_set
)
786 fprintf (f
, ", safe_size: %u", (unsigned) desc
->safe_size
);
790 fprintf (f
, " param_size_limit: %u, size_reached: %u%s",
791 desc
->param_size_limit
, desc
->size_reached
,
792 desc
->by_ref
? ", by_ref" : "");
793 if (desc
->by_ref
&& desc
->conditionally_dereferenceable
)
794 fprintf (f
, ", conditionally_dereferenceable");
797 if (desc
->by_ref
&& !desc
->not_specially_constructed
)
798 fprintf (f
, ", args_specially_constructed");
799 if (desc
->by_ref
&& desc
->safe_size_set
)
800 fprintf (f
, ", safe_size: %u", (unsigned) desc
->safe_size
);
804 for (unsigned i
= 0; i
< vec_safe_length (desc
->accesses
); ++i
)
806 param_access
*access
= (*desc
->accesses
)[i
];
807 dump_isra_access (f
, access
);
811 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
812 If HINTS is true, also dump IPA-analysis computed hints. */
815 dump_isra_param_descriptors (FILE *f
, tree fndecl
, isra_func_summary
*ifs
,
818 tree parm
= DECL_ARGUMENTS (fndecl
);
819 if (!ifs
->m_parameters
)
821 fprintf (f
, " parameter descriptors not available\n");
826 i
< ifs
->m_parameters
->length ();
827 ++i
, parm
= DECL_CHAIN (parm
))
829 fprintf (f
, " Descriptor for parameter %i ", i
);
830 print_generic_expr (f
, parm
, TDF_UID
);
832 dump_isra_param_descriptor (f
, &(*ifs
->m_parameters
)[i
], hints
);
836 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
837 function fails return false, otherwise return true. SRC must fit into an
838 unsigned char. Used for purposes of transitive unused parameter
842 add_src_to_param_flow (isra_param_flow
*param_flow
, int src
)
844 gcc_checking_assert (src
>= 0 && src
<= UCHAR_MAX
);
845 if (param_flow
->length
== IPA_SRA_MAX_PARAM_FLOW_LEN
)
848 param_flow
->inputs
[(int) param_flow
->length
] = src
;
849 param_flow
->length
++;
853 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
854 it is the only input. Used for purposes of transitive parameter
858 set_single_param_flow_source (isra_param_flow
*param_flow
, int src
)
860 gcc_checking_assert (src
>= 0 && src
<= UCHAR_MAX
);
861 if (param_flow
->length
== 0)
863 param_flow
->inputs
[0] = src
;
864 param_flow
->length
= 1;
866 else if (param_flow
->length
== 1)
867 gcc_assert (param_flow
->inputs
[0] == src
);
872 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
876 get_single_param_flow_source (const isra_param_flow
*param_flow
)
878 gcc_assert (param_flow
->length
== 1);
879 return param_flow
->inputs
[0];
882 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
883 in FUN represented with NODE and return a negative number if any of them is
884 used for something else than either an actual call argument, simple
885 arithmetic operation or debug statement. If there are no such uses, return
886 the number of actual arguments that this parameter eventually feeds to (or
887 zero if there is none). For any such parameter, mark PARM_NUM as one of its
888 sources. ANALYZED is a bitmap that tracks which SSA names we have already
889 started investigating. */
892 isra_track_scalar_value_uses (function
*fun
, cgraph_node
*node
, tree name
,
893 int parm_num
, bitmap analyzed
)
896 imm_use_iterator imm_iter
;
899 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, name
)
901 if (is_gimple_debug (stmt
))
904 /* TODO: We could handle at least const builtin functions like arithmetic
906 if (is_gimple_call (stmt
))
910 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
913 gcall
*call
= as_a
<gcall
*> (stmt
);
915 if (gimple_call_internal_p (call
)
916 || (arg_count
= gimple_call_num_args (call
)) == 0)
922 cgraph_edge
*cs
= node
->get_edge (stmt
);
923 gcc_checking_assert (cs
);
924 isra_call_summary
*csum
= call_sums
->get_create (cs
);
925 csum
->init_inputs (arg_count
);
928 for (unsigned i
= 0; i
< arg_count
; i
++)
929 if (gimple_call_arg (call
, i
) == name
)
931 if (!add_src_to_param_flow (&csum
->m_arg_flow
[i
], parm_num
))
940 || all_uses
!= simple_uses
)
947 else if (!stmt_unremovable_because_of_non_call_eh_p (fun
, stmt
)
948 && ((is_gimple_assign (stmt
) && !gimple_has_volatile_ops (stmt
))
949 || gimple_code (stmt
) == GIMPLE_PHI
))
952 if (gimple_code (stmt
) == GIMPLE_PHI
)
953 lhs
= gimple_phi_result (stmt
);
955 lhs
= gimple_assign_lhs (stmt
);
957 if (TREE_CODE (lhs
) != SSA_NAME
)
962 gcc_assert (!gimple_vdef (stmt
));
963 if (bitmap_set_bit (analyzed
, SSA_NAME_VERSION (lhs
)))
965 int tmp
= isra_track_scalar_value_uses (fun
, node
, lhs
, parm_num
,
984 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
985 also described by NODE) and simple arithmetic calculations involving PARM
986 and return false if any of them is used for something else than either an
987 actual call argument, simple arithmetic operation or debug statement. If
988 there are no such uses, return true and store the number of actual arguments
989 that this parameter eventually feeds to (or zero if there is none) to
990 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
993 This function is similar to ptr_parm_has_nonarg_uses but its results are
994 meant for unused parameter removal, as opposed to splitting of parameters
995 passed by reference or converting them to passed by value. */
998 isra_track_scalar_param_local_uses (function
*fun
, cgraph_node
*node
, tree parm
,
999 int parm_num
, int *call_uses_p
)
1001 gcc_checking_assert (is_gimple_reg (parm
));
1003 tree name
= ssa_default_def (fun
, parm
);
1004 if (!name
|| has_zero_uses (name
))
1010 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1011 if (parm_num
> UCHAR_MAX
)
1014 bitmap analyzed
= BITMAP_ALLOC (NULL
);
1015 int call_uses
= isra_track_scalar_value_uses (fun
, node
, name
, parm_num
,
1017 BITMAP_FREE (analyzed
);
1020 *call_uses_p
= call_uses
;
1024 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
1025 examine whether there are any nonarg uses that are not actual arguments or
1026 otherwise infeasible uses. If so, return true, otherwise return false.
1027 Create pass-through IPA flow records for any direct uses as argument calls
1028 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
1029 must represent the function that is currently analyzed, PARM_NUM must be the
1030 index of the analyzed parameter.
1032 This function is similar to isra_track_scalar_param_local_uses but its
1033 results are meant for splitting of parameters passed by reference or turning
1034 them into bits passed by value, as opposed to generic unused parameter
1038 ptr_parm_has_nonarg_uses (cgraph_node
*node
, function
*fun
, tree parm
,
1039 int parm_num
, unsigned *pt_count_p
)
1041 imm_use_iterator ui
;
1043 tree name
= ssa_default_def (fun
, parm
);
1045 unsigned pt_count
= 0;
1047 if (!name
|| has_zero_uses (name
))
1050 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1051 if (parm_num
> UCHAR_MAX
)
1054 FOR_EACH_IMM_USE_STMT (stmt
, ui
, name
)
1056 unsigned uses_ok
= 0;
1057 use_operand_p use_p
;
1059 if (is_gimple_debug (stmt
))
1062 if (gimple_assign_single_p (stmt
))
1064 tree rhs
= gimple_assign_rhs1 (stmt
);
1065 if (!TREE_THIS_VOLATILE (rhs
))
1067 while (handled_component_p (rhs
))
1068 rhs
= TREE_OPERAND (rhs
, 0);
1069 if (TREE_CODE (rhs
) == MEM_REF
1070 && TREE_OPERAND (rhs
, 0) == name
1071 && integer_zerop (TREE_OPERAND (rhs
, 1))
1072 && types_compatible_p (TREE_TYPE (rhs
),
1073 TREE_TYPE (TREE_TYPE (name
))))
1077 else if (is_gimple_call (stmt
))
1079 gcall
*call
= as_a
<gcall
*> (stmt
);
1081 if (gimple_call_internal_p (call
)
1082 || (arg_count
= gimple_call_num_args (call
)) == 0)
1088 cgraph_edge
*cs
= node
->get_edge (stmt
);
1089 gcc_checking_assert (cs
);
1090 isra_call_summary
*csum
= call_sums
->get_create (cs
);
1091 csum
->init_inputs (arg_count
);
1093 for (unsigned i
= 0; i
< arg_count
; ++i
)
1095 tree arg
= gimple_call_arg (stmt
, i
);
1099 /* TODO: Allow &MEM_REF[name + offset] here,
1100 ipa_param_body_adjustments::modify_call_stmt has to be
1102 csum
->m_arg_flow
[i
].pointer_pass_through
= true;
1103 set_single_param_flow_source (&csum
->m_arg_flow
[i
], parm_num
);
1109 if (!TREE_THIS_VOLATILE (arg
))
1111 while (handled_component_p (arg
))
1112 arg
= TREE_OPERAND (arg
, 0);
1113 if (TREE_CODE (arg
) == MEM_REF
1114 && TREE_OPERAND (arg
, 0) == name
1115 && integer_zerop (TREE_OPERAND (arg
, 1))
1116 && types_compatible_p (TREE_TYPE (arg
),
1117 TREE_TYPE (TREE_TYPE (name
))))
1123 /* If the number of valid uses does not match the number of
1124 uses in this stmt there is an unhandled use. */
1125 unsigned all_uses
= 0;
1126 FOR_EACH_IMM_USE_ON_STMT (use_p
, ui
)
1129 gcc_checking_assert (uses_ok
<= all_uses
);
1130 if (uses_ok
!= all_uses
)
1137 *pt_count_p
= pt_count
;
1141 /* Initialize vector of parameter descriptors of NODE. Return true if there
1142 are any candidates for splitting or unused aggregate parameter removal (the
1143 function may return false if there are candidates for removal of register
1147 create_parameter_descriptors (cgraph_node
*node
,
1148 vec
<gensum_param_desc
> *param_descriptions
)
1150 function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
1154 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
1156 parm
= DECL_CHAIN (parm
), num
++)
1159 gensum_param_desc
*desc
= &(*param_descriptions
)[num
];
1160 /* param_descriptions vector is grown cleared in the caller. */
1161 desc
->param_number
= num
;
1162 decl2desc
->put (parm
, desc
);
1164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1165 print_generic_expr (dump_file
, parm
, TDF_UID
);
1167 int scalar_call_uses
;
1168 tree type
= TREE_TYPE (parm
);
1169 if (TREE_THIS_VOLATILE (parm
))
1171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1172 fprintf (dump_file
, " not a candidate, is volatile\n");
1175 if (!is_gimple_reg_type (type
) && is_va_list_type (type
))
1177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1178 fprintf (dump_file
, " not a candidate, is a va_list type\n");
1181 if (TREE_ADDRESSABLE (parm
))
1183 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1184 fprintf (dump_file
, " not a candidate, is addressable\n");
1187 if (TREE_ADDRESSABLE (type
))
1189 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1190 fprintf (dump_file
, " not a candidate, type cannot be split\n");
1194 if (is_gimple_reg (parm
)
1195 && !isra_track_scalar_param_local_uses (fun
, node
, parm
, num
,
1198 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1199 fprintf (dump_file
, " is a scalar with only %i call uses\n",
1202 desc
->locally_unused
= true;
1203 desc
->call_uses
= scalar_call_uses
;
1206 if (POINTER_TYPE_P (type
))
1208 desc
->by_ref
= true;
1209 if (TREE_CODE (type
) == REFERENCE_TYPE
1211 && TREE_CODE (TREE_TYPE (node
->decl
)) == METHOD_TYPE
))
1212 desc
->safe_ref
= true;
1214 desc
->safe_ref
= false;
1215 type
= TREE_TYPE (type
);
1217 if (TREE_CODE (type
) == FUNCTION_TYPE
1218 || TREE_CODE (type
) == METHOD_TYPE
)
1220 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1221 fprintf (dump_file
, " not a candidate, reference to "
1225 if (TYPE_VOLATILE (type
))
1227 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1228 fprintf (dump_file
, " not a candidate, reference to "
1229 "a volatile type\n");
1232 if (TREE_CODE (type
) == ARRAY_TYPE
1233 && TYPE_NONALIASED_COMPONENT (type
))
1235 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1236 fprintf (dump_file
, " not a candidate, reference to "
1237 "a nonaliased component array\n");
1240 if (!is_gimple_reg (parm
))
1242 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1243 fprintf (dump_file
, " not a candidate, a reference which is "
1244 "not a gimple register (probably addressable)\n");
1247 if (is_va_list_type (type
))
1249 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1250 fprintf (dump_file
, " not a candidate, reference to "
1254 if (ptr_parm_has_nonarg_uses (node
, fun
, parm
, num
,
1255 &desc
->ptr_pt_count
))
1257 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1258 fprintf (dump_file
, " not a candidate, reference has "
1263 else if (!AGGREGATE_TYPE_P (type
))
1265 /* This is in an else branch because scalars passed by reference are
1266 still candidates to be passed by value. */
1267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1268 fprintf (dump_file
, " not a candidate, not an aggregate\n");
1272 if (!COMPLETE_TYPE_P (type
))
1274 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1275 fprintf (dump_file
, " not a candidate, not a complete type\n");
1278 if (!tree_fits_uhwi_p (TYPE_SIZE (type
)))
1280 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1281 fprintf (dump_file
, " not a candidate, size not representable\n");
1284 unsigned HOST_WIDE_INT type_size
1285 = tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
1287 || type_size
>= ISRA_ARG_SIZE_LIMIT
)
1289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1290 fprintf (dump_file
, " not a candidate, has zero or huge size\n");
1293 if (type_internals_preclude_sra_p (type
, &msg
))
1295 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1296 fprintf (dump_file
, " not a candidate, %s\n", msg
);
1300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1301 fprintf (dump_file
, " is a candidate\n");
1304 desc
->split_candidate
= true;
1305 if (desc
->by_ref
&& !desc
->safe_ref
)
1306 desc
->deref_index
= unsafe_by_ref_count
++;
1311 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1312 found, which happens if DECL is for a static chain. */
1314 static gensum_param_desc
*
1315 get_gensum_param_desc (tree decl
)
1319 gcc_checking_assert (TREE_CODE (decl
) == PARM_DECL
);
1320 gensum_param_desc
**slot
= decl2desc
->get (decl
);
1322 /* This can happen for static chains which we cannot handle so far. */
1324 gcc_checking_assert (*slot
);
1329 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1330 write REASON to the dump file if there is one. */
1333 disqualify_split_candidate (gensum_param_desc
*desc
, const char *reason
)
1335 if (!desc
->split_candidate
)
1338 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1339 fprintf (dump_file
, "! Disqualifying parameter number %i - %s\n",
1340 desc
->param_number
, reason
);
1342 desc
->split_candidate
= false;
1345 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1349 disqualify_split_candidate (tree decl
, const char *reason
)
1351 gensum_param_desc
*desc
= get_gensum_param_desc (decl
);
1353 disqualify_split_candidate (desc
, reason
);
1356 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1357 first, check that there are not too many of them already. If so, do not
1358 allocate anything and return NULL. */
1360 static gensum_param_access
*
1361 allocate_access (gensum_param_desc
*desc
,
1362 HOST_WIDE_INT offset
, HOST_WIDE_INT size
)
1364 if (desc
->access_count
1365 == (unsigned) param_ipa_sra_max_replacements
)
1367 disqualify_split_candidate (desc
, "Too many replacement candidates");
1371 gensum_param_access
*access
1372 = (gensum_param_access
*) obstack_alloc (&gensum_obstack
,
1373 sizeof (gensum_param_access
));
1374 memset (access
, 0, sizeof (*access
));
1375 access
->offset
= offset
;
1376 access
->size
= size
;
1377 access
->load_count
= profile_count::zero ();
1381 /* In what context scan_expr_access has been called, whether it deals with a
1382 load, a function argument, or a store. Please note that in rare
1383 circumstances when it is not clear if the access is a load or store,
1384 ISRA_CTX_STORE is used too. */
1386 enum isra_scan_context
{ISRA_CTX_LOAD
, ISRA_CTX_ARG
, ISRA_CTX_STORE
};
1388 /* Return an access describing memory access to the variable described by DESC
1389 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1390 at a certain tree level FIRST. Attempt to create it and put into the
1391 appropriate place in the access tree if does not exist, but fail and return
1392 NULL if there are already too many accesses, if it would create a partially
1393 overlapping access or if an access would end up within a pre-existing
1396 static gensum_param_access
*
1397 get_access_1 (gensum_param_desc
*desc
, gensum_param_access
**first
,
1398 HOST_WIDE_INT offset
, HOST_WIDE_INT size
, isra_scan_context ctx
)
1400 gensum_param_access
*access
= *first
, **ptr
= first
;
1404 /* No pre-existing access at this level, just create it. */
1405 gensum_param_access
*a
= allocate_access (desc
, offset
, size
);
1412 if (access
->offset
>= offset
+ size
)
1414 /* We want to squeeze it in front of the very first access, just do
1416 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1419 r
->next_sibling
= access
;
1424 /* Skip all accesses that have to come before us until the next sibling is
1426 while (offset
>= access
->offset
+ access
->size
1427 && access
->next_sibling
1428 && access
->next_sibling
->offset
< offset
+ size
)
1430 ptr
= &access
->next_sibling
;
1431 access
= access
->next_sibling
;
1434 /* At this point we know we do not belong before access. */
1435 gcc_assert (access
->offset
< offset
+ size
);
1437 if (access
->offset
== offset
&& access
->size
== size
)
1438 /* We found what we were looking for. */
1441 if (access
->offset
<= offset
1442 && access
->offset
+ access
->size
>= offset
+ size
)
1444 /* We fit into access which is larger than us. We need to find/create
1445 something below access. But we only allow nesting in call
1450 return get_access_1 (desc
, &access
->first_child
, offset
, size
, ctx
);
1453 if (offset
<= access
->offset
1454 && offset
+ size
>= access
->offset
+ access
->size
)
1455 /* We are actually bigger than access, which fully fits into us, take its
1456 place and make all accesses fitting into it its children. */
1458 /* But first, we only allow nesting in call arguments so check if that is
1459 what we are trying to represent. */
1460 if (ctx
!= ISRA_CTX_ARG
)
1463 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1466 r
->first_child
= access
;
1468 while (access
->next_sibling
1469 && access
->next_sibling
->offset
< offset
+ size
)
1470 access
= access
->next_sibling
;
1471 if (access
->offset
+ access
->size
> offset
+ size
)
1473 /* This must be a different access, which are sorted, so the
1474 following must be true and this signals a partial overlap. */
1475 gcc_assert (access
->offset
> offset
);
1479 r
->next_sibling
= access
->next_sibling
;
1480 access
->next_sibling
= NULL
;
1485 if (offset
>= access
->offset
+ access
->size
)
1487 /* We belong after access. */
1488 gensum_param_access
*r
= allocate_access (desc
, offset
, size
);
1491 r
->next_sibling
= access
->next_sibling
;
1492 access
->next_sibling
= r
;
1496 if (offset
< access
->offset
)
1498 /* We know the following, otherwise we would have created a
1500 gcc_checking_assert (offset
+ size
< access
->offset
+ access
->size
);
1504 if (offset
+ size
> access
->offset
+ access
->size
)
1507 gcc_checking_assert (offset
> access
->offset
);
1514 /* Return an access describing memory access to the variable described by DESC
1515 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1516 to create if it does not exist, but fail and return NULL if there are
1517 already too many accesses, if it would create a partially overlapping access
1518 or if an access would end up in a non-call access. */
1520 static gensum_param_access
*
1521 get_access (gensum_param_desc
*desc
, HOST_WIDE_INT offset
, HOST_WIDE_INT size
,
1522 isra_scan_context ctx
)
1524 gcc_checking_assert (desc
->split_candidate
);
1526 gensum_param_access
*access
= get_access_1 (desc
, &desc
->accesses
, offset
,
1530 disqualify_split_candidate (desc
,
1531 "Bad access overlap or too many accesses");
1537 case ISRA_CTX_STORE
:
1538 gcc_assert (!desc
->by_ref
);
1541 access
->nonarg
= true;
1550 /* Verify that parameter access tree starting with ACCESS is in good shape.
1551 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1552 ACCESS or zero if there is none. */
1555 verify_access_tree_1 (gensum_param_access
*access
, HOST_WIDE_INT parent_offset
,
1556 HOST_WIDE_INT parent_size
)
1560 gcc_assert (access
->offset
>= 0 && access
->size
>= 0);
1562 if (parent_size
!= 0)
1564 if (access
->offset
< parent_offset
)
1566 error ("Access offset before parent offset");
1569 if (access
->size
>= parent_size
)
1571 error ("Access size greater or equal to its parent size");
1574 if (access
->offset
+ access
->size
> parent_offset
+ parent_size
)
1576 error ("Access terminates outside of its parent");
1581 if (verify_access_tree_1 (access
->first_child
, access
->offset
,
1585 if (access
->next_sibling
1586 && (access
->next_sibling
->offset
< access
->offset
+ access
->size
))
1588 error ("Access overlaps with its sibling");
1592 access
= access
->next_sibling
;
1597 /* Verify that parameter access tree starting with ACCESS is in good shape,
1598 halt compilation and dump the tree to stderr if not. */
1601 isra_verify_access_tree (gensum_param_access
*access
)
1603 if (verify_access_tree_1 (access
, 0, 0))
1605 for (; access
; access
= access
->next_sibling
)
1606 dump_gensum_access (stderr
, access
, 2);
1607 internal_error ("IPA-SRA access verification failed");
1612 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1613 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1616 asm_visit_addr (gimple
*, tree op
, tree
, void *)
1618 op
= get_base_address (op
);
1620 && TREE_CODE (op
) == PARM_DECL
)
1621 disqualify_split_candidate (op
, "Non-scalarizable GIMPLE_ASM operand.");
1626 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1627 basic block BB, unless the BB has already been marked as a potentially
1631 mark_param_dereference (gensum_param_desc
*desc
, HOST_WIDE_INT dist
,
1634 gcc_assert (desc
->by_ref
);
1635 gcc_checking_assert (desc
->split_candidate
);
1638 || bitmap_bit_p (final_bbs
, bb
->index
))
1641 int idx
= bb
->index
* unsafe_by_ref_count
+ desc
->deref_index
;
1642 if (bb_dereferences
[idx
] < dist
)
1643 bb_dereferences
[idx
] = dist
;
1646 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1647 previously recorded OLD_TYPE. */
1650 type_prevails_p (tree old_type
, tree new_type
)
1652 if (old_type
== new_type
)
1655 /* Non-aggregates are always better. */
1656 if (!is_gimple_reg_type (old_type
)
1657 && is_gimple_reg_type (new_type
))
1659 if (is_gimple_reg_type (old_type
)
1660 && !is_gimple_reg_type (new_type
))
1663 /* Prefer any complex or vector type over any other scalar type. */
1664 if (TREE_CODE (old_type
) != COMPLEX_TYPE
1665 && TREE_CODE (old_type
) != VECTOR_TYPE
1666 && (TREE_CODE (new_type
) == COMPLEX_TYPE
1667 || VECTOR_TYPE_P (new_type
)))
1669 if ((TREE_CODE (old_type
) == COMPLEX_TYPE
1670 || VECTOR_TYPE_P (old_type
))
1671 && TREE_CODE (new_type
) != COMPLEX_TYPE
1672 && TREE_CODE (new_type
) != VECTOR_TYPE
)
1675 /* Use the integral type with the bigger precision. */
1676 if (INTEGRAL_TYPE_P (old_type
)
1677 && INTEGRAL_TYPE_P (new_type
))
1678 return (TYPE_PRECISION (new_type
) > TYPE_PRECISION (old_type
));
1680 /* Attempt to disregard any integral type with non-full precision. */
1681 if (INTEGRAL_TYPE_P (old_type
)
1682 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type
))
1683 != TYPE_PRECISION (old_type
)))
1685 if (INTEGRAL_TYPE_P (new_type
)
1686 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type
))
1687 != TYPE_PRECISION (new_type
)))
1689 /* Stabilize the selection. */
1690 return TYPE_UID (old_type
) < TYPE_UID (new_type
);
1693 /* When scanning an expression which is a call argument, this structure
1694 specifies the call and the position of the argument. */
1696 struct scan_call_info
1698 /* Call graph edge representing the call. */
1700 /* Total number of arguments in the call. */
1701 unsigned argument_count
;
1702 /* Number of the actual argument being scanned. */
1706 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1707 call argument described by CALL_INFO. */
1710 record_nonregister_call_use (gensum_param_desc
*desc
,
1711 scan_call_info
*call_info
,
1712 unsigned unit_offset
, unsigned unit_size
)
1714 isra_call_summary
*csum
= call_sums
->get_create (call_info
->cs
);
1715 csum
->init_inputs (call_info
->argument_count
);
1717 isra_param_flow
*param_flow
= &csum
->m_arg_flow
[call_info
->arg_idx
];
1718 param_flow
->aggregate_pass_through
= true;
1719 set_single_param_flow_source (param_flow
, desc
->param_number
);
1720 param_flow
->unit_offset
= unit_offset
;
1721 param_flow
->unit_size
= unit_size
;
1725 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1729 mark_maybe_modified (ao_ref
*, tree
, void *data
)
1731 bool *maybe_modified
= (bool *) data
;
1732 *maybe_modified
= true;
1736 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1737 specifies whether EXPR is used in a load, store or as an argument call. BB
1738 must be the basic block in which expr resides. If CTX specifies call
1739 argument context, CALL_INFO must describe that call and argument position,
1740 otherwise it is ignored. */
1743 scan_expr_access (tree expr
, gimple
*stmt
, isra_scan_context ctx
,
1744 basic_block bb
, scan_call_info
*call_info
= NULL
)
1746 poly_int64 poffset
, psize
, pmax_size
;
1747 HOST_WIDE_INT offset
, size
, max_size
;
1752 if (TREE_CODE (expr
) == ADDR_EXPR
)
1754 if (ctx
== ISRA_CTX_ARG
)
1756 tree t
= get_base_address (TREE_OPERAND (expr
, 0));
1757 if (VAR_P (t
) && !TREE_STATIC (t
))
1758 loaded_decls
->add (t
);
1761 if (TREE_CODE (expr
) == SSA_NAME
1762 || CONSTANT_CLASS_P (expr
))
1765 if (TREE_CODE (expr
) == BIT_FIELD_REF
1766 || TREE_CODE (expr
) == IMAGPART_EXPR
1767 || TREE_CODE (expr
) == REALPART_EXPR
)
1768 expr
= TREE_OPERAND (expr
, 0);
1770 base
= get_ref_base_and_extent (expr
, &poffset
, &psize
, &pmax_size
, &reverse
);
1772 if (TREE_CODE (base
) == MEM_REF
)
1774 tree op
= TREE_OPERAND (base
, 0);
1775 if (TREE_CODE (op
) != SSA_NAME
1776 || !SSA_NAME_IS_DEFAULT_DEF (op
))
1778 base
= SSA_NAME_VAR (op
);
1783 else if (VAR_P (base
)
1784 && !TREE_STATIC (base
)
1785 && (ctx
== ISRA_CTX_ARG
1786 || ctx
== ISRA_CTX_LOAD
))
1787 loaded_decls
->add (base
);
1789 if (TREE_CODE (base
) != PARM_DECL
)
1792 gensum_param_desc
*desc
= get_gensum_param_desc (base
);
1793 if (!desc
|| !desc
->split_candidate
)
1796 if (!poffset
.is_constant (&offset
)
1797 || !psize
.is_constant (&size
)
1798 || !pmax_size
.is_constant (&max_size
))
1800 disqualify_split_candidate (desc
, "Encountered a polynomial-sized "
1804 if (size
< 0 || size
!= max_size
)
1806 disqualify_split_candidate (desc
, "Encountered a variable sized access.");
1809 if (TREE_CODE (expr
) == COMPONENT_REF
1810 && DECL_BIT_FIELD (TREE_OPERAND (expr
, 1)))
1812 disqualify_split_candidate (desc
, "Encountered a bit-field access.");
1817 disqualify_split_candidate (desc
, "Encountered an access at a "
1818 "negative offset.");
1821 gcc_assert ((offset
% BITS_PER_UNIT
) == 0);
1822 gcc_assert ((size
% BITS_PER_UNIT
) == 0);
1823 if ((offset
/ BITS_PER_UNIT
) >= (UINT_MAX
- ISRA_ARG_SIZE_LIMIT
)
1824 || (size
/ BITS_PER_UNIT
) >= ISRA_ARG_SIZE_LIMIT
)
1826 disqualify_split_candidate (desc
, "Encountered an access with too big "
1831 tree type
= TREE_TYPE (expr
);
1832 unsigned int exp_align
= get_object_alignment (expr
);
1834 if (exp_align
< TYPE_ALIGN (type
))
1836 disqualify_split_candidate (desc
, "Underaligned access.");
1844 disqualify_split_candidate (desc
, "Dereferencing a non-reference.");
1847 else if (ctx
== ISRA_CTX_STORE
)
1849 disqualify_split_candidate (desc
, "Storing to data passed by "
1854 if (!aa_walking_limit
)
1856 disqualify_split_candidate (desc
, "Out of alias analysis step "
1861 gcc_checking_assert (gimple_vuse (stmt
));
1862 bool maybe_modified
= false;
1865 ao_ref_init (&ar
, expr
);
1866 bitmap visited
= BITMAP_ALLOC (NULL
);
1867 int walked
= walk_aliased_vdefs (&ar
, gimple_vuse (stmt
),
1868 mark_maybe_modified
, &maybe_modified
,
1869 &visited
, NULL
, aa_walking_limit
);
1870 BITMAP_FREE (visited
);
1873 gcc_assert (aa_walking_limit
> walked
);
1874 aa_walking_limit
= aa_walking_limit
- walked
;
1877 aa_walking_limit
= 0;
1878 if (maybe_modified
|| walked
< 0)
1880 disqualify_split_candidate (desc
, "Data passed by reference possibly "
1881 "modified through an alias.");
1885 mark_param_dereference (desc
, offset
+ size
, bb
);
1888 /* Pointer parameters with direct uses should have been ruled out by
1889 analyzing SSA default def when looking at the parameters. */
1890 gcc_assert (!desc
->by_ref
);
1892 gensum_param_access
*access
= get_access (desc
, offset
, size
, ctx
);
1896 if (ctx
== ISRA_CTX_ARG
)
1898 gcc_checking_assert (call_info
);
1901 record_nonregister_call_use (desc
, call_info
, offset
/ BITS_PER_UNIT
,
1902 size
/ BITS_PER_UNIT
);
1904 /* This is not a pass-through of a pointer, this is a use like any
1906 access
->nonarg
= true;
1908 else if (ctx
== ISRA_CTX_LOAD
&& bb
->count
.initialized_p ())
1909 access
->load_count
+= bb
->count
;
1913 access
->type
= type
;
1914 access
->alias_ptr_type
= reference_alias_ptr_type (expr
);
1915 access
->reverse
= reverse
;
1919 if (exp_align
< TYPE_ALIGN (access
->type
))
1921 disqualify_split_candidate (desc
, "Reference has lower alignment "
1922 "than a previous one.");
1925 if (access
->alias_ptr_type
!= reference_alias_ptr_type (expr
))
1927 disqualify_split_candidate (desc
, "Multiple alias pointer types.");
1930 if (access
->reverse
!= reverse
)
1932 disqualify_split_candidate (desc
, "Both normal and reverse "
1933 "scalar storage order.");
1937 && (AGGREGATE_TYPE_P (type
) || AGGREGATE_TYPE_P (access
->type
))
1938 && (TYPE_MAIN_VARIANT (access
->type
) != TYPE_MAIN_VARIANT (type
)))
1940 /* We need the same aggregate type on all accesses to be able to
1941 distinguish transformation spots from pass-through arguments in
1942 the transformation phase. */
1943 disqualify_split_candidate (desc
, "We do not support aggregate "
1948 if (type_prevails_p (access
->type
, type
))
1949 access
->type
= type
;
1953 /* Scan body function described by NODE and FUN and create access trees for
1957 scan_function (cgraph_node
*node
, struct function
*fun
)
1961 FOR_EACH_BB_FN (bb
, fun
)
1963 gimple_stmt_iterator gsi
;
1964 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1966 gimple
*stmt
= gsi_stmt (gsi
);
1968 if (final_bbs
&& stmt_can_throw_external (fun
, stmt
))
1969 bitmap_set_bit (final_bbs
, bb
->index
);
1970 switch (gimple_code (stmt
))
1974 tree t
= gimple_return_retval (as_a
<greturn
*> (stmt
));
1976 scan_expr_access (t
, stmt
, ISRA_CTX_LOAD
, bb
);
1978 bitmap_set_bit (final_bbs
, bb
->index
);
1983 if (gimple_assign_single_p (stmt
)
1984 && !gimple_clobber_p (stmt
))
1986 tree rhs
= gimple_assign_rhs1 (stmt
);
1987 scan_expr_access (rhs
, stmt
, ISRA_CTX_LOAD
, bb
);
1988 tree lhs
= gimple_assign_lhs (stmt
);
1989 scan_expr_access (lhs
, stmt
, ISRA_CTX_STORE
, bb
);
1995 unsigned argument_count
= gimple_call_num_args (stmt
);
1996 isra_scan_context ctx
= ISRA_CTX_ARG
;
1997 scan_call_info call_info
, *call_info_p
= &call_info
;
1998 if (gimple_call_internal_p (stmt
))
2001 ctx
= ISRA_CTX_LOAD
;
2002 internal_fn ifn
= gimple_call_internal_fn (stmt
);
2003 if (internal_store_fn_p (ifn
))
2004 ctx
= ISRA_CTX_STORE
;
2008 call_info
.cs
= node
->get_edge (stmt
);
2009 call_info
.argument_count
= argument_count
;
2012 for (unsigned i
= 0; i
< argument_count
; i
++)
2014 call_info
.arg_idx
= i
;
2015 scan_expr_access (gimple_call_arg (stmt
, i
), stmt
,
2016 ctx
, bb
, call_info_p
);
2019 tree lhs
= gimple_call_lhs (stmt
);
2021 scan_expr_access (lhs
, stmt
, ISRA_CTX_STORE
, bb
);
2022 int flags
= gimple_call_flags (stmt
);
2024 && (((flags
& (ECF_CONST
| ECF_PURE
)) == 0)
2025 || (flags
& ECF_LOOPING_CONST_OR_PURE
)))
2026 bitmap_set_bit (final_bbs
, bb
->index
);
2032 gasm
*asm_stmt
= as_a
<gasm
*> (stmt
);
2033 walk_stmt_load_store_addr_ops (asm_stmt
, NULL
, NULL
, NULL
,
2036 bitmap_set_bit (final_bbs
, bb
->index
);
2038 for (unsigned i
= 0; i
< gimple_asm_ninputs (asm_stmt
); i
++)
2040 tree t
= TREE_VALUE (gimple_asm_input_op (asm_stmt
, i
));
2041 scan_expr_access (t
, stmt
, ISRA_CTX_LOAD
, bb
);
2043 for (unsigned i
= 0; i
< gimple_asm_noutputs (asm_stmt
); i
++)
2045 tree t
= TREE_VALUE (gimple_asm_output_op (asm_stmt
, i
));
2046 scan_expr_access (t
, stmt
, ISRA_CTX_STORE
, bb
);
2058 /* Return true if SSA_NAME NAME of function described by FUN is only used in
2059 return statements, or if results of any operations it is involved in are
2060 only used in return statements. ANALYZED is a bitmap that tracks which SSA
2061 names we have already started investigating. */
2064 ssa_name_only_returned_p (function
*fun
, tree name
, bitmap analyzed
)
2067 imm_use_iterator imm_iter
;
2070 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, name
)
2072 if (is_gimple_debug (stmt
))
2075 if (gimple_code (stmt
) == GIMPLE_RETURN
)
2077 tree t
= gimple_return_retval (as_a
<greturn
*> (stmt
));
2084 else if (!stmt_unremovable_because_of_non_call_eh_p (fun
, stmt
)
2085 && ((is_gimple_assign (stmt
) && !gimple_has_volatile_ops (stmt
))
2086 || gimple_code (stmt
) == GIMPLE_PHI
))
2088 /* TODO: And perhaps for const function calls too? */
2090 if (gimple_code (stmt
) == GIMPLE_PHI
)
2091 lhs
= gimple_phi_result (stmt
);
2093 lhs
= gimple_assign_lhs (stmt
);
2095 if (TREE_CODE (lhs
) != SSA_NAME
)
2100 gcc_assert (!gimple_vdef (stmt
));
2101 if (bitmap_set_bit (analyzed
, SSA_NAME_VERSION (lhs
))
2102 && !ssa_name_only_returned_p (fun
, lhs
, analyzed
))
2117 /* Inspect the uses of the return value of the call associated with CS, and if
2118 it is not used or if it is only used to construct the return value of the
2119 caller, mark it as such in call or caller summary. Also check for
2120 misaligned arguments. */
2123 isra_analyze_call (cgraph_edge
*cs
)
2125 gcall
*call_stmt
= cs
->call_stmt
;
2126 unsigned count
= gimple_call_num_args (call_stmt
);
2127 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2129 for (unsigned i
= 0; i
< count
; i
++)
2131 tree arg
= gimple_call_arg (call_stmt
, i
);
2132 if (TREE_CODE (arg
) == ADDR_EXPR
)
2134 poly_int64 poffset
, psize
, pmax_size
;
2136 tree base
= get_ref_base_and_extent (TREE_OPERAND (arg
, 0), &poffset
,
2137 &psize
, &pmax_size
, &reverse
);
2138 HOST_WIDE_INT offset
;
2139 unsigned HOST_WIDE_INT ds
;
2141 && (poffset
.is_constant (&offset
))
2142 && tree_fits_uhwi_p (DECL_SIZE (base
))
2143 && ((ds
= tree_to_uhwi (DECL_SIZE (base
)) - offset
)
2144 < ISRA_ARG_SIZE_LIMIT
* BITS_PER_UNIT
))
2146 csum
->init_inputs (count
);
2147 gcc_assert (!csum
->m_arg_flow
[i
].aggregate_pass_through
);
2148 csum
->m_arg_flow
[i
].unit_size
= ds
/ BITS_PER_UNIT
;
2151 if (TREE_CODE (base
) == VAR_DECL
2152 && !TREE_STATIC (base
)
2153 && !loaded_decls
->contains (base
))
2155 csum
->init_inputs (count
);
2156 csum
->m_arg_flow
[i
].constructed_for_calls
= true;
2160 if (is_gimple_reg (arg
))
2164 poly_int64 bitsize
, bitpos
;
2166 int unsignedp
, reversep
, volatilep
= 0;
2167 get_inner_reference (arg
, &bitsize
, &bitpos
, &offset
, &mode
,
2168 &unsignedp
, &reversep
, &volatilep
);
2169 if (!multiple_p (bitpos
, BITS_PER_UNIT
))
2171 csum
->m_bit_aligned_arg
= true;
2176 tree lhs
= gimple_call_lhs (call_stmt
);
2179 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2180 from this function (without being read anywhere). */
2181 if (TREE_CODE (lhs
) == SSA_NAME
)
2183 bitmap analyzed
= BITMAP_ALLOC (NULL
);
2184 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs
->caller
->decl
),
2186 csum
->m_return_returned
= true;
2187 BITMAP_FREE (analyzed
);
2191 csum
->m_return_ignored
= true;
2194 /* Look at all calls going out of NODE, described also by IFS and perform all
2195 analyses necessary for IPA-SRA that are not done at body scan time or done
2196 even when body is not scanned because the function is not a candidate. */
2199 isra_analyze_all_outgoing_calls (cgraph_node
*node
)
2201 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2202 isra_analyze_call (cs
);
2203 for (cgraph_edge
*cs
= node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
2204 isra_analyze_call (cs
);
2207 /* Dump a dereferences table with heading STR to file F. */
2210 dump_dereferences_table (FILE *f
, struct function
*fun
, const char *str
)
2214 fprintf (dump_file
, "%s", str
);
2215 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (fun
),
2216 EXIT_BLOCK_PTR_FOR_FN (fun
), next_bb
)
2218 fprintf (f
, "%4i %i ", bb
->index
, bitmap_bit_p (final_bbs
, bb
->index
));
2219 if (bb
!= EXIT_BLOCK_PTR_FOR_FN (fun
))
2222 for (i
= 0; i
< unsafe_by_ref_count
; i
++)
2224 int idx
= bb
->index
* unsafe_by_ref_count
+ i
;
2225 fprintf (f
, " %4" HOST_WIDE_INT_PRINT
"d", bb_dereferences
[idx
]);
2230 fprintf (dump_file
, "\n");
2233 /* Propagate distances in bb_dereferences in the opposite direction than the
2234 control flow edges, in each step storing the maximum of the current value
2235 and the minimum of all successors. These steps are repeated until the table
2236 stabilizes. Note that BBs which might terminate the functions (according to
2237 final_bbs bitmap) never updated in this way. */
2240 propagate_dereference_distances (struct function
*fun
)
2244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2245 dump_dereferences_table (dump_file
, fun
,
2246 "Dereference table before propagation:\n");
2248 auto_vec
<basic_block
> queue (last_basic_block_for_fn (fun
));
2249 queue
.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun
));
2250 FOR_EACH_BB_FN (bb
, fun
)
2252 queue
.quick_push (bb
);
2256 while (!queue
.is_empty ())
2260 bool change
= false;
2266 if (bitmap_bit_p (final_bbs
, bb
->index
))
2269 for (i
= 0; i
< unsafe_by_ref_count
; i
++)
2271 int idx
= bb
->index
* unsafe_by_ref_count
+ i
;
2273 HOST_WIDE_INT inh
= 0;
2275 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2277 int succ_idx
= e
->dest
->index
* unsafe_by_ref_count
+ i
;
2279 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (fun
))
2285 inh
= bb_dereferences
[succ_idx
];
2287 else if (bb_dereferences
[succ_idx
] < inh
)
2288 inh
= bb_dereferences
[succ_idx
];
2291 if (!first
&& bb_dereferences
[idx
] < inh
)
2293 bb_dereferences
[idx
] = inh
;
2299 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2304 e
->src
->aux
= e
->src
;
2305 queue
.quick_push (e
->src
);
2309 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2310 dump_dereferences_table (dump_file
, fun
,
2311 "Dereference table after propagation:\n");
2314 /* Return true if the ACCESS loads happen frequently enough in FUN to risk
2315 moving them to the caller and only pass the result. */
2318 dereference_probable_p (struct function
*fun
, gensum_param_access
*access
)
2320 int threshold
= opt_for_fn (fun
->decl
, param_ipa_sra_deref_prob_threshold
);
2321 return access
->load_count
2322 >= ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
.apply_scale (threshold
, 100);
2325 /* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
2326 its children, return true if the parameter cannot be split, otherwise return
2327 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2328 the index of the entry BB in the function of PARM. */
2331 check_gensum_access (struct function
*fun
, tree parm
, gensum_param_desc
*desc
,
2332 gensum_param_access
*access
,
2333 HOST_WIDE_INT
*nonarg_acc_size
, bool *only_calls
,
2338 *only_calls
= false;
2339 *nonarg_acc_size
+= access
->size
;
2341 if (access
->first_child
)
2343 disqualify_split_candidate (desc
, "Overlapping non-call uses.");
2347 /* Do not decompose a non-BLKmode param in a way that would create
2348 BLKmode params. Especially for by-reference passing (thus,
2349 pointer-type param) this is hardly worthwhile. */
2350 if (DECL_MODE (parm
) != BLKmode
2351 && TYPE_MODE (access
->type
) == BLKmode
)
2353 disqualify_split_candidate (desc
, "Would convert a non-BLK to a BLK.");
2361 if (!dereference_probable_p (fun
, access
))
2363 disqualify_split_candidate (desc
, "Dereferences in callers "
2364 "would happen much more frequently.");
2370 int idx
= (entry_bb_index
* unsafe_by_ref_count
+ desc
->deref_index
);
2371 if ((access
->offset
+ access
->size
) > bb_dereferences
[idx
])
2373 if (!dereference_probable_p (fun
, access
))
2375 disqualify_split_candidate (desc
, "Would create a possibly "
2376 "illegal dereference in a "
2380 desc
->conditionally_dereferenceable
= true;
2385 for (gensum_param_access
*ch
= access
->first_child
;
2387 ch
= ch
->next_sibling
)
2388 if (check_gensum_access (fun
, parm
, desc
, ch
, nonarg_acc_size
, only_calls
,
2395 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2399 copy_accesses_to_ipa_desc (gensum_param_access
*from
, isra_param_desc
*desc
)
2401 param_access
*to
= ggc_cleared_alloc
<param_access
> ();
2402 gcc_checking_assert ((from
->offset
% BITS_PER_UNIT
) == 0);
2403 gcc_checking_assert ((from
->size
% BITS_PER_UNIT
) == 0);
2404 to
->unit_offset
= from
->offset
/ BITS_PER_UNIT
;
2405 to
->unit_size
= from
->size
/ BITS_PER_UNIT
;
2406 to
->type
= from
->type
;
2407 to
->alias_ptr_type
= from
->alias_ptr_type
;
2408 to
->certain
= from
->nonarg
;
2409 to
->reverse
= from
->reverse
;
2410 vec_safe_push (desc
->accesses
, to
);
2412 for (gensum_param_access
*ch
= from
->first_child
;
2414 ch
= ch
->next_sibling
)
2415 copy_accesses_to_ipa_desc (ch
, desc
);
2418 /* Analyze function body scan results stored in param_accesses and
2419 param_accesses, detect possible transformations and store information of
2420 those in function summary. NODE, FUN and IFS are all various structures
2421 describing the currently analyzed function. */
2424 process_scan_results (cgraph_node
*node
, struct function
*fun
,
2425 isra_func_summary
*ifs
,
2426 vec
<gensum_param_desc
> *param_descriptions
)
2428 bool check_pass_throughs
= false;
2429 bool dereferences_propagated
= false;
2430 tree parm
= DECL_ARGUMENTS (node
->decl
);
2431 unsigned param_count
= param_descriptions
->length();
2433 for (unsigned desc_index
= 0;
2434 desc_index
< param_count
;
2435 desc_index
++, parm
= DECL_CHAIN (parm
))
2437 gensum_param_desc
*desc
= &(*param_descriptions
)[desc_index
];
2438 if (!desc
->split_candidate
)
2442 isra_verify_access_tree (desc
->accesses
);
2444 if (!dereferences_propagated
2449 propagate_dereference_distances (fun
);
2450 dereferences_propagated
= true;
2453 HOST_WIDE_INT nonarg_acc_size
= 0;
2454 bool only_calls
= true;
2455 bool check_failed
= false;
2457 int entry_bb_index
= ENTRY_BLOCK_PTR_FOR_FN (fun
)->index
;
2458 for (gensum_param_access
*acc
= desc
->accesses
;
2460 acc
= acc
->next_sibling
)
2461 if (check_gensum_access (fun
, parm
, desc
, acc
, &nonarg_acc_size
,
2462 &only_calls
, entry_bb_index
))
2464 check_failed
= true;
2471 desc
->locally_unused
= true;
2473 HOST_WIDE_INT cur_param_size
2474 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm
)));
2475 HOST_WIDE_INT param_size_limit
, optimistic_limit
;
2476 if (!desc
->by_ref
|| optimize_function_for_size_p (fun
))
2478 param_size_limit
= cur_param_size
;
2479 optimistic_limit
= cur_param_size
;
2484 = opt_for_fn (node
->decl
,
2485 param_ipa_sra_ptr_growth_factor
) * cur_param_size
;
2487 = (opt_for_fn (node
->decl
, param_ipa_sra_ptrwrap_growth_factor
)
2488 * param_size_limit
);
2491 if (nonarg_acc_size
> optimistic_limit
2492 || (!desc
->by_ref
&& nonarg_acc_size
== param_size_limit
))
2494 disqualify_split_candidate (desc
, "Would result into a too big set "
2495 "of replacements even in best "
2500 /* create_parameter_descriptors makes sure unit sizes of all
2501 candidate parameters fit unsigned integers restricted to
2502 ISRA_ARG_SIZE_LIMIT. */
2503 desc
->param_size_limit
= param_size_limit
/ BITS_PER_UNIT
;
2504 desc
->nonarg_acc_size
= nonarg_acc_size
/ BITS_PER_UNIT
;
2505 if (desc
->split_candidate
&& desc
->ptr_pt_count
)
2507 gcc_assert (desc
->by_ref
);
2508 check_pass_throughs
= true;
2513 /* When a pointer parameter is passed-through to a callee, in which it is
2514 only used to read only one or a few items, we can attempt to transform it
2515 to obtaining and passing through the items instead of the pointer. But we
2516 must take extra care that 1) we do not introduce any segfault by moving
2517 dereferences above control flow and that 2) the data is not modified
2518 through an alias in this function. The IPA analysis must not introduce
2519 any accesses candidates unless it can prove both.
2521 The current solution is very crude as it consists of ensuring that the
2522 call postdominates entry BB and that the definition of VUSE of the call is
2523 default definition. TODO: For non-recursive callees in the same
2524 compilation unit we could do better by doing analysis in topological order
2525 an looking into access candidates of callees, using their alias_ptr_types
2526 to attempt real AA. We could also use the maximum known dereferenced
2527 offset in this function at IPA level.
2529 TODO: Measure the overhead and the effect of just being pessimistic.
2530 Maybe this is only -O3 material? */
2532 hash_map
<gimple
*, bool> analyzed_stmts
;
2533 bitmap always_executed_bbs
= NULL
;
2534 if (check_pass_throughs
)
2535 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
2537 gcall
*call_stmt
= cs
->call_stmt
;
2538 tree vuse
= gimple_vuse (call_stmt
);
2540 /* If the callee is a const function, we don't get a VUSE. In such
2541 case there will be no memory accesses in the called function (or the
2542 const attribute is wrong) and then we just don't care. */
2543 bool uses_memory_as_obtained
= vuse
&& SSA_NAME_IS_DEFAULT_DEF (vuse
);
2545 unsigned count
= gimple_call_num_args (call_stmt
);
2546 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2547 csum
->init_inputs (count
);
2548 csum
->m_before_any_store
= uses_memory_as_obtained
;
2549 for (unsigned argidx
= 0; argidx
< count
; argidx
++)
2551 if (!csum
->m_arg_flow
[argidx
].pointer_pass_through
)
2554 = get_single_param_flow_source (&csum
->m_arg_flow
[argidx
]);
2555 gensum_param_desc
*desc
= &(*param_descriptions
)[pidx
];
2556 if (!desc
->split_candidate
)
2558 csum
->m_arg_flow
[argidx
].pointer_pass_through
= false;
2561 if (!uses_memory_as_obtained
)
2566 csum
->m_arg_flow
[argidx
].safe_to_import_accesses
= true;
2570 /* Walk basic block and see if its execution can terminate earlier.
2571 Keep the info for later re-use to avoid quadratic behavoiur here. */
2572 gimple_stmt_iterator gsi
= gsi_for_stmt (call_stmt
);
2575 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2577 bool *b
= analyzed_stmts
.get (gsi_stmt (gsi
));
2585 if (stmt_may_terminate_function_p (fun
, gsi_stmt (gsi
), false))
2593 if (gsi_end_p (gsi
))
2594 gsi
= gsi_start_bb (gimple_bb (call_stmt
));
2595 for (; gsi_stmt (gsi
) != call_stmt
; gsi_next (&gsi
))
2596 analyzed_stmts
.get_or_insert (gsi_stmt (gsi
)) = safe
;
2599 if (safe
&& !always_executed_bbs
)
2601 mark_dfs_back_edges ();
2602 always_executed_bbs
= find_always_executed_bbs (fun
, false);
2604 if (safe
&& bitmap_bit_p (always_executed_bbs
, gimple_bb (call_stmt
)->index
))
2605 csum
->m_arg_flow
[argidx
].safe_to_import_accesses
= true;
2609 BITMAP_FREE (always_executed_bbs
);
2611 /* TODO: Add early exit if we disqualified everything. This also requires
2612 that we either relax the restriction that
2613 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2614 or store the number of parameters to IPA-SRA function summary and use that
2615 when just removing params. */
2617 vec_safe_reserve_exact (ifs
->m_parameters
, param_count
);
2618 ifs
->m_parameters
->quick_grow_cleared (param_count
);
2619 for (unsigned desc_index
= 0; desc_index
< param_count
; desc_index
++)
2621 gensum_param_desc
*s
= &(*param_descriptions
)[desc_index
];
2622 isra_param_desc
*d
= &(*ifs
->m_parameters
)[desc_index
];
2624 d
->param_size_limit
= s
->param_size_limit
;
2625 d
->size_reached
= s
->nonarg_acc_size
;
2626 d
->locally_unused
= s
->locally_unused
;
2627 d
->split_candidate
= s
->split_candidate
;
2628 d
->by_ref
= s
->by_ref
;
2629 d
->conditionally_dereferenceable
= s
->conditionally_dereferenceable
;
2631 for (gensum_param_access
*acc
= s
->accesses
;
2633 acc
= acc
->next_sibling
)
2634 copy_accesses_to_ipa_desc (acc
, d
);
2638 dump_isra_param_descriptors (dump_file
, node
->decl
, ifs
, false);
2641 /* Return true if there are any overlaps among certain accesses of DESC. If
2642 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2643 too. DESC is assumed to be a split candidate that is not locally
2647 overlapping_certain_accesses_p (isra_param_desc
*desc
,
2648 bool *certain_access_present_p
)
2650 unsigned pclen
= vec_safe_length (desc
->accesses
);
2651 for (unsigned i
= 0; i
< pclen
; i
++)
2653 param_access
*a1
= (*desc
->accesses
)[i
];
2657 if (certain_access_present_p
)
2658 *certain_access_present_p
= true;
2659 for (unsigned j
= i
+ 1; j
< pclen
; j
++)
2661 param_access
*a2
= (*desc
->accesses
)[j
];
2663 && a1
->unit_offset
< a2
->unit_offset
+ a2
->unit_size
2664 && a1
->unit_offset
+ a1
->unit_size
> a2
->unit_offset
)
2671 /* Check for any overlaps of certain param accesses among splitting candidates
2672 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2673 check that used splitting candidates have at least one certain access. */
2676 verify_splitting_accesses (cgraph_node
*node
, bool certain_must_exist
)
2678 isra_func_summary
*ifs
= func_sums
->get (node
);
2679 if (!ifs
|| !ifs
->m_candidate
)
2681 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
2682 for (unsigned pidx
= 0; pidx
< param_count
; pidx
++)
2684 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[pidx
];
2685 if (!desc
->split_candidate
|| desc
->locally_unused
)
2688 bool certain_access_present
= !certain_must_exist
;
2689 if (overlapping_certain_accesses_p (desc
, &certain_access_present
))
2690 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2691 "which overlap", node
->dump_name (), pidx
);
2692 if (!certain_access_present
)
2693 internal_error ("function %qs, parameter %u, is used but does not "
2694 "have any certain IPA-SRA access",
2695 node
->dump_name (), pidx
);
2699 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2700 this compilation unit and create summary structures describing IPA-SRA
2701 opportunities and constraints in them. */
2704 ipa_sra_generate_summary (void)
2706 struct cgraph_node
*node
;
2708 gcc_checking_assert (!func_sums
);
2709 gcc_checking_assert (!call_sums
);
2711 = (new (ggc_alloc_no_dtor
<ipa_sra_function_summaries
> ())
2712 ipa_sra_function_summaries (symtab
, true));
2713 call_sums
= new ipa_sra_call_summaries (symtab
);
2715 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
2716 ipa_sra_summarize_function (node
);
2720 /* Write intraprocedural analysis information about E and all of its outgoing
2721 edges into a stream for LTO WPA. */
2724 isra_write_edge_summary (output_block
*ob
, cgraph_edge
*e
)
2726 isra_call_summary
*csum
= call_sums
->get (e
);
2727 unsigned input_count
= csum
->m_arg_flow
.length ();
2728 streamer_write_uhwi (ob
, input_count
);
2729 for (unsigned i
= 0; i
< input_count
; i
++)
2731 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
2732 streamer_write_hwi (ob
, ipf
->length
);
2733 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2734 for (int j
= 0; j
< ipf
->length
; j
++)
2735 bp_pack_value (&bp
, ipf
->inputs
[j
], 8);
2736 bp_pack_value (&bp
, ipf
->aggregate_pass_through
, 1);
2737 bp_pack_value (&bp
, ipf
->pointer_pass_through
, 1);
2738 bp_pack_value (&bp
, ipf
->safe_to_import_accesses
, 1);
2739 bp_pack_value (&bp
, ipf
->constructed_for_calls
, 1);
2740 streamer_write_bitpack (&bp
);
2741 streamer_write_uhwi (ob
, ipf
->unit_offset
);
2742 streamer_write_uhwi (ob
, ipf
->unit_size
);
2744 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2745 bp_pack_value (&bp
, csum
->m_return_ignored
, 1);
2746 bp_pack_value (&bp
, csum
->m_return_returned
, 1);
2747 bp_pack_value (&bp
, csum
->m_bit_aligned_arg
, 1);
2748 bp_pack_value (&bp
, csum
->m_before_any_store
, 1);
2749 streamer_write_bitpack (&bp
);
2752 /* Write intraprocedural analysis information about NODE and all of its outgoing
2753 edges into a stream for LTO WPA. */
2756 isra_write_node_summary (output_block
*ob
, cgraph_node
*node
)
2758 isra_func_summary
*ifs
= func_sums
->get (node
);
2759 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
2760 int node_ref
= lto_symtab_encoder_encode (encoder
, node
);
2761 streamer_write_uhwi (ob
, node_ref
);
2763 unsigned param_desc_count
= vec_safe_length (ifs
->m_parameters
);
2764 streamer_write_uhwi (ob
, param_desc_count
);
2765 for (unsigned i
= 0; i
< param_desc_count
; i
++)
2767 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
2768 unsigned access_count
= vec_safe_length (desc
->accesses
);
2769 streamer_write_uhwi (ob
, access_count
);
2770 for (unsigned j
= 0; j
< access_count
; j
++)
2772 param_access
*acc
= (*desc
->accesses
)[j
];
2773 stream_write_tree (ob
, acc
->type
, true);
2774 stream_write_tree (ob
, acc
->alias_ptr_type
, true);
2775 streamer_write_uhwi (ob
, acc
->unit_offset
);
2776 streamer_write_uhwi (ob
, acc
->unit_size
);
2777 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2778 bp_pack_value (&bp
, acc
->certain
, 1);
2779 bp_pack_value (&bp
, acc
->reverse
, 1);
2780 streamer_write_bitpack (&bp
);
2782 streamer_write_uhwi (ob
, desc
->param_size_limit
);
2783 streamer_write_uhwi (ob
, desc
->size_reached
);
2784 gcc_assert (desc
->safe_size
== 0);
2785 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2786 bp_pack_value (&bp
, desc
->locally_unused
, 1);
2787 bp_pack_value (&bp
, desc
->split_candidate
, 1);
2788 bp_pack_value (&bp
, desc
->by_ref
, 1);
2789 gcc_assert (!desc
->not_specially_constructed
);
2790 bp_pack_value (&bp
, desc
->conditionally_dereferenceable
, 1);
2791 gcc_assert (!desc
->safe_size_set
);
2792 streamer_write_bitpack (&bp
);
2794 bitpack_d bp
= bitpack_create (ob
->main_stream
);
2795 bp_pack_value (&bp
, ifs
->m_candidate
, 1);
2796 bp_pack_value (&bp
, ifs
->m_returns_value
, 1);
2797 bp_pack_value (&bp
, ifs
->m_return_ignored
, 1);
2798 gcc_assert (!ifs
->m_queued
);
2799 streamer_write_bitpack (&bp
);
2801 for (cgraph_edge
*e
= node
->callees
; e
; e
= e
->next_callee
)
2802 isra_write_edge_summary (ob
, e
);
2803 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2804 isra_write_edge_summary (ob
, e
);
2807 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2810 ipa_sra_write_summary (void)
2812 if (!func_sums
|| !call_sums
)
2815 struct output_block
*ob
= create_output_block (LTO_section_ipa_sra
);
2816 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
2819 unsigned int count
= 0;
2820 lto_symtab_encoder_iterator lsei
;
2821 for (lsei
= lsei_start_function_in_partition (encoder
);
2823 lsei_next_function_in_partition (&lsei
))
2825 cgraph_node
*node
= lsei_cgraph_node (lsei
);
2826 if (node
->has_gimple_body_p ()
2827 && func_sums
->get (node
) != NULL
)
2830 streamer_write_uhwi (ob
, count
);
2832 /* Process all of the functions. */
2833 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
2834 lsei_next_function_in_partition (&lsei
))
2836 cgraph_node
*node
= lsei_cgraph_node (lsei
);
2837 if (node
->has_gimple_body_p ()
2838 && func_sums
->get (node
) != NULL
)
2839 isra_write_node_summary (ob
, node
);
2841 streamer_write_char_stream (ob
->main_stream
, 0);
2842 produce_asm (ob
, NULL
);
2843 destroy_output_block (ob
);
2846 /* Read intraprocedural analysis information about E and all of its outgoing
2847 edges into a stream for LTO WPA. */
2850 isra_read_edge_summary (struct lto_input_block
*ib
, cgraph_edge
*cs
)
2852 isra_call_summary
*csum
= call_sums
->get_create (cs
);
2853 unsigned input_count
= streamer_read_uhwi (ib
);
2854 csum
->init_inputs (input_count
);
2855 for (unsigned i
= 0; i
< input_count
; i
++)
2857 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
2858 ipf
->length
= streamer_read_hwi (ib
);
2859 bitpack_d bp
= streamer_read_bitpack (ib
);
2860 for (int j
= 0; j
< ipf
->length
; j
++)
2861 ipf
->inputs
[j
] = bp_unpack_value (&bp
, 8);
2862 ipf
->aggregate_pass_through
= bp_unpack_value (&bp
, 1);
2863 ipf
->pointer_pass_through
= bp_unpack_value (&bp
, 1);
2864 ipf
->safe_to_import_accesses
= bp_unpack_value (&bp
, 1);
2865 ipf
->constructed_for_calls
= bp_unpack_value (&bp
, 1);
2866 ipf
->unit_offset
= streamer_read_uhwi (ib
);
2867 ipf
->unit_size
= streamer_read_uhwi (ib
);
2869 bitpack_d bp
= streamer_read_bitpack (ib
);
2870 csum
->m_return_ignored
= bp_unpack_value (&bp
, 1);
2871 csum
->m_return_returned
= bp_unpack_value (&bp
, 1);
2872 csum
->m_bit_aligned_arg
= bp_unpack_value (&bp
, 1);
2873 csum
->m_before_any_store
= bp_unpack_value (&bp
, 1);
2876 /* Read intraprocedural analysis information about NODE and all of its outgoing
2877 edges into a stream for LTO WPA. */
2880 isra_read_node_info (struct lto_input_block
*ib
, cgraph_node
*node
,
2881 struct data_in
*data_in
)
2883 isra_func_summary
*ifs
= func_sums
->get_create (node
);
2884 unsigned param_desc_count
= streamer_read_uhwi (ib
);
2885 if (param_desc_count
> 0)
2887 vec_safe_reserve_exact (ifs
->m_parameters
, param_desc_count
);
2888 ifs
->m_parameters
->quick_grow_cleared (param_desc_count
);
2890 for (unsigned i
= 0; i
< param_desc_count
; i
++)
2892 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
2893 unsigned access_count
= streamer_read_uhwi (ib
);
2894 for (unsigned j
= 0; j
< access_count
; j
++)
2896 param_access
*acc
= ggc_cleared_alloc
<param_access
> ();
2897 acc
->type
= stream_read_tree (ib
, data_in
);
2898 acc
->alias_ptr_type
= stream_read_tree (ib
, data_in
);
2899 acc
->unit_offset
= streamer_read_uhwi (ib
);
2900 acc
->unit_size
= streamer_read_uhwi (ib
);
2901 bitpack_d bp
= streamer_read_bitpack (ib
);
2902 acc
->certain
= bp_unpack_value (&bp
, 1);
2903 acc
->reverse
= bp_unpack_value (&bp
, 1);
2904 vec_safe_push (desc
->accesses
, acc
);
2906 desc
->param_size_limit
= streamer_read_uhwi (ib
);
2907 desc
->size_reached
= streamer_read_uhwi (ib
);
2908 desc
->safe_size
= 0;
2909 bitpack_d bp
= streamer_read_bitpack (ib
);
2910 desc
->locally_unused
= bp_unpack_value (&bp
, 1);
2911 desc
->split_candidate
= bp_unpack_value (&bp
, 1);
2912 desc
->by_ref
= bp_unpack_value (&bp
, 1);
2913 desc
->not_specially_constructed
= 0;
2914 desc
->conditionally_dereferenceable
= bp_unpack_value (&bp
, 1);
2915 desc
->safe_size_set
= 0;
2917 bitpack_d bp
= streamer_read_bitpack (ib
);
2918 ifs
->m_candidate
= bp_unpack_value (&bp
, 1);
2919 ifs
->m_returns_value
= bp_unpack_value (&bp
, 1);
2920 ifs
->m_return_ignored
= bp_unpack_value (&bp
, 1);
2923 for (cgraph_edge
*e
= node
->callees
; e
; e
= e
->next_callee
)
2924 isra_read_edge_summary (ib
, e
);
2925 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2926 isra_read_edge_summary (ib
, e
);
2929 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2930 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
2931 it should be possible to unify them somehow. */
2934 isra_read_summary_section (struct lto_file_decl_data
*file_data
,
2935 const char *data
, size_t len
)
2937 const struct lto_function_header
*header
=
2938 (const struct lto_function_header
*) data
;
2939 const int cfg_offset
= sizeof (struct lto_function_header
);
2940 const int main_offset
= cfg_offset
+ header
->cfg_size
;
2941 const int string_offset
= main_offset
+ header
->main_size
;
2942 struct data_in
*data_in
;
2946 lto_input_block
ib_main ((const char *) data
+ main_offset
,
2947 header
->main_size
, file_data
);
2950 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
2951 header
->string_size
, vNULL
);
2952 count
= streamer_read_uhwi (&ib_main
);
2954 for (i
= 0; i
< count
; i
++)
2957 struct cgraph_node
*node
;
2958 lto_symtab_encoder_t encoder
;
2960 index
= streamer_read_uhwi (&ib_main
);
2961 encoder
= file_data
->symtab_node_encoder
;
2962 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
2964 gcc_assert (node
->definition
);
2965 isra_read_node_info (&ib_main
, node
, data_in
);
2967 lto_free_section_data (file_data
, LTO_section_ipa_sra
, NULL
, data
,
2969 lto_data_in_delete (data_in
);
2972 /* Read intraprocedural analysis information into a stream for LTO WPA. */
2975 ipa_sra_read_summary (void)
2977 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
2978 struct lto_file_decl_data
*file_data
;
2981 gcc_checking_assert (!func_sums
);
2982 gcc_checking_assert (!call_sums
);
2984 = (new (ggc_alloc_no_dtor
<ipa_sra_function_summaries
> ())
2985 ipa_sra_function_summaries (symtab
, true));
2986 call_sums
= new ipa_sra_call_summaries (symtab
);
2988 while ((file_data
= file_data_vec
[j
++]))
2992 = lto_get_summary_section_data (file_data
, LTO_section_ipa_sra
, &len
);
2994 isra_read_summary_section (file_data
, data
, len
);
2998 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. If
2999 HINTS is true, also dump IPA-analysis computed hints. */
3002 ipa_sra_dump_all_summaries (FILE *f
, bool hints
)
3005 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
3007 fprintf (f
, "\nSummary for node %s:\n", node
->dump_name ());
3009 isra_func_summary
*ifs
= func_sums
->get (node
);
3011 fprintf (f
, " Function does not have any associated IPA-SRA "
3013 else if (!ifs
->m_candidate
)
3014 fprintf (f
, " Not a candidate function\n");
3017 if (ifs
->m_returns_value
)
3018 fprintf (f
, " Returns value\n");
3019 if (vec_safe_is_empty (ifs
->m_parameters
))
3020 fprintf (f
, " No parameter information. \n");
3022 for (unsigned i
= 0; i
< ifs
->m_parameters
->length (); ++i
)
3024 fprintf (f
, " Descriptor for parameter %i:\n", i
);
3025 dump_isra_param_descriptor (f
, &(*ifs
->m_parameters
)[i
], hints
);
3030 struct cgraph_edge
*cs
;
3031 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
3033 fprintf (f
, " Summary for edge %s->%s:\n", cs
->caller
->dump_name (),
3034 cs
->callee
->dump_name ());
3035 isra_call_summary
*csum
= call_sums
->get (cs
);
3039 fprintf (f
, " Call summary is MISSING!\n");
3043 fprintf (f
, "\n\n");
3046 /* Perform function-scope viability tests that can be only made at IPA level
3047 and return false if the function is deemed unsuitable for IPA-SRA. */
3050 ipa_sra_ipa_function_checks (cgraph_node
*node
)
3052 if (!node
->can_be_local_p ())
3055 fprintf (dump_file
, "Function %s disqualified because it cannot be "
3056 "made local.\n", node
->dump_name ());
3059 if (!node
->can_change_signature
)
3062 fprintf (dump_file
, "Function can not change signature.\n");
3069 /* Issues found out by check_callers_for_issues. */
3071 struct caller_issues
3073 /* The candidate being considered. */
3074 cgraph_node
*candidate
;
3075 /* There is a thunk among callers. */
3077 /* Set if there is at least one caller that is OK. */
3079 /* Call site with no available information. */
3080 bool unknown_callsite
;
3081 /* Call from outside the candidate's comdat group. */
3082 bool call_from_outside_comdat
;
3083 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
3084 bool bit_aligned_aggregate_argument
;
3087 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
3091 check_for_caller_issues (struct cgraph_node
*node
, void *data
)
3093 struct caller_issues
*issues
= (struct caller_issues
*) data
;
3095 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3097 if (cs
->caller
->thunk
)
3099 issues
->thunk
= true;
3100 /* TODO: We should be able to process at least some types of
3104 if (issues
->candidate
->calls_comdat_local
3105 && issues
->candidate
->same_comdat_group
3106 && !issues
->candidate
->in_same_comdat_group_p (cs
->caller
))
3108 issues
->call_from_outside_comdat
= true;
3112 isra_call_summary
*csum
= call_sums
->get (cs
);
3115 issues
->unknown_callsite
= true;
3119 if (csum
->m_bit_aligned_arg
)
3120 issues
->bit_aligned_aggregate_argument
= true;
3122 issues
->there_is_one
= true;
3127 /* Look at all incoming edges to NODE, including aliases and thunks and look
3128 for problems. Return true if NODE type should not be modified at all. */
3131 check_all_callers_for_issues (cgraph_node
*node
)
3133 struct caller_issues issues
;
3134 memset (&issues
, 0, sizeof (issues
));
3135 issues
.candidate
= node
;
3137 node
->call_for_symbol_and_aliases (check_for_caller_issues
, &issues
, true);
3138 if (issues
.unknown_callsite
)
3140 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3141 fprintf (dump_file
, "A call of %s has not been analyzed. Disabling "
3142 "all modifications.\n", node
->dump_name ());
3145 /* TODO: We should be able to process at least some types of thunks. */
3148 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3149 fprintf (dump_file
, "A call of %s is through thunk, which are not"
3150 " handled yet. Disabling all modifications.\n",
3151 node
->dump_name ());
3154 if (issues
.call_from_outside_comdat
)
3157 fprintf (dump_file
, "Function would become private comdat called "
3158 "outside of its comdat group.\n");
3162 if (issues
.bit_aligned_aggregate_argument
)
3164 /* Let's only remove parameters/return values from such functions.
3165 TODO: We could only prevent splitting the problematic parameters if
3166 anybody thinks it is worth it. */
3167 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3168 fprintf (dump_file
, "A call of %s has bit-aligned aggregate argument,"
3169 " disabling parameter splitting.\n", node
->dump_name ());
3171 isra_func_summary
*ifs
= func_sums
->get (node
);
3172 gcc_checking_assert (ifs
);
3173 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
3174 for (unsigned i
= 0; i
< param_count
; i
++)
3175 (*ifs
->m_parameters
)[i
].split_candidate
= false;
3177 if (!issues
.there_is_one
)
3179 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3180 fprintf (dump_file
, "There is no call to %s that we can modify. "
3181 "Disabling all modifications.\n", node
->dump_name ());
3187 /* Find the access with corresponding OFFSET and SIZE among accesses in
3188 PARAM_DESC and return it or NULL if such an access is not there. */
3190 static param_access
*
3191 find_param_access (isra_param_desc
*param_desc
, unsigned offset
, unsigned size
)
3193 unsigned pclen
= vec_safe_length (param_desc
->accesses
);
3195 /* The search is linear but the number of stored accesses is bound by
3196 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3198 for (unsigned i
= 0; i
< pclen
; i
++)
3199 if ((*param_desc
->accesses
)[i
]->unit_offset
== offset
3200 && (*param_desc
->accesses
)[i
]->unit_size
== size
)
3201 return (*param_desc
->accesses
)[i
];
3206 /* Return iff the total size of definite replacement SIZE would violate the
3207 limit set for it in PARAM. */
3210 size_would_violate_limit_p (isra_param_desc
*desc
, unsigned size
)
3212 unsigned limit
= desc
->param_size_limit
;
3214 || (!desc
->by_ref
&& size
== limit
))
3219 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3220 the set limit. IDX is the parameter number which is dumped when
3224 bump_reached_size (isra_param_desc
*desc
, unsigned size
, unsigned idx
)
3226 unsigned after
= desc
->size_reached
+ size
;
3227 if (size_would_violate_limit_p (desc
, after
))
3229 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3230 fprintf (dump_file
, " ...size limit reached, disqualifying "
3231 "candidate parameter %u\n", idx
);
3232 desc
->split_candidate
= false;
3235 desc
->size_reached
= after
;
3238 /* Take all actions required to deal with an edge CS that represents a call to
3239 an unknown or un-analyzed function, for both parameter removal and
3243 process_edge_to_unknown_caller (cgraph_edge
*cs
)
3245 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3246 gcc_checking_assert (from_ifs
);
3247 isra_call_summary
*csum
= call_sums
->get (cs
);
3249 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3250 fprintf (dump_file
, "Processing an edge to an unknown caller from %s:\n",
3251 cs
->caller
->dump_name ());
3253 unsigned args_count
= csum
->m_arg_flow
.length ();
3254 for (unsigned i
= 0; i
< args_count
; i
++)
3256 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3258 if (ipf
->pointer_pass_through
)
3260 isra_param_desc
*param_desc
3261 = &(*from_ifs
->m_parameters
)[get_single_param_flow_source (ipf
)];
3262 param_desc
->locally_unused
= false;
3263 param_desc
->split_candidate
= false;
3266 if (ipf
->aggregate_pass_through
)
3268 unsigned idx
= get_single_param_flow_source (ipf
);
3269 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3271 param_desc
->locally_unused
= false;
3272 if (!param_desc
->split_candidate
)
3274 gcc_assert (!param_desc
->by_ref
);
3275 param_access
*pacc
= find_param_access (param_desc
, ipf
->unit_offset
,
3277 gcc_checking_assert (pacc
);
3278 pacc
->certain
= true;
3279 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3282 fprintf (dump_file
, " ...leading to overlap, "
3283 " disqualifying candidate parameter %u\n",
3285 param_desc
->split_candidate
= false;
3288 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3289 ipf
->aggregate_pass_through
= false;
3293 for (int j
= 0; j
< ipf
->length
; j
++)
3295 int input_idx
= ipf
->inputs
[j
];
3296 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3301 /* Propagate parameter removal information through cross-SCC edge CS,
3302 i.e. decrease the use count in the caller parameter descriptor for each use
3306 param_removal_cross_scc_edge (cgraph_edge
*cs
)
3308 enum availability availability
;
3309 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3310 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3311 if (!to_ifs
|| !to_ifs
->m_candidate
3312 || (availability
< AVAIL_AVAILABLE
)
3313 || vec_safe_is_empty (to_ifs
->m_parameters
))
3315 process_edge_to_unknown_caller (cs
);
3318 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3319 gcc_checking_assert (from_ifs
);
3321 isra_call_summary
*csum
= call_sums
->get (cs
);
3322 unsigned args_count
= csum
->m_arg_flow
.length ();
3323 unsigned param_count
= vec_safe_length (to_ifs
->m_parameters
);
3325 for (unsigned i
= 0; i
< args_count
; i
++)
3327 bool unused_in_callee
;
3328 if (i
< param_count
)
3329 unused_in_callee
= (*to_ifs
->m_parameters
)[i
].locally_unused
;
3331 unused_in_callee
= false;
3333 if (!unused_in_callee
)
3335 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3336 for (int j
= 0; j
< ipf
->length
; j
++)
3338 int input_idx
= ipf
->inputs
[j
];
3339 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3345 /* Unless it is already there, push NODE which is also described by IFS to
3349 isra_push_node_to_stack (cgraph_node
*node
, isra_func_summary
*ifs
,
3350 vec
<cgraph_node
*> *stack
)
3354 ifs
->m_queued
= true;
3355 stack
->safe_push (node
);
3359 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3360 used and push CALLER on STACK. */
3363 isra_mark_caller_param_used (isra_func_summary
*from_ifs
, int input_idx
,
3364 cgraph_node
*caller
, vec
<cgraph_node
*> *stack
)
3366 if ((*from_ifs
->m_parameters
)[input_idx
].locally_unused
)
3368 (*from_ifs
->m_parameters
)[input_idx
].locally_unused
= false;
3369 isra_push_node_to_stack (caller
, from_ifs
, stack
);
3373 /* Combine safe_size of DESC with SIZE and return true if it has changed. */
3376 update_safe_size (isra_param_desc
*desc
, unsigned size
)
3378 if (!desc
->safe_size_set
)
3380 desc
->safe_size_set
= 1;
3381 desc
->safe_size
= size
;
3384 if (desc
->safe_size
<= size
)
3386 desc
->safe_size
= size
;
3390 /* Set all param hints in DESC to the pessimistic values. Return true if any
3391 hints that need to potentially trigger further propagation have changed. */
3394 flip_all_hints_pessimistic (isra_param_desc
*desc
)
3396 desc
->not_specially_constructed
= true;
3397 return update_safe_size (desc
, 0);
3400 /* Because we have not analyzed or otherwise problematic caller, go over all
3401 parameter int flags of IFS describing a call graph node of a calllee and
3402 turn them pessimistic. Return true if any hints that need to potentially
3403 trigger further propagation have changed. */
3406 flip_all_param_hints_pessimistic (isra_func_summary
*ifs
)
3408 if (!ifs
|| !ifs
->m_candidate
)
3412 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
3414 for (unsigned i
= 0; i
< param_count
; i
++)
3415 ret
|= flip_all_hints_pessimistic (&(*ifs
->m_parameters
)[i
]);
3420 /* Propagate hints accross edge CS which ultimately leads to a node described
3421 by TO_IFS. Return true if any hints of the callee which should potentially
3422 trigger further propagation have changed. */
3425 propagate_param_hints_accross_call (cgraph_edge
*cs
, isra_func_summary
*to_ifs
)
3427 if (!to_ifs
|| !to_ifs
->m_candidate
)
3430 isra_call_summary
*csum
= call_sums
->get (cs
);
3432 unsigned args_count
= csum
->m_arg_flow
.length ();
3433 unsigned param_count
= vec_safe_length (to_ifs
->m_parameters
);
3435 for (unsigned i
= 0; i
< param_count
; i
++)
3437 isra_param_desc
*desc
= &(*to_ifs
->m_parameters
)[i
];
3438 if (i
>= args_count
)
3440 ret
|= flip_all_hints_pessimistic (desc
);
3446 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3448 if (!ipf
->constructed_for_calls
)
3449 desc
->not_specially_constructed
= true;
3451 if (ipf
->pointer_pass_through
)
3453 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3454 int srcidx
= get_single_param_flow_source (ipf
);
3455 if (vec_safe_length (from_ifs
->m_parameters
) > (unsigned) srcidx
)
3457 isra_param_desc
*src_d
= &(*from_ifs
->m_parameters
)[srcidx
];
3458 if (src_d
->safe_size_set
)
3459 ret
|= update_safe_size (desc
, src_d
->safe_size
);
3462 ret
|= update_safe_size (desc
, 0);
3464 else if (!ipf
->aggregate_pass_through
)
3465 ret
|= update_safe_size (desc
, ipf
->unit_size
);
3467 /* LTOing type-mismatched programs can end up here. */
3468 ret
|= update_safe_size (desc
, 0);
3474 /* Propagate hints from NODE described by FROM_IFS to all its (dorect) callees,
3475 push those that may need re-visiting onto STACK. */
3478 propagate_hints_to_all_callees (cgraph_node
*node
, isra_func_summary
*from_ifs
,
3479 vec
<cgraph_node
*> *stack
)
3481 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
3483 enum availability availability
;
3484 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3485 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3488 if (flip_all_param_hints_pessimistic (to_ifs
)
3489 && ipa_edge_within_scc (cs
))
3490 isra_push_node_to_stack (callee
, to_ifs
, stack
);
3492 else if (propagate_param_hints_accross_call (cs
, to_ifs
)
3493 && ipa_edge_within_scc (cs
))
3494 isra_push_node_to_stack (callee
, to_ifs
, stack
);
3498 /* Propagate information that any parameter is not used only locally within a
3499 SCC across CS to the caller, which must be in the same SCC as the
3500 callee. Push any callers that need to be re-processed to STACK. */
3503 propagate_used_across_scc_edge (cgraph_edge
*cs
, vec
<cgraph_node
*> *stack
)
3505 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3506 if (!from_ifs
|| vec_safe_is_empty (from_ifs
->m_parameters
))
3509 isra_call_summary
*csum
= call_sums
->get (cs
);
3510 gcc_checking_assert (csum
);
3511 unsigned args_count
= csum
->m_arg_flow
.length ();
3512 enum availability availability
;
3513 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3514 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3516 unsigned param_count
3517 = (to_ifs
&& (availability
>= AVAIL_AVAILABLE
))
3518 ? vec_safe_length (to_ifs
->m_parameters
) : 0;
3519 for (unsigned i
= 0; i
< args_count
; i
++)
3522 && (*to_ifs
->m_parameters
)[i
].locally_unused
)
3525 /* The argument is needed in the callee it, we must mark the parameter as
3526 used also in the caller and its callers within this SCC. */
3527 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3528 for (int j
= 0; j
< ipf
->length
; j
++)
3530 int input_idx
= ipf
->inputs
[j
];
3531 isra_mark_caller_param_used (from_ifs
, input_idx
, cs
->caller
, stack
);
3536 /* Propagate information that any parameter is not used only locally within a
3537 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3538 same SCC. Push any callers that need to be re-processed to STACK. */
3541 propagate_used_to_scc_callers (cgraph_node
*node
, void *data
)
3543 vec
<cgraph_node
*> *stack
= (vec
<cgraph_node
*> *) data
;
3545 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3546 if (ipa_edge_within_scc (cs
))
3547 propagate_used_across_scc_edge (cs
, stack
);
3551 /* Return true iff all certain accesses in ARG_DESC are also present as
3552 certain accesses in PARAM_DESC. */
3555 all_callee_accesses_present_p (isra_param_desc
*param_desc
,
3556 isra_param_desc
*arg_desc
)
3558 unsigned aclen
= vec_safe_length (arg_desc
->accesses
);
3559 for (unsigned j
= 0; j
< aclen
; j
++)
3561 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3562 if (!argacc
->certain
)
3564 param_access
*pacc
= find_param_access (param_desc
, argacc
->unit_offset
,
3568 || !types_compatible_p (argacc
->type
, pacc
->type
))
3574 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3575 does not allow instantiating an auto_vec with a type defined within a
3576 function so it is a global type. */
3577 enum acc_prop_kind
{ACC_PROP_DONT
, ACC_PROP_COPY
, ACC_PROP_CERTAIN
};
3580 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3581 (which belongs to CALLER) if they would not violate some constraint there.
3582 If successful, return NULL, otherwise return the string reason for failure
3583 (which can be written to the dump file). DELTA_OFFSET is the known offset
3584 of the actual argument withing the formal parameter (so of ARG_DESCS within
3585 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3586 known. In case of success, set *CHANGE_P to true if propagation actually
3587 changed anything. */
3590 pull_accesses_from_callee (cgraph_node
*caller
, isra_param_desc
*param_desc
,
3591 isra_param_desc
*arg_desc
,
3592 unsigned delta_offset
, unsigned arg_size
,
3595 unsigned pclen
= vec_safe_length (param_desc
->accesses
);
3596 unsigned aclen
= vec_safe_length (arg_desc
->accesses
);
3597 unsigned prop_count
= 0;
3598 unsigned prop_size
= 0;
3599 bool change
= false;
3601 auto_vec
<enum acc_prop_kind
, 8> prop_kinds (aclen
);
3602 for (unsigned j
= 0; j
< aclen
; j
++)
3604 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3605 prop_kinds
.safe_push (ACC_PROP_DONT
);
3608 && argacc
->unit_offset
+ argacc
->unit_size
> arg_size
)
3609 return "callee access outsize size boundary";
3611 if (!argacc
->certain
)
3614 unsigned offset
= argacc
->unit_offset
+ delta_offset
;
3615 /* Given that accesses are initially stored according to increasing
3616 offset and decreasing size in case of equal offsets, the following
3617 searches could be written more efficiently if we kept the ordering
3618 when copying. But the number of accesses is capped at
3619 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3620 messy quickly, so let's improve on that only if necessary. */
3622 bool exact_match
= false;
3623 for (unsigned i
= 0; i
< pclen
; i
++)
3625 /* Check for overlaps. */
3626 param_access
*pacc
= (*param_desc
->accesses
)[i
];
3627 if (pacc
->unit_offset
== offset
3628 && pacc
->unit_size
== argacc
->unit_size
)
3630 if (argacc
->alias_ptr_type
!= pacc
->alias_ptr_type
3631 || !types_compatible_p (argacc
->type
, pacc
->type
)
3632 || argacc
->reverse
!= pacc
->reverse
)
3633 return "propagated access types would not match existing ones";
3638 prop_kinds
[j
] = ACC_PROP_CERTAIN
;
3639 prop_size
+= argacc
->unit_size
;
3645 if (offset
< pacc
->unit_offset
+ pacc
->unit_size
3646 && offset
+ argacc
->unit_size
> pacc
->unit_offset
)
3648 /* None permissible with load accesses, possible to fit into
3651 || offset
< pacc
->unit_offset
3652 || (offset
+ argacc
->unit_size
3653 > pacc
->unit_offset
+ pacc
->unit_size
))
3654 return "a propagated access would conflict in caller";
3660 prop_kinds
[j
] = ACC_PROP_COPY
;
3662 prop_size
+= argacc
->unit_size
;
3670 if ((prop_count
+ pclen
3671 > (unsigned) opt_for_fn (caller
->decl
, param_ipa_sra_max_replacements
))
3672 || size_would_violate_limit_p (param_desc
,
3673 param_desc
->size_reached
+ prop_size
))
3674 return "propagating accesses would violate the count or size limit";
3677 for (unsigned j
= 0; j
< aclen
; j
++)
3679 if (prop_kinds
[j
] == ACC_PROP_COPY
)
3681 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3683 param_access
*copy
= ggc_cleared_alloc
<param_access
> ();
3684 copy
->unit_offset
= argacc
->unit_offset
+ delta_offset
;
3685 copy
->unit_size
= argacc
->unit_size
;
3686 copy
->type
= argacc
->type
;
3687 copy
->alias_ptr_type
= argacc
->alias_ptr_type
;
3688 copy
->certain
= true;
3689 copy
->reverse
= argacc
->reverse
;
3690 vec_safe_push (param_desc
->accesses
, copy
);
3692 else if (prop_kinds
[j
] == ACC_PROP_CERTAIN
)
3694 param_access
*argacc
= (*arg_desc
->accesses
)[j
];
3696 = find_param_access (param_desc
, argacc
->unit_offset
+ delta_offset
,
3698 csp
->certain
= true;
3702 param_desc
->size_reached
+= prop_size
;
3707 /* Propagate parameter splitting information through call graph edge CS.
3708 Return true if any changes that might need to be propagated within SCCs have
3709 been made. The function also clears the aggregate_pass_through and
3710 pointer_pass_through in call summaries which do not need to be processed
3711 again if this CS is revisited when iterating while changes are propagated
3715 param_splitting_across_edge (cgraph_edge
*cs
)
3718 bool cross_scc
= !ipa_edge_within_scc (cs
);
3719 enum availability availability
;
3720 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
3721 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3722 gcc_checking_assert (from_ifs
&& from_ifs
->m_parameters
);
3724 isra_call_summary
*csum
= call_sums
->get (cs
);
3725 gcc_checking_assert (csum
);
3726 unsigned args_count
= csum
->m_arg_flow
.length ();
3727 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
3728 unsigned param_count
3729 = ((to_ifs
&& to_ifs
->m_candidate
&& (availability
>= AVAIL_AVAILABLE
))
3730 ? vec_safe_length (to_ifs
->m_parameters
)
3733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3734 fprintf (dump_file
, "Splitting across %s->%s:\n",
3735 cs
->caller
->dump_name (), callee
->dump_name ());
3738 for (i
= 0; (i
< args_count
) && (i
< param_count
); i
++)
3740 isra_param_desc
*arg_desc
= &(*to_ifs
->m_parameters
)[i
];
3741 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3743 if (arg_desc
->locally_unused
)
3745 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3746 fprintf (dump_file
, " ->%u: unused in callee\n", i
);
3747 ipf
->pointer_pass_through
= false;
3751 if (ipf
->pointer_pass_through
)
3753 int idx
= get_single_param_flow_source (ipf
);
3754 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3755 if (!param_desc
->split_candidate
)
3757 gcc_assert (param_desc
->by_ref
);
3759 if (!arg_desc
->split_candidate
|| !arg_desc
->by_ref
)
3761 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3762 fprintf (dump_file
, " %u->%u: not candidate or not by "
3763 "reference in callee\n", idx
, i
);
3764 param_desc
->split_candidate
= false;
3765 ipf
->pointer_pass_through
= false;
3768 else if (!ipf
->safe_to_import_accesses
)
3770 if (!csum
->m_before_any_store
3771 || !all_callee_accesses_present_p (param_desc
, arg_desc
))
3773 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3774 fprintf (dump_file
, " %u->%u: cannot import accesses.\n",
3776 param_desc
->split_candidate
= false;
3777 ipf
->pointer_pass_through
= false;
3783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3784 fprintf (dump_file
, " %u->%u: verified callee accesses "
3785 "present.\n", idx
, i
);
3787 ipf
->pointer_pass_through
= false;
3792 const char *pull_failure
3793 = pull_accesses_from_callee (cs
->caller
, param_desc
, arg_desc
,
3797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3798 fprintf (dump_file
, " %u->%u: by_ref access pull "
3799 "failed: %s.\n", idx
, i
, pull_failure
);
3800 param_desc
->split_candidate
= false;
3801 ipf
->pointer_pass_through
= false;
3806 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3807 fprintf (dump_file
, " %u->%u: by_ref access pull "
3808 "succeeded.\n", idx
, i
);
3810 ipf
->pointer_pass_through
= false;
3814 else if (ipf
->aggregate_pass_through
)
3816 int idx
= get_single_param_flow_source (ipf
);
3817 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3818 if (!param_desc
->split_candidate
)
3820 gcc_assert (!param_desc
->by_ref
);
3821 param_access
*pacc
= find_param_access (param_desc
, ipf
->unit_offset
,
3823 gcc_checking_assert (pacc
);
3827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3828 fprintf (dump_file
, " %u->%u: already certain\n", idx
, i
);
3829 ipf
->aggregate_pass_through
= false;
3831 else if (!arg_desc
->split_candidate
|| arg_desc
->by_ref
)
3833 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3834 fprintf (dump_file
, " %u->%u: not candidate or by "
3835 "reference in callee\n", idx
, i
);
3837 pacc
->certain
= true;
3838 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3840 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3841 fprintf (dump_file
, " ...leading to overlap, "
3842 " disqualifying candidate parameter %u\n",
3844 param_desc
->split_candidate
= false;
3847 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3849 ipf
->aggregate_pass_through
= false;
3854 const char *pull_failure
3855 = pull_accesses_from_callee (cs
->caller
, param_desc
, arg_desc
,
3857 ipf
->unit_size
, &res
);
3860 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3861 fprintf (dump_file
, " %u->%u: arg access pull "
3862 "failed: %s.\n", idx
, i
, pull_failure
);
3864 ipf
->aggregate_pass_through
= false;
3865 pacc
->certain
= true;
3867 if (overlapping_certain_accesses_p (param_desc
, NULL
))
3869 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3870 fprintf (dump_file
, " ...leading to overlap, "
3871 " disqualifying candidate parameter %u\n",
3873 param_desc
->split_candidate
= false;
3876 bump_reached_size (param_desc
, pacc
->unit_size
, idx
);
3882 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3883 fprintf (dump_file
, " %u->%u: arg access pull "
3884 "succeeded.\n", idx
, i
);
3886 ipf
->aggregate_pass_through
= false;
3892 /* Handle argument-parameter count mismatches. */
3893 for (; (i
< args_count
); i
++)
3895 isra_param_flow
*ipf
= &csum
->m_arg_flow
[i
];
3897 if (ipf
->pointer_pass_through
|| ipf
->aggregate_pass_through
)
3899 int idx
= get_single_param_flow_source (ipf
);
3900 ipf
->pointer_pass_through
= false;
3901 ipf
->aggregate_pass_through
= false;
3902 isra_param_desc
*param_desc
= &(*from_ifs
->m_parameters
)[idx
];
3903 if (!param_desc
->split_candidate
)
3906 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3907 fprintf (dump_file
, " %u->%u: no corresponding formal parameter\n",
3909 param_desc
->split_candidate
= false;
3916 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3917 callers ignore the return value, or come from the same SCC and use the
3918 return value only to compute their return value, return false, otherwise
3922 retval_used_p (cgraph_node
*node
, void *)
3924 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
3926 isra_call_summary
*csum
= call_sums
->get (cs
);
3927 gcc_checking_assert (csum
);
3928 if (csum
->m_return_ignored
)
3930 if (!csum
->m_return_returned
)
3933 isra_func_summary
*from_ifs
= func_sums
->get (cs
->caller
);
3934 if (!from_ifs
|| !from_ifs
->m_candidate
)
3937 if (!ipa_edge_within_scc (cs
)
3938 && !from_ifs
->m_return_ignored
)
3945 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3946 modify parameter which originally had index BASE_INDEX, in the adjustment
3947 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3948 PREV_ADJUSTMENT. If IPA-CP has created a transformation summary for the
3949 original node, it needs to be passed in IPCP_TS, otherwise it should be
3950 NULL. If the parent clone is the original function, PREV_ADJUSTMENT is NULL
3951 and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3954 push_param_adjustments_for_index (isra_func_summary
*ifs
, unsigned base_index
,
3955 unsigned prev_clone_index
,
3956 ipa_adjusted_param
*prev_adjustment
,
3957 ipcp_transformation
*ipcp_ts
,
3958 vec
<ipa_adjusted_param
, va_gc
> **new_params
)
3960 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[base_index
];
3961 if (desc
->locally_unused
)
3964 fprintf (dump_file
, " Will remove parameter %u\n", base_index
);
3968 if (!desc
->split_candidate
)
3970 ipa_adjusted_param adj
;
3971 if (prev_adjustment
)
3973 adj
= *prev_adjustment
;
3974 adj
.prev_clone_adjustment
= true;
3975 adj
.prev_clone_index
= prev_clone_index
;
3979 memset (&adj
, 0, sizeof (adj
));
3980 adj
.op
= IPA_PARAM_OP_COPY
;
3981 adj
.base_index
= base_index
;
3982 adj
.prev_clone_index
= prev_clone_index
;
3984 vec_safe_push ((*new_params
), adj
);
3989 fprintf (dump_file
, " Will split parameter %u\n", base_index
);
3991 gcc_assert (!prev_adjustment
|| prev_adjustment
->op
== IPA_PARAM_OP_COPY
);
3992 unsigned aclen
= vec_safe_length (desc
->accesses
);
3993 for (unsigned j
= 0; j
< aclen
; j
++)
3995 param_access
*pa
= (*desc
->accesses
)[j
];
4001 ipa_argagg_value_list
avl (ipcp_ts
);
4002 tree value
= avl
.get_value (base_index
, pa
->unit_offset
);
4003 if (value
&& !AGGREGATE_TYPE_P (pa
->type
))
4006 fprintf (dump_file
, " - omitting component at byte "
4007 "offset %u which is known to have a constant value\n ",
4014 fprintf (dump_file
, " - component at byte offset %u, "
4015 "size %u\n", pa
->unit_offset
, pa
->unit_size
);
4017 ipa_adjusted_param adj
;
4018 memset (&adj
, 0, sizeof (adj
));
4019 adj
.op
= IPA_PARAM_OP_SPLIT
;
4020 adj
.base_index
= base_index
;
4021 adj
.prev_clone_index
= prev_clone_index
;
4022 adj
.param_prefix_index
= IPA_PARAM_PREFIX_ISRA
;
4023 adj
.reverse
= pa
->reverse
;
4024 adj
.type
= pa
->type
;
4025 adj
.alias_ptr_type
= pa
->alias_ptr_type
;
4026 adj
.unit_offset
= pa
->unit_offset
;
4027 vec_safe_push ((*new_params
), adj
);
4031 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
4032 flag of all callers of NODE. */
4035 mark_callers_calls_comdat_local (struct cgraph_node
*node
, void *)
4037 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4038 cs
->caller
->calls_comdat_local
= true;
4042 /* Remove any IPA-CP results stored in TS that are associated with removed
4043 parameters as marked in IFS. */
4046 zap_useless_ipcp_results (const isra_func_summary
*ifs
, ipcp_transformation
*ts
)
4048 unsigned ts_len
= vec_safe_length (ts
->m_agg_values
);
4053 bool removed_item
= false;
4054 unsigned dst_index
= 0;
4056 for (unsigned i
= 0; i
< ts_len
; i
++)
4058 ipa_argagg_value
*v
= &(*ts
->m_agg_values
)[i
];
4059 const isra_param_desc
*desc
= &(*ifs
->m_parameters
)[v
->index
];
4061 if (!desc
->locally_unused
)
4064 (*ts
->m_agg_values
)[dst_index
] = *v
;
4068 removed_item
= true;
4072 ggc_free (ts
->m_agg_values
);
4073 ts
->m_agg_values
= NULL
;
4075 else if (removed_item
)
4076 ts
->m_agg_values
->truncate (dst_index
);
4078 bool useful_bits
= false;
4079 unsigned count
= vec_safe_length (ts
->bits
);
4080 for (unsigned i
= 0; i
< count
; i
++)
4083 const isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4084 if (desc
->locally_unused
)
4085 (*ts
->bits
)[i
] = NULL
;
4092 bool useful_vr
= false;
4093 count
= vec_safe_length (ts
->m_vr
);
4094 for (unsigned i
= 0; i
< count
; i
++)
4095 if ((*ts
->m_vr
)[i
].known_p ())
4097 const isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4098 if (desc
->locally_unused
)
4099 (*ts
->m_vr
)[i
].set_unknown ();
4107 /* Do final processing of results of IPA propagation regarding NODE, clone it
4111 process_isra_node_results (cgraph_node
*node
,
4112 hash_map
<const char *, unsigned> *clone_num_suffixes
)
4114 isra_func_summary
*ifs
= func_sums
->get (node
);
4115 if (!ifs
|| !ifs
->m_candidate
)
4118 auto_vec
<bool, 16> surviving_params
;
4119 bool check_surviving
= false;
4120 clone_info
*cinfo
= clone_info::get (node
);
4121 if (cinfo
&& cinfo
->param_adjustments
)
4123 check_surviving
= true;
4124 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
4127 unsigned param_count
= vec_safe_length (ifs
->m_parameters
);
4128 bool will_change_function
= false;
4129 if (ifs
->m_returns_value
&& ifs
->m_return_ignored
)
4130 will_change_function
= true;
4132 for (unsigned i
= 0; i
< param_count
; i
++)
4134 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4135 if ((desc
->locally_unused
|| desc
->split_candidate
)
4136 /* Make sure we do not clone just to attempt to remove an already
4137 removed unused argument. */
4138 && (!check_surviving
4139 || (i
< surviving_params
.length ()
4140 && surviving_params
[i
])))
4142 will_change_function
= true;
4146 if (!will_change_function
)
4151 fprintf (dump_file
, "\nEvaluating analysis results for %s\n",
4152 node
->dump_name ());
4153 if (ifs
->m_returns_value
&& ifs
->m_return_ignored
)
4154 fprintf (dump_file
, " Will remove return value.\n");
4157 ipcp_transformation
*ipcp_ts
= ipcp_get_transformation_summary (node
);
4159 zap_useless_ipcp_results (ifs
, ipcp_ts
);
4160 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
4161 if (ipa_param_adjustments
*old_adjustments
4162 = cinfo
? cinfo
->param_adjustments
: NULL
)
4164 unsigned old_adj_len
= vec_safe_length (old_adjustments
->m_adj_params
);
4165 for (unsigned i
= 0; i
< old_adj_len
; i
++)
4167 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
4168 push_param_adjustments_for_index (ifs
, old_adj
->base_index
, i
,
4169 old_adj
, ipcp_ts
, &new_params
);
4173 for (unsigned i
= 0; i
< param_count
; i
++)
4174 push_param_adjustments_for_index (ifs
, i
, i
, NULL
, ipcp_ts
, &new_params
);
4176 ipa_param_adjustments
*new_adjustments
4177 = (new (ggc_alloc
<ipa_param_adjustments
> ())
4178 ipa_param_adjustments (new_params
, param_count
,
4179 ifs
->m_returns_value
&& ifs
->m_return_ignored
));
4181 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4183 fprintf (dump_file
, "\n Created adjustments:\n");
4184 new_adjustments
->dump (dump_file
);
4187 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
4188 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4190 auto_vec
<cgraph_edge
*> callers
= node
->collect_callers ();
4191 cgraph_node
*new_node
4192 = node
->create_virtual_clone (callers
, NULL
, new_adjustments
, "isra",
4195 if (node
->calls_comdat_local
&& node
->same_comdat_group
)
4197 new_node
->add_to_same_comdat_group (node
);
4198 new_node
->call_for_symbol_and_aliases (mark_callers_calls_comdat_local
,
4201 new_node
->calls_comdat_local
= node
->calls_comdat_local
;
4204 fprintf (dump_file
, " Created new node %s\n", new_node
->dump_name ());
4208 /* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
4209 followed by the list of numbers in INDICES. */
4212 dump_list_of_param_indices (const cgraph_node
*node
, const char* msg
,
4213 const vec
<unsigned> &indices
)
4215 if (indices
.is_empty ())
4217 fprintf (dump_file
, "The following parameters of %s %s:", node
->dump_name (),
4219 for (unsigned i
: indices
)
4220 fprintf (dump_file
, " %u", i
);
4221 fprintf (dump_file
, "\n");
4224 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
4225 and disable transformations for those which have not or which should not
4226 transformed because the associated debug counter reached its limit. Return
4227 true if none survived or if there were no candidates to begin with.
4228 Additionally, also adjust parameter descriptions based on debug counters and
4229 hints propagated earlier. */
4232 adjust_parameter_descriptions (cgraph_node
*node
, isra_func_summary
*ifs
)
4235 unsigned len
= vec_safe_length (ifs
->m_parameters
);
4239 auto_vec
<bool, 16> surviving_params
;
4240 bool check_surviving
= false;
4241 clone_info
*cinfo
= clone_info::get (node
);
4242 if (cinfo
&& cinfo
->param_adjustments
)
4244 check_surviving
= true;
4245 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
4247 ipcp_transformation
*ipcp_ts
= ipcp_get_transformation_summary (node
);
4248 auto_vec
<unsigned> dump_dead_indices
;
4249 auto_vec
<unsigned> dump_bad_cond_indices
;
4250 for (unsigned i
= 0; i
< len
; i
++)
4252 isra_param_desc
*desc
= &(*ifs
->m_parameters
)[i
];
4253 if (!dbg_cnt (ipa_sra_params
))
4255 desc
->locally_unused
= false;
4256 desc
->split_candidate
= false;
4258 else if (check_surviving
4259 && (i
>= surviving_params
.length ()
4260 || !surviving_params
[i
]))
4262 /* Even if the parameter was removed by a previous IPA pass, we do
4263 not clear locally_unused because if it really is unused, this
4264 information might be useful in callers. */
4265 desc
->split_candidate
= false;
4267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4268 dump_dead_indices
.safe_push (i
);
4272 if (desc
->split_candidate
&& desc
->conditionally_dereferenceable
)
4274 gcc_assert (desc
->safe_size_set
);
4275 for (param_access
*pa
: *desc
->accesses
)
4276 if ((pa
->unit_offset
+ pa
->unit_size
) > desc
->safe_size
)
4278 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4279 dump_bad_cond_indices
.safe_push (i
);
4280 desc
->split_candidate
= false;
4285 if (desc
->split_candidate
)
4287 if (desc
->by_ref
&& !desc
->not_specially_constructed
)
4290 = opt_for_fn (node
->decl
,
4291 param_ipa_sra_ptrwrap_growth_factor
);
4292 desc
->param_size_limit
= extra_factor
* desc
->param_size_limit
;
4294 if (size_would_violate_limit_p (desc
, desc
->size_reached
))
4295 desc
->split_candidate
= false;
4298 /* Avoid ICEs on size-mismatched VIEW_CONVERT_EXPRs when callers and
4299 callees don't agree on types in aggregates and we try to do both
4300 IPA-CP and IPA-SRA. */
4301 if (ipcp_ts
&& desc
->split_candidate
)
4303 ipa_argagg_value_list
avl (ipcp_ts
);
4304 for (const param_access
*pa
: desc
->accesses
)
4308 tree value
= avl
.get_value (i
, pa
->unit_offset
);
4310 && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value
)))
4314 desc
->split_candidate
= false;
4315 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4316 dump_dead_indices
.safe_push (i
);
4322 if (desc
->locally_unused
|| desc
->split_candidate
)
4327 dump_list_of_param_indices (node
, "are dead on arrival or have a type "
4328 "mismatch with IPA-CP", dump_dead_indices
);
4329 dump_list_of_param_indices (node
, "are not safe to dereference in all "
4330 "callers", dump_bad_cond_indices
);
4336 /* Run the interprocedural part of IPA-SRA. */
4339 ipa_sra_analysis (void)
4343 fprintf (dump_file
, "\n========== IPA-SRA IPA stage ==========\n");
4344 ipa_sra_dump_all_summaries (dump_file
, false);
4347 gcc_checking_assert (func_sums
);
4348 gcc_checking_assert (call_sums
);
4349 cgraph_node
**order
= XCNEWVEC (cgraph_node
*, symtab
->cgraph_count
);
4350 auto_vec
<cgraph_node
*, 16> stack
;
4351 int node_scc_count
= ipa_reduced_postorder (order
, true, NULL
);
4353 /* One sweep from callers to callees for return value removal. */
4354 for (int i
= node_scc_count
- 1; i
>= 0 ; i
--)
4356 cgraph_node
*scc_rep
= order
[i
];
4357 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (scc_rep
);
4359 /* Preliminary IPA function level checks. */
4360 for (cgraph_node
*v
: cycle_nodes
)
4362 isra_func_summary
*ifs
= func_sums
->get (v
);
4363 if (!ifs
|| !ifs
->m_candidate
)
4365 if (!ipa_sra_ipa_function_checks (v
)
4366 || check_all_callers_for_issues (v
))
4370 for (cgraph_node
*v
: cycle_nodes
)
4372 isra_func_summary
*ifs
= func_sums
->get (v
);
4373 if (!ifs
|| !ifs
->m_candidate
)
4376 = (ifs
->m_returns_value
4377 && (!dbg_cnt (ipa_sra_retvalues
)
4378 || v
->call_for_symbol_and_aliases (retval_used_p
,
4380 ifs
->m_return_ignored
= !return_needed
;
4382 isra_push_node_to_stack (v
, ifs
, &stack
);
4385 while (!stack
.is_empty ())
4387 cgraph_node
*node
= stack
.pop ();
4388 isra_func_summary
*ifs
= func_sums
->get (node
);
4389 gcc_checking_assert (ifs
&& ifs
->m_queued
);
4390 ifs
->m_queued
= false;
4392 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4393 if (ipa_edge_within_scc (cs
)
4394 && call_sums
->get (cs
)->m_return_returned
)
4396 enum availability av
;
4397 cgraph_node
*callee
= cs
->callee
->function_symbol (&av
);
4398 isra_func_summary
*to_ifs
= func_sums
->get (callee
);
4399 if (to_ifs
&& to_ifs
->m_return_ignored
)
4401 to_ifs
->m_return_ignored
= false;
4402 isra_push_node_to_stack (callee
, to_ifs
, &stack
);
4407 /* Parameter hint propagation. */
4408 for (cgraph_node
*v
: cycle_nodes
)
4410 isra_func_summary
*ifs
= func_sums
->get (v
);
4411 propagate_hints_to_all_callees (v
, ifs
, &stack
);
4414 while (!stack
.is_empty ())
4416 cgraph_node
*node
= stack
.pop ();
4417 isra_func_summary
*ifs
= func_sums
->get (node
);
4418 gcc_checking_assert (ifs
&& ifs
->m_queued
);
4419 ifs
->m_queued
= false;
4420 propagate_hints_to_all_callees (node
, ifs
, &stack
);
4423 cycle_nodes
.release ();
4426 /* One sweep from callees to callers for parameter removal and splitting. */
4427 for (int i
= 0; i
< node_scc_count
; i
++)
4429 cgraph_node
*scc_rep
= order
[i
];
4430 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (scc_rep
);
4432 /* First step of parameter removal. */
4433 for (cgraph_node
*v
: cycle_nodes
)
4435 isra_func_summary
*ifs
= func_sums
->get (v
);
4436 if (!ifs
|| !ifs
->m_candidate
)
4438 if (adjust_parameter_descriptions (v
, ifs
))
4440 for (cgraph_edge
*cs
= v
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4441 process_edge_to_unknown_caller (cs
);
4442 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4443 if (!ipa_edge_within_scc (cs
))
4444 param_removal_cross_scc_edge (cs
);
4447 /* Look at edges within the current SCC and propagate used-ness across
4448 them, pushing onto the stack all notes which might need to be
4450 for (cgraph_node
*v
: cycle_nodes
)
4451 v
->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers
,
4454 /* Keep revisiting and pushing until nothing changes. */
4455 while (!stack
.is_empty ())
4457 cgraph_node
*v
= stack
.pop ();
4458 isra_func_summary
*ifs
= func_sums
->get (v
);
4459 gcc_checking_assert (ifs
&& ifs
->m_queued
);
4460 ifs
->m_queued
= false;
4462 v
->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers
,
4466 /* Parameter splitting. */
4467 bool repeat_scc_access_propagation
;
4470 repeat_scc_access_propagation
= false;
4471 for (cgraph_node
*v
: cycle_nodes
)
4473 isra_func_summary
*ifs
= func_sums
->get (v
);
4475 || !ifs
->m_candidate
4476 || vec_safe_is_empty (ifs
->m_parameters
))
4478 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4479 if (param_splitting_across_edge (cs
))
4480 repeat_scc_access_propagation
= true;
4483 while (repeat_scc_access_propagation
);
4486 for (cgraph_node
*v
: cycle_nodes
)
4487 verify_splitting_accesses (v
, true);
4489 cycle_nodes
.release ();
4492 ipa_free_postorder_info ();
4497 if (dump_flags
& TDF_DETAILS
)
4499 fprintf (dump_file
, "\n========== IPA-SRA propagation final state "
4501 ipa_sra_dump_all_summaries (dump_file
, true);
4503 fprintf (dump_file
, "\n========== IPA-SRA decisions ==========\n");
4506 hash_map
<const char *, unsigned> *clone_num_suffixes
4507 = new hash_map
<const char *, unsigned>;
4510 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
4511 process_isra_node_results (node
, clone_num_suffixes
);
4513 delete clone_num_suffixes
;
4514 ggc_delete (func_sums
);
4520 fprintf (dump_file
, "\n========== IPA SRA IPA analysis done "
4526 const pass_data pass_data_ipa_sra
=
4528 IPA_PASS
, /* type */
4530 OPTGROUP_NONE
, /* optinfo_flags */
4531 TV_IPA_SRA
, /* tv_id */
4532 0, /* properties_required */
4533 0, /* properties_provided */
4534 0, /* properties_destroyed */
4535 0, /* todo_flags_start */
4536 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
4539 class pass_ipa_sra
: public ipa_opt_pass_d
4542 pass_ipa_sra (gcc::context
*ctxt
)
4543 : ipa_opt_pass_d (pass_data_ipa_sra
, ctxt
,
4544 ipa_sra_generate_summary
, /* generate_summary */
4545 ipa_sra_write_summary
, /* write_summary */
4546 ipa_sra_read_summary
, /* read_summary */
4547 NULL
, /* write_optimization_summary */
4548 NULL
, /* read_optimization_summary */
4549 NULL
, /* stmt_fixup */
4550 0, /* function_transform_todo_flags_start */
4551 NULL
, /* function_transform */
4552 NULL
) /* variable_transform */
4555 /* opt_pass methods: */
4556 bool gate (function
*) final override
4558 /* TODO: We should remove the optimize check after we ensure we never run
4559 IPA passes when not optimizing. */
4560 return (flag_ipa_sra
&& optimize
);
4563 unsigned int execute (function
*) final override
4565 return ipa_sra_analysis ();
4568 }; // class pass_ipa_sra
4572 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4573 create a summary structure describing IPA-SRA opportunities and constraints
4577 ipa_sra_summarize_function (cgraph_node
*node
)
4580 fprintf (dump_file
, "Creating summary for %s/%i:\n", node
->name (),
4582 gcc_obstack_init (&gensum_obstack
);
4583 loaded_decls
= new hash_set
<tree
>;
4585 isra_func_summary
*ifs
= NULL
;
4587 if (ipa_sra_preliminary_function_checks (node
))
4589 ifs
= func_sums
->get_create (node
);
4590 ifs
->m_candidate
= true;
4591 tree ret
= TREE_TYPE (TREE_TYPE (node
->decl
));
4592 ifs
->m_returns_value
= (TREE_CODE (ret
) != VOID_TYPE
);
4593 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
4595 parm
= DECL_CHAIN (parm
))
4598 auto_vec
<gensum_param_desc
, 16> param_descriptions (count
);
4600 struct function
*fun
= DECL_STRUCT_FUNCTION (node
->decl
);
4601 bool cfun_pushed
= false;
4604 decl2desc
= new hash_map
<tree
, gensum_param_desc
*>;
4605 param_descriptions
.reserve_exact (count
);
4606 param_descriptions
.quick_grow_cleared (count
);
4608 if (create_parameter_descriptors (node
, ¶m_descriptions
))
4612 final_bbs
= BITMAP_ALLOC (NULL
);
4613 bb_dereferences
= XCNEWVEC (HOST_WIDE_INT
,
4615 * last_basic_block_for_fn (fun
));
4616 aa_walking_limit
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
4619 /* Scan function is run even when there are no removal or splitting
4620 candidates so that we can calculate hints on call edges which can be
4621 useful in callees. */
4622 scan_function (node
, fun
);
4628 dump_gensum_param_descriptors (dump_file
, node
->decl
,
4629 ¶m_descriptions
);
4630 fprintf (dump_file
, "----------------------------------------\n");
4633 process_scan_results (node
, fun
, ifs
, ¶m_descriptions
);
4637 if (bb_dereferences
)
4639 free (bb_dereferences
);
4640 bb_dereferences
= NULL
;
4641 BITMAP_FREE (final_bbs
);
4645 isra_analyze_all_outgoing_calls (node
);
4647 delete loaded_decls
;
4648 loaded_decls
= NULL
;
4654 obstack_free (&gensum_obstack
, NULL
);
4656 fprintf (dump_file
, "\n\n");
4658 verify_splitting_accesses (node
, false);
4663 make_pass_ipa_sra (gcc::context
*ctxt
)
4665 return new pass_ipa_sra (ctxt
);
4669 #include "gt-ipa-sra.h"