c++: Implement C++26 P2573R2 - = delete("should have a reason"); [PR114458]
[official-gcc.git] / gcc / ipa-sra.cc
blob6d6da408925161705508d46e1ba163a9ba956c41
1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2019-2024 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* IPA-SRA is an interprocedural pass that removes unused function return
22 values (turning functions returning a value which is never used into void
23 functions) and removes unused function parameters. It can also replace an
24 aggregate parameter by a set of other parameters representing part of the
25 original, turning those passed by reference into new ones which pass the
26 value directly.
28 The pass is a true IPA one, which means that it works in three stages in
29 order to be able to take advantage of LTO. First, summaries about functions
30 and each calls are generated. Function summaries (often called call graph
31 node summaries) contain mainly information about which parameters are
32 potential transformation candidates and which bits of candidates are
33 accessed. We differentiate between accesses done as a part of a call
34 statement (which might be not necessary if the callee is also transformed)
35 and others (which are mandatory). Call summaries (often called call graph
36 edge summaries) contain information about which function formal parameters
37 feed into which actual call arguments so that if two parameters are only
38 used in a sum which is then passed to another function which then however
39 does not use this parameter, all three parameters of the two functions can
40 be eliminated. Edge summaries also have flags whether the return value is
41 used or if it is only returned in the caller too. In LTO mode these
42 summaries are then streamed to the object file in the compilation phase and
43 streamed back in in the WPA analysis stage.
45 The interprocedural analysis phase traverses the graph in topological order
46 in two sweeps, one in each direction. First, from callees to callers for
47 parameter removal and splitting. Each strongly-connected component is
48 processed iteratively until the situation in it stabilizes. The pass from
49 callers to callees is then carried out to remove unused return values in a
50 very similar fashion.
52 Because parameter manipulation has big implications for call redirection
53 which is done only after all call graph nodes materialize, the
54 transformation phase is not part of this patch but is carried out by the
55 clone materialization and edge redirection itself, see comments in
56 ipa-param-manipulation.h for more details. */
59 #include "config.h"
60 #include "system.h"
61 #include "coretypes.h"
62 #include "backend.h"
63 #include "tree.h"
64 #include "gimple.h"
65 #include "predict.h"
66 #include "tree-pass.h"
67 #include "ssa.h"
68 #include "cgraph.h"
69 #include "gimple-pretty-print.h"
70 #include "alias.h"
71 #include "tree-eh.h"
72 #include "gimple-iterator.h"
73 #include "gimple-walk.h"
74 #include "tree-dfa.h"
75 #include "tree-sra.h"
76 #include "alloc-pool.h"
77 #include "symbol-summary.h"
78 #include "dbgcnt.h"
79 #include "tree-inline.h"
80 #include "ipa-utils.h"
81 #include "builtins.h"
82 #include "cfganal.h"
83 #include "tree-streamer.h"
84 #include "internal-fn.h"
85 #include "symtab-clones.h"
86 #include "attribs.h"
87 #include "sreal.h"
88 #include "ipa-cp.h"
89 #include "ipa-prop.h"
91 static void ipa_sra_summarize_function (cgraph_node *);
93 /* Bits used to track size of an aggregate in bytes interprocedurally. */
94 #define ISRA_ARG_SIZE_LIMIT_BITS 16
95 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
96 /* How many parameters can feed into a call actual argument and still be
97 tracked. */
98 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
100 /* Structure describing accesses to a specific portion of an aggregate
101 parameter, as given by the offset and size. Any smaller accesses that occur
102 within a function that fall within another access form a tree. The pass
103 cannot analyze parameters with only partially overlapping accesses. */
105 struct GTY(()) param_access
107 /* Type that a potential replacement should have. This field only has
108 meaning in the summary building and transformation phases, when it is
109 reconstructed from the body. Must not be touched in IPA analysis
110 stage. */
111 tree type;
113 /* Alias reference type to be used in MEM_REFs when adjusting caller
114 arguments. */
115 tree alias_ptr_type;
117 /* Values returned by get_ref_base_and_extent but converted to bytes and
118 stored as unsigned ints. */
119 unsigned unit_offset;
120 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
122 /* Set once we are sure that the access will really end up in a potentially
123 transformed function - initially not set for portions of formal parameters
124 that are only used as actual function arguments passed to callees. */
125 unsigned certain : 1;
126 /* Set if the access has reverse scalar storage order. */
127 unsigned reverse : 1;
130 /* This structure has the same purpose as the one above and additionally it
131 contains some fields that are only necessary in the summary generation
132 phase. */
134 struct gensum_param_access
136 /* Values returned by get_ref_base_and_extent. */
137 HOST_WIDE_INT offset;
138 HOST_WIDE_INT size;
140 /* if this access has any children (in terms of the definition above), this
141 points to the first one. */
142 struct gensum_param_access *first_child;
143 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
144 described above. */
145 struct gensum_param_access *next_sibling;
147 /* Type that a potential replacement should have. This field only has
148 meaning in the summary building and transformation phases, when it is
149 reconstructed from the body. Must not be touched in IPA analysis
150 stage. */
151 tree type;
152 /* Alias reference type to be used in MEM_REFs when adjusting caller
153 arguments. */
154 tree alias_ptr_type;
156 /* Cumulative count of all loads. */
157 profile_count load_count;
158 /* Have there been writes to or reads from this exact location except for as
159 arguments to a function call that can be tracked. */
160 bool nonarg;
162 /* Set if the access has reverse scalar storage order. */
163 bool reverse;
166 /* Summary describing a parameter in the IPA stages. */
168 struct GTY(()) isra_param_desc
170 /* List of access representatives to the parameters, sorted according to
171 their offset. */
172 vec <param_access *, va_gc> *accesses;
174 /* Unit size limit of total size of all replacements. */
175 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
176 /* Sum of unit sizes of all certain replacements. */
177 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
178 /* Minimum offset that is known to be safe to dereference because of callers
179 pass pointers to DECLs of at least this size or because of dereferences in
180 callers. */
181 unsigned safe_size : ISRA_ARG_SIZE_LIMIT_BITS;
183 /* A parameter that is used only in call arguments and can be removed if all
184 concerned actual arguments are removed. */
185 unsigned locally_unused : 1;
186 /* An aggregate that is a candidate for breaking up or complete removal. */
187 unsigned split_candidate : 1;
188 /* Is this a parameter passing stuff by reference? */
189 unsigned by_ref : 1;
190 /* If set, this parameter can only be a candidate for removal if the function
191 is going to loose its return value. */
192 unsigned remove_only_when_retval_removed : 1;
193 /* If set, this parameter can only be a candidate for splitting if the
194 function is going to loose its return value. Can only be meaningfully set
195 for by_ref parameters. */
196 unsigned split_only_when_retval_removed : 1;
197 /* Parameter hint set during IPA analysis when there is a caller which does
198 not construct the argument just to pass it to calls. Only meaningful for
199 by_ref parameters. */
200 unsigned not_specially_constructed : 1;
201 /* Only meaningful for by_ref parameters. If set, this parameter can only be
202 a split candidate if all callers pass pointers that are known to point to
203 a chunk of memory large enough to contain all accesses. */
204 unsigned conditionally_dereferenceable : 1;
205 /* Set when safe_size has been updated from at least one caller. */
206 unsigned safe_size_set : 1;
209 /* Structure used when generating summaries that describes a parameter. */
211 struct gensum_param_desc
213 /* Roots of param_accesses. */
214 gensum_param_access *accesses;
215 /* Number of accesses in the access tree rooted in field accesses. */
216 unsigned access_count;
218 /* If the below is non-zero, this is the number of uses as actual
219 arguments. */
220 int call_uses;
221 /* Number of times this parameter has been directly passed to. */
222 unsigned ptr_pt_count;
224 /* Size limit of total size of all replacements. */
225 unsigned param_size_limit;
226 /* Sum of sizes of nonarg accesses. */
227 unsigned nonarg_acc_size;
229 /* A parameter that is used only in call arguments and can be removed if all
230 concerned actual arguments are removed. */
231 bool locally_unused;
232 /* An aggregate that is a candidate for breaking up or a pointer passing data
233 by reference that is a candidate for being converted to a set of
234 parameters passing those data by value. */
235 bool split_candidate;
236 /* Is this a parameter passing stuff by reference (either a pointer or a
237 source language reference type)? */
238 bool by_ref;
239 /* If this parameter passes stuff by reference, can it be safely dereferenced
240 without performing further checks (for example because it is a
241 REFERENCE_TYPE)? */
242 bool safe_ref;
243 /* If set, this parameter can only be a candidate for removal if the function
244 is going to loose its return value. */
245 bool remove_only_when_retval_removed;
246 /* If set, this parameter can only be a candidate for splitting if the
247 function is going to loose its return value. Can only be meaningfully set
248 for by_ref parameters. */
249 bool split_only_when_retval_removed;
250 /* Only meaningful for by_ref parameters. If set, this parameter can only be
251 a split candidate if all callers pass pointers that are known to point to
252 a chunk of memory large enough to contain all accesses. */
253 bool conditionally_dereferenceable;
255 /* The number of this parameter as they are ordered in function decl. */
256 int param_number;
257 /* For parameters passing data by reference, this is parameter index to
258 compute indices to bb_dereferences. */
259 int deref_index;
262 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
263 allocated in GC memory, this is not necessary and we can consider removing
264 the function. */
266 static void
267 free_param_decl_accesses (isra_param_desc *desc)
269 unsigned len = vec_safe_length (desc->accesses);
270 for (unsigned i = 0; i < len; ++i)
271 ggc_free ((*desc->accesses)[i]);
272 vec_free (desc->accesses);
275 /* Class used to convey information about functions from the
276 intra-procedural analysis stage to inter-procedural one. */
278 class GTY((for_user)) isra_func_summary
280 public:
281 /* initialize the object. */
283 isra_func_summary ()
284 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
285 m_return_ignored (false), m_queued (false)
288 /* Destroy m_parameters. */
290 ~isra_func_summary ();
292 /* Mark the function as not a candidate for any IPA-SRA transformation.
293 Return true if it was a candidate until now. */
295 bool zap ();
297 /* Vector of parameter descriptors corresponding to the function being
298 analyzed. */
299 vec<isra_param_desc, va_gc> *m_parameters;
301 /* Whether the node is even a candidate for any IPA-SRA transformation at
302 all. */
303 unsigned m_candidate : 1;
305 /* Whether the original function returns any value. */
306 unsigned m_returns_value : 1;
308 /* Set to true if all call statements do not actually use the returned
309 value. */
311 unsigned m_return_ignored : 1;
313 /* Whether the node is already queued in IPA SRA stack during processing of
314 call graphs SCCs. */
316 unsigned m_queued : 1;
319 /* Deallocate the memory pointed to by isra_func_summary. TODO: Since this
320 data structure is allocated in GC memory, this is not necessary and we can
321 consider removing the destructor. */
323 isra_func_summary::~isra_func_summary ()
325 unsigned len = vec_safe_length (m_parameters);
326 for (unsigned i = 0; i < len; ++i)
327 free_param_decl_accesses (&(*m_parameters)[i]);
328 vec_free (m_parameters);
331 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
332 true if it was a candidate until now. */
334 bool
335 isra_func_summary::zap ()
337 bool ret = m_candidate;
338 m_candidate = false;
340 /* TODO: see the destructor above. */
341 unsigned len = vec_safe_length (m_parameters);
342 for (unsigned i = 0; i < len; ++i)
343 free_param_decl_accesses (&(*m_parameters)[i]);
344 vec_free (m_parameters);
346 return ret;
349 /* Structure to describe which formal parameters feed into a particular actual
350 argument. */
352 struct isra_param_flow
354 /* Number of elements in array inputs that contain valid data. */
355 char length;
356 /* Indices of formal parameters that feed into the described actual argument.
357 If aggregate_pass_through or pointer_pass_through below are true, it must
358 contain exactly one element which is passed through from a formal
359 parameter if the given number. Otherwise, the array contains indices of
360 callee's formal parameters which are used to calculate value of this
361 actual argument. */
362 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
364 /* Offset within the formal parameter. */
365 unsigned unit_offset;
366 /* When aggregate_pass_through is set, this is the size of the portion of an
367 aggregate formal parameter that is being passed. Otherwise, this is size
368 of pointed to memory that is known to be valid be dereferenced. */
369 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
371 /* True when the value of this actual argument is a portion of a formal
372 parameter. */
373 unsigned aggregate_pass_through : 1;
374 /* True when the value of this actual copy is a verbatim pass through of an
375 obtained pointer. */
376 unsigned pointer_pass_through : 1;
377 /* True when it is safe to copy access candidates here from the callee, which
378 would mean introducing dereferences into callers of the caller. */
379 unsigned safe_to_import_accesses : 1;
380 /* True when the passed value is an address of a structure that has been
381 constructed in the caller just to be passed by reference to functions
382 (i.e. is never read). */
383 unsigned constructed_for_calls : 1;
386 /* Structure used to convey information about calls from the intra-procedural
387 analysis stage to inter-procedural one. */
389 class isra_call_summary
391 public:
392 isra_call_summary ()
393 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
394 m_bit_aligned_arg (false), m_before_any_store (false)
397 void init_inputs (unsigned arg_count);
398 void dump (FILE *f);
400 /* Information about what formal parameters of the caller are used to compute
401 individual actual arguments of this call. */
402 auto_vec <isra_param_flow> m_arg_flow;
404 /* Set to true if the call statement does not have a LHS. */
405 unsigned m_return_ignored : 1;
407 /* Set to true if the LHS of call statement is only used to construct the
408 return value of the caller. */
409 unsigned m_return_returned : 1;
411 /* Set when any of the call arguments are not byte-aligned. */
412 unsigned m_bit_aligned_arg : 1;
414 /* Set to true if the call happend before any (other) store to memory in the
415 caller. */
416 unsigned m_before_any_store : 1;
419 /* Class to manage function summaries. */
421 class GTY((user)) ipa_sra_function_summaries
422 : public function_summary <isra_func_summary *>
424 public:
425 ipa_sra_function_summaries (symbol_table *table, bool ggc):
426 function_summary<isra_func_summary *> (table, ggc) { }
428 void duplicate (cgraph_node *, cgraph_node *,
429 isra_func_summary *old_sum,
430 isra_func_summary *new_sum) final override;
431 void insert (cgraph_node *, isra_func_summary *) final override;
434 /* Hook that is called by summary when a node is duplicated. */
436 void
437 ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
438 isra_func_summary *old_sum,
439 isra_func_summary *new_sum)
441 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
442 useless. */
443 new_sum->m_candidate = old_sum->m_candidate;
444 new_sum->m_returns_value = old_sum->m_returns_value;
445 new_sum->m_return_ignored = old_sum->m_return_ignored;
446 gcc_assert (!old_sum->m_queued);
447 new_sum->m_queued = false;
449 unsigned param_count = vec_safe_length (old_sum->m_parameters);
450 if (!param_count)
451 return;
452 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
453 new_sum->m_parameters->quick_grow_cleared (param_count);
454 for (unsigned i = 0; i < param_count; i++)
456 isra_param_desc *s = &(*old_sum->m_parameters)[i];
457 isra_param_desc *d = &(*new_sum->m_parameters)[i];
459 d->param_size_limit = s->param_size_limit;
460 d->size_reached = s->size_reached;
461 d->safe_size = s->safe_size;
462 d->locally_unused = s->locally_unused;
463 d->split_candidate = s->split_candidate;
464 d->by_ref = s->by_ref;
465 d->remove_only_when_retval_removed = s->remove_only_when_retval_removed;
466 d->split_only_when_retval_removed = s->split_only_when_retval_removed;
467 d->not_specially_constructed = s->not_specially_constructed;
468 d->conditionally_dereferenceable = s->conditionally_dereferenceable;
469 d->safe_size_set = s->safe_size_set;
471 unsigned acc_count = vec_safe_length (s->accesses);
472 vec_safe_reserve_exact (d->accesses, acc_count);
473 for (unsigned j = 0; j < acc_count; j++)
475 param_access *from = (*s->accesses)[j];
476 param_access *to = ggc_cleared_alloc<param_access> ();
477 to->type = from->type;
478 to->alias_ptr_type = from->alias_ptr_type;
479 to->unit_offset = from->unit_offset;
480 to->unit_size = from->unit_size;
481 to->certain = from->certain;
482 to->reverse = from->reverse;
483 d->accesses->quick_push (to);
488 /* Pointer to the pass function summary holder. */
490 static GTY(()) ipa_sra_function_summaries *func_sums;
492 /* Hook that is called by summary when new node appears. */
494 void
495 ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
497 if (opt_for_fn (node->decl, flag_ipa_sra))
499 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
500 ipa_sra_summarize_function (node);
501 pop_cfun ();
503 else
504 func_sums->remove (node);
507 /* Class to manage call summaries. */
509 class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
511 public:
512 ipa_sra_call_summaries (symbol_table *table):
513 call_summary<isra_call_summary *> (table) { }
515 /* Duplicate info when an edge is cloned. */
516 void duplicate (cgraph_edge *, cgraph_edge *,
517 isra_call_summary *old_sum,
518 isra_call_summary *new_sum) final override;
521 static ipa_sra_call_summaries *call_sums;
524 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
525 ARG_COUNT is the number of actual arguments passed. */
527 void
528 isra_call_summary::init_inputs (unsigned arg_count)
530 if (arg_count == 0)
532 gcc_checking_assert (m_arg_flow.length () == 0);
533 return;
535 if (m_arg_flow.length () == 0)
537 m_arg_flow.reserve_exact (arg_count);
538 m_arg_flow.quick_grow_cleared (arg_count);
540 else
541 gcc_checking_assert (arg_count == m_arg_flow.length ());
544 /* Dump all information in call summary to F. */
546 void
547 isra_call_summary::dump (FILE *f)
549 if (m_return_ignored)
550 fprintf (f, " return value ignored\n");
551 if (m_return_returned)
552 fprintf (f, " return value used only to compute caller return value\n");
553 if (m_before_any_store)
554 fprintf (f, " happens before any store to memory\n");
555 for (unsigned i = 0; i < m_arg_flow.length (); i++)
557 fprintf (f, " Parameter %u:\n", i);
558 isra_param_flow *ipf = &m_arg_flow[i];
560 if (ipf->length)
562 bool first = true;
563 fprintf (f, " Scalar param sources: ");
564 for (int j = 0; j < ipf->length; j++)
566 if (!first)
567 fprintf (f, ", ");
568 else
569 first = false;
570 fprintf (f, "%i", (int) ipf->inputs[j]);
572 fprintf (f, "\n");
574 if (ipf->aggregate_pass_through)
575 fprintf (f, " Aggregate pass through from the param given above, "
576 "unit offset: %u , unit size: %u\n",
577 ipf->unit_offset, ipf->unit_size);
578 else if (ipf->unit_size > 0)
579 fprintf (f, " Known dereferenceable size: %u\n", ipf->unit_size);
580 if (ipf->pointer_pass_through)
581 fprintf (f, " Pointer pass through from the param given above, "
582 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
583 if (ipf->constructed_for_calls)
584 fprintf (f, " Variable constructed just to be passed to "
585 "calls.\n");
589 /* Duplicate edge summary when an edge is cloned. */
591 void
592 ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
593 isra_call_summary *old_sum,
594 isra_call_summary *new_sum)
596 unsigned arg_count = old_sum->m_arg_flow.length ();
597 new_sum->init_inputs (arg_count);
598 for (unsigned i = 0; i < arg_count; i++)
599 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
601 new_sum->m_return_ignored = old_sum->m_return_ignored;
602 new_sum->m_return_returned = old_sum->m_return_returned;
603 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
604 new_sum->m_before_any_store = old_sum->m_before_any_store;
608 /* With all GTY stuff done, we can move to anonymous namespace. */
609 namespace {
610 /* Quick mapping from a decl to its param descriptor. */
612 hash_map<tree, gensum_param_desc *> *decl2desc;
614 /* All local DECLs ever loaded from of and of those that have their address
615 assigned to a variable. */
617 hash_set <tree> *loaded_decls;
619 /* Countdown of allowed Alias Analysis steps during summary building. */
621 int aa_walking_limit;
623 /* This is a table in which for each basic block and parameter there is a
624 distance (offset + size) in that parameter which is dereferenced and
625 accessed in that BB. */
626 HOST_WIDE_INT *bb_dereferences = NULL;
627 /* How many by-reference parameters there are in the current function. */
628 int unsafe_by_ref_count;
630 /* Bitmap of BBs that can cause the function to "stop" progressing by
631 returning, throwing externally, looping infinitely or calling a function
632 which might abort etc.. */
633 bitmap final_bbs;
635 /* Obstack to allocate various small structures required only when generating
636 summary for a function. */
637 struct obstack gensum_obstack;
639 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
640 attributes, return true otherwise. NODE is the cgraph node of the current
641 function. */
643 static bool
644 ipa_sra_preliminary_function_checks (cgraph_node *node)
646 if (!node->can_change_signature)
648 if (dump_file)
649 fprintf (dump_file, "Function cannot change signature.\n");
650 return false;
653 if (!tree_versionable_function_p (node->decl))
655 if (dump_file)
656 fprintf (dump_file, "Function is not versionable.\n");
657 return false;
660 if (!opt_for_fn (node->decl, optimize)
661 || !opt_for_fn (node->decl, flag_ipa_sra))
663 if (dump_file)
664 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
665 "function.\n");
666 return false;
669 if (DECL_VIRTUAL_P (node->decl))
671 if (dump_file)
672 fprintf (dump_file, "Function is a virtual method.\n");
673 return false;
676 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
677 if (fun->stdarg)
679 if (dump_file)
680 fprintf (dump_file, "Function uses stdarg. \n");
681 return false;
684 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
686 if (dump_file)
687 fprintf (dump_file, "Always inline function will be inlined "
688 "anyway. \n");
689 return false;
692 return true;
695 /* Print access tree starting at ACCESS to F. */
697 static void
698 dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
700 fprintf (f, " ");
701 for (unsigned i = 0; i < indent; i++)
702 fprintf (f, " ");
703 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
704 access->offset);
705 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
706 fprintf (f, ", type: ");
707 print_generic_expr (f, access->type);
708 fprintf (f, ", alias_ptr_type: ");
709 print_generic_expr (f, access->alias_ptr_type);
710 fprintf (f, ", load_count: ");
711 access->load_count.dump (f);
712 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
713 for (gensum_param_access *ch = access->first_child;
715 ch = ch->next_sibling)
716 dump_gensum_access (f, ch, indent + 2);
720 /* Print access tree starting at ACCESS to F. */
722 static void
723 dump_isra_access (FILE *f, param_access *access)
725 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
726 fprintf (f, ", unit size: %u", access->unit_size);
727 fprintf (f, ", type: ");
728 print_generic_expr (f, access->type);
729 fprintf (f, ", alias_ptr_type: ");
730 print_generic_expr (f, access->alias_ptr_type);
731 if (access->certain)
732 fprintf (f, ", certain");
733 else
734 fprintf (f, ", not certain");
735 if (access->reverse)
736 fprintf (f, ", reverse");
737 fprintf (f, "\n");
740 /* Dump access tree starting at ACCESS to stderr. */
742 DEBUG_FUNCTION void
743 debug_isra_access (param_access *access)
745 dump_isra_access (stderr, access);
748 /* Dump DESC to F. */
750 static void
751 dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
753 if (desc->locally_unused)
754 fprintf (f, " unused with %i call_uses%s\n", desc->call_uses,
755 desc->remove_only_when_retval_removed ?
756 " remove_only_when_retval_removed" : "");
757 if (!desc->split_candidate)
759 fprintf (f, " not a candidate\n");
760 return;
762 if (desc->by_ref)
763 fprintf (f, " %s%s%s by_ref with %u pass throughs\n",
764 desc->safe_ref ? "safe" : "unsafe",
765 desc->conditionally_dereferenceable
766 ? " conditionally_dereferenceable" : "",
767 desc->split_only_when_retval_removed
768 ? " split_only_when_retval_removed" : "",
769 desc->ptr_pt_count);
771 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
772 dump_gensum_access (f, acc, 2);
775 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
776 F. */
778 static void
779 dump_gensum_param_descriptors (FILE *f, tree fndecl,
780 vec<gensum_param_desc> *param_descriptions)
782 tree parm = DECL_ARGUMENTS (fndecl);
783 for (unsigned i = 0;
784 i < param_descriptions->length ();
785 ++i, parm = DECL_CHAIN (parm))
787 fprintf (f, " Descriptor for parameter %i ", i);
788 print_generic_expr (f, parm, TDF_UID);
789 fprintf (f, "\n");
790 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
795 /* Dump DESC to F. If HINTS is true, also dump IPA-analysis computed
796 hints. */
798 static void
799 dump_isra_param_descriptor (FILE *f, isra_param_desc *desc, bool hints)
801 if (desc->locally_unused)
803 fprintf (f, " (locally) unused\n");
805 if (!desc->split_candidate)
807 fprintf (f, " not a candidate for splitting");
808 if (hints && desc->by_ref && desc->safe_size_set)
809 fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
810 fprintf (f, "\n");
811 return;
813 fprintf (f, " param_size_limit: %u, size_reached: %u%s",
814 desc->param_size_limit, desc->size_reached,
815 desc->by_ref ? ", by_ref" : "");
816 if (desc->remove_only_when_retval_removed)
817 fprintf (f, ", remove_only_when_retval_removed");
818 if (desc->split_only_when_retval_removed)
819 fprintf (f, ", split_only_when_retval_removed");
820 if (desc->by_ref && desc->conditionally_dereferenceable)
821 fprintf (f, ", conditionally_dereferenceable");
822 if (hints)
824 if (desc->by_ref && !desc->not_specially_constructed)
825 fprintf (f, ", args_specially_constructed");
826 if (desc->by_ref && desc->safe_size_set)
827 fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
829 fprintf (f, "\n");
831 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
833 param_access *access = (*desc->accesses)[i];
834 dump_isra_access (f, access);
838 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
839 If HINTS is true, also dump IPA-analysis computed hints. */
841 static void
842 dump_isra_param_descriptors (FILE *f, tree fndecl, isra_func_summary *ifs,
843 bool hints)
845 tree parm = DECL_ARGUMENTS (fndecl);
846 if (!ifs->m_parameters)
848 fprintf (f, " parameter descriptors not available\n");
849 return;
852 for (unsigned i = 0;
853 i < ifs->m_parameters->length ();
854 ++i, parm = DECL_CHAIN (parm))
856 fprintf (f, " Descriptor for parameter %i ", i);
857 print_generic_expr (f, parm, TDF_UID);
858 fprintf (f, "\n");
859 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
863 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
864 function fails return false, otherwise return true. SRC must fit into an
865 unsigned char. Used for purposes of transitive unused parameter
866 removal. */
868 static bool
869 add_src_to_param_flow (isra_param_flow *param_flow, int src)
871 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
872 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
873 return false;
875 param_flow->inputs[(int) param_flow->length] = src;
876 param_flow->length++;
877 return true;
880 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
881 it is the only input. Used for purposes of transitive parameter
882 splitting. */
884 static void
885 set_single_param_flow_source (isra_param_flow *param_flow, int src)
887 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
888 if (param_flow->length == 0)
890 param_flow->inputs[0] = src;
891 param_flow->length = 1;
893 else if (param_flow->length == 1)
894 gcc_assert (param_flow->inputs[0] == src);
895 else
896 gcc_unreachable ();
899 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
900 it. */
902 static unsigned
903 get_single_param_flow_source (const isra_param_flow *param_flow)
905 gcc_assert (param_flow->length == 1);
906 return param_flow->inputs[0];
909 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
910 in FUN represented with NODE and return a negative number if any of them is
911 used for something else than either an actual call argument, simple return,
912 simple arithmetic operation or debug statement. If there are no such uses,
913 return the number of actual arguments that this parameter eventually feeds
914 to (or zero if there is none). If there are any simple return uses, set
915 DESC->remove_only_when_retval_removed. For any such parameter, mark
916 PARM_NUM as one of its sources. ANALYZED is a bitmap that tracks which SSA
917 names we have already started investigating. */
919 static int
920 isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
921 int parm_num, bitmap analyzed,
922 gensum_param_desc *desc)
924 int res = 0;
925 imm_use_iterator imm_iter;
926 gimple *stmt;
928 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
930 if (is_gimple_debug (stmt)
931 || gimple_clobber_p (stmt))
932 continue;
934 /* TODO: We could handle at least const builtin functions like arithmetic
935 operations below. */
936 if (is_gimple_call (stmt))
938 int all_uses = 0;
939 use_operand_p use_p;
940 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
941 all_uses++;
943 gcall *call = as_a <gcall *> (stmt);
944 unsigned arg_count;
945 if (gimple_call_internal_p (call)
946 || (arg_count = gimple_call_num_args (call)) == 0)
948 res = -1;
949 break;
952 cgraph_edge *cs = node->get_edge (stmt);
953 gcc_checking_assert (cs);
954 isra_call_summary *csum = call_sums->get_create (cs);
955 csum->init_inputs (arg_count);
957 int simple_uses = 0;
958 for (unsigned i = 0; i < arg_count; i++)
959 if (gimple_call_arg (call, i) == name)
961 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
963 simple_uses = -1;
964 break;
966 simple_uses++;
969 if (simple_uses < 0
970 || all_uses != simple_uses)
972 res = -1;
973 break;
975 res += all_uses;
977 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
978 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
979 || gimple_code (stmt) == GIMPLE_PHI))
981 tree lhs;
982 if (gimple_code (stmt) == GIMPLE_PHI)
983 lhs = gimple_phi_result (stmt);
984 else
985 lhs = gimple_assign_lhs (stmt);
987 if (TREE_CODE (lhs) != SSA_NAME)
989 res = -1;
990 break;
992 gcc_assert (!gimple_vdef (stmt));
993 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
995 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
996 analyzed, desc);
997 if (tmp < 0)
999 res = tmp;
1000 break;
1002 res += tmp;
1005 else if (greturn *gr = dyn_cast<greturn *>(stmt))
1007 tree rv = gimple_return_retval (gr);
1008 if (rv != name)
1010 res = -1;
1011 break;
1013 desc->remove_only_when_retval_removed = true;
1015 else
1017 res = -1;
1018 break;
1021 return res;
1024 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
1025 also described by NODE) and simple arithmetic calculations involving PARM
1026 and return false if any of them is used for something else than either an
1027 actual call argument, simple return, simple arithmetic operation or debug
1028 statement. If there are no such uses, return true and store the number of
1029 actual arguments that this parameter eventually feeds to (or zero if there
1030 is none) to DESC->call_uses and set DESC->remove_only_when_retval_removed if
1031 there are any uses in return statemens. For any such parameter, mark
1032 PARM_NUM as one of its sources.
1034 This function is similar to ptr_parm_has_nonarg_uses but its results are
1035 meant for unused parameter removal, as opposed to splitting of parameters
1036 passed by reference or converting them to passed by value. */
1038 static bool
1039 isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
1040 int parm_num, gensum_param_desc *desc)
1042 gcc_checking_assert (is_gimple_reg (parm));
1044 tree name = ssa_default_def (fun, parm);
1045 if (!name || has_zero_uses (name))
1047 desc->call_uses = 0;
1048 return false;
1051 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1052 if (parm_num > UCHAR_MAX)
1053 return true;
1055 bitmap analyzed = BITMAP_ALLOC (NULL);
1056 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
1057 analyzed, desc);
1058 BITMAP_FREE (analyzed);
1059 if (call_uses < 0)
1060 return true;
1061 desc->call_uses = call_uses;
1062 return false;
1065 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
1066 examine whether there are any nonarg uses that are not actual arguments or
1067 otherwise infeasible uses. If so, return true, otherwise return false.
1068 Create pass-through IPA flow records for any direct uses as argument calls
1069 and if returning false, store their number into DESC->ptr_pt_count. If
1070 removal of return value would still allow splitting, return true but set
1071 DESC->split_only_when_retval_removed. NODE and FUN must represent the
1072 function that is currently analyzed, PARM_NUM must be the index of the
1073 analyzed parameter.
1075 This function is similar to isra_track_scalar_param_local_uses but its
1076 results are meant for splitting of parameters passed by reference or turning
1077 them into bits passed by value, as opposed to generic unused parameter
1078 removal. */
1080 static bool
1081 ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
1082 int parm_num, gensum_param_desc *desc)
1084 imm_use_iterator ui;
1085 gimple *stmt;
1086 tree name = ssa_default_def (fun, parm);
1087 bool ret = false;
1088 unsigned pt_count = 0;
1090 if (!name || has_zero_uses (name))
1091 return false;
1093 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1094 if (parm_num > UCHAR_MAX)
1095 return true;
1097 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
1099 unsigned uses_ok = 0;
1100 use_operand_p use_p;
1102 if (is_gimple_debug (stmt)
1103 || gimple_clobber_p (stmt))
1104 continue;
1106 if (gimple_assign_single_p (stmt))
1108 tree rhs = gimple_assign_rhs1 (stmt);
1109 if (!TREE_THIS_VOLATILE (rhs))
1111 while (handled_component_p (rhs))
1112 rhs = TREE_OPERAND (rhs, 0);
1113 if (TREE_CODE (rhs) == MEM_REF
1114 && TREE_OPERAND (rhs, 0) == name
1115 && integer_zerop (TREE_OPERAND (rhs, 1))
1116 && types_compatible_p (TREE_TYPE (rhs),
1117 TREE_TYPE (TREE_TYPE (name))))
1118 uses_ok++;
1121 else if (is_gimple_call (stmt))
1123 gcall *call = as_a <gcall *> (stmt);
1124 unsigned arg_count;
1125 if (gimple_call_internal_p (call)
1126 || (arg_count = gimple_call_num_args (call)) == 0)
1128 ret = true;
1129 break;
1132 cgraph_edge *cs = node->get_edge (stmt);
1133 gcc_checking_assert (cs);
1134 isra_call_summary *csum = call_sums->get_create (cs);
1135 csum->init_inputs (arg_count);
1137 for (unsigned i = 0; i < arg_count; ++i)
1139 tree arg = gimple_call_arg (stmt, i);
1141 if (arg == name)
1143 /* TODO: Allow &MEM_REF[name + offset] here,
1144 ipa_param_body_adjustments::modify_call_stmt has to be
1145 adjusted too. */
1146 csum->m_arg_flow[i].pointer_pass_through = true;
1147 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1148 pt_count++;
1149 uses_ok++;
1150 continue;
1153 if (!TREE_THIS_VOLATILE (arg))
1155 while (handled_component_p (arg))
1156 arg = TREE_OPERAND (arg, 0);
1157 if (TREE_CODE (arg) == MEM_REF
1158 && TREE_OPERAND (arg, 0) == name
1159 && integer_zerop (TREE_OPERAND (arg, 1))
1160 && types_compatible_p (TREE_TYPE (arg),
1161 TREE_TYPE (TREE_TYPE (name))))
1162 uses_ok++;
1166 else if (greturn *gr = dyn_cast<greturn *>(stmt))
1168 tree rv = gimple_return_retval (gr);
1169 if (rv == name)
1171 uses_ok++;
1172 /* Analysis for feasibility of removal must have already reached
1173 the conclusion that the flag must be set if it completed. */
1174 gcc_assert (!desc->locally_unused
1175 || desc->remove_only_when_retval_removed);
1176 desc->split_only_when_retval_removed = true;
1180 /* If the number of valid uses does not match the number of
1181 uses in this stmt there is an unhandled use. */
1182 unsigned all_uses = 0;
1183 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1184 all_uses++;
1186 gcc_checking_assert (uses_ok <= all_uses);
1187 if (uses_ok != all_uses)
1189 ret = true;
1190 break;
1194 desc->ptr_pt_count = pt_count;
1195 return ret;
1198 /* Initialize vector of parameter descriptors of NODE. Return true if there
1199 are any candidates for splitting or unused aggregate parameter removal (the
1200 function may return false if there are candidates for removal of register
1201 parameters). */
1203 static bool
1204 create_parameter_descriptors (cgraph_node *node,
1205 vec<gensum_param_desc> *param_descriptions)
1207 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1208 bool ret = false;
1210 int num = 0;
1211 for (tree parm = DECL_ARGUMENTS (node->decl);
1212 parm;
1213 parm = DECL_CHAIN (parm), num++)
1215 const char *msg;
1216 gensum_param_desc *desc = &(*param_descriptions)[num];
1217 /* param_descriptions vector is grown cleared in the caller. */
1218 desc->param_number = num;
1219 decl2desc->put (parm, desc);
1221 if (dump_file && (dump_flags & TDF_DETAILS))
1222 print_generic_expr (dump_file, parm, TDF_UID);
1224 tree type = TREE_TYPE (parm);
1225 if (TREE_THIS_VOLATILE (parm))
1227 if (dump_file && (dump_flags & TDF_DETAILS))
1228 fprintf (dump_file, " not a candidate, is volatile\n");
1229 continue;
1231 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1233 if (dump_file && (dump_flags & TDF_DETAILS))
1234 fprintf (dump_file, " not a candidate, is a va_list type\n");
1235 continue;
1237 if (TREE_ADDRESSABLE (parm))
1239 if (dump_file && (dump_flags & TDF_DETAILS))
1240 fprintf (dump_file, " not a candidate, is addressable\n");
1241 continue;
1243 if (TREE_ADDRESSABLE (type))
1245 if (dump_file && (dump_flags & TDF_DETAILS))
1246 fprintf (dump_file, " not a candidate, type cannot be split\n");
1247 continue;
1250 if (is_gimple_reg (parm)
1251 && !isra_track_scalar_param_local_uses (fun, node, parm, num, desc))
1253 desc->locally_unused = true;
1255 if (dump_file && (dump_flags & TDF_DETAILS))
1256 fprintf (dump_file, " is a scalar with only %i call uses%s\n",
1257 desc->call_uses,
1258 desc->remove_only_when_retval_removed
1259 ? " and return uses": "");
1262 if (POINTER_TYPE_P (type))
1264 desc->by_ref = true;
1265 if (TREE_CODE (type) == REFERENCE_TYPE
1266 || (num == 0
1267 && TREE_CODE (TREE_TYPE (node->decl)) == METHOD_TYPE))
1268 desc->safe_ref = true;
1269 else
1270 desc->safe_ref = false;
1271 type = TREE_TYPE (type);
1273 if (TREE_CODE (type) == FUNCTION_TYPE
1274 || TREE_CODE (type) == METHOD_TYPE)
1276 if (dump_file && (dump_flags & TDF_DETAILS))
1277 fprintf (dump_file, " not a candidate, reference to "
1278 "a function\n");
1279 continue;
1281 if (TYPE_VOLATILE (type))
1283 if (dump_file && (dump_flags & TDF_DETAILS))
1284 fprintf (dump_file, " not a candidate, reference to "
1285 "a volatile type\n");
1286 continue;
1288 if (TREE_CODE (type) == ARRAY_TYPE
1289 && TYPE_NONALIASED_COMPONENT (type))
1291 if (dump_file && (dump_flags & TDF_DETAILS))
1292 fprintf (dump_file, " not a candidate, reference to "
1293 "a nonaliased component array\n");
1294 continue;
1296 if (!is_gimple_reg (parm))
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1299 fprintf (dump_file, " not a candidate, a reference which is "
1300 "not a gimple register (probably addressable)\n");
1301 continue;
1303 if (is_va_list_type (type))
1305 if (dump_file && (dump_flags & TDF_DETAILS))
1306 fprintf (dump_file, " not a candidate, reference to "
1307 "a va list\n");
1308 continue;
1310 if (ptr_parm_has_nonarg_uses (node, fun, parm, num, desc))
1312 if (dump_file && (dump_flags & TDF_DETAILS))
1313 fprintf (dump_file, " not a candidate, reference has "
1314 "nonarg uses\n");
1315 continue;
1318 else if (!AGGREGATE_TYPE_P (type))
1320 /* This is in an else branch because scalars passed by reference are
1321 still candidates to be passed by value. */
1322 if (dump_file && (dump_flags & TDF_DETAILS))
1323 fprintf (dump_file, " not a candidate, not an aggregate\n");
1324 continue;
1327 if (!COMPLETE_TYPE_P (type))
1329 if (dump_file && (dump_flags & TDF_DETAILS))
1330 fprintf (dump_file, " not a candidate, not a complete type\n");
1331 continue;
1333 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1335 if (dump_file && (dump_flags & TDF_DETAILS))
1336 fprintf (dump_file, " not a candidate, size not representable\n");
1337 continue;
1339 unsigned HOST_WIDE_INT type_size
1340 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1341 if (type_size == 0
1342 || type_size >= ISRA_ARG_SIZE_LIMIT)
1344 if (dump_file && (dump_flags & TDF_DETAILS))
1345 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1346 continue;
1348 if (type_internals_preclude_sra_p (type, &msg))
1350 if (dump_file && (dump_flags & TDF_DETAILS))
1351 fprintf (dump_file, " not a candidate, %s\n", msg);
1352 continue;
1355 if (dump_file && (dump_flags & TDF_DETAILS))
1356 fprintf (dump_file, " is a candidate\n");
1358 ret = true;
1359 desc->split_candidate = true;
1360 if (desc->by_ref && !desc->safe_ref)
1361 desc->deref_index = unsafe_by_ref_count++;
1363 return ret;
1366 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1367 found, which happens if DECL is for a static chain. */
1369 static gensum_param_desc *
1370 get_gensum_param_desc (tree decl)
1372 if (!decl2desc)
1373 return NULL;
1374 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1375 gensum_param_desc **slot = decl2desc->get (decl);
1376 if (!slot)
1377 /* This can happen for static chains which we cannot handle so far. */
1378 return NULL;
1379 gcc_checking_assert (*slot);
1380 return *slot;
1384 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1385 write REASON to the dump file if there is one. */
1387 static void
1388 disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1390 if (!desc->split_candidate)
1391 return;
1393 if (dump_file && (dump_flags & TDF_DETAILS))
1394 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1395 desc->param_number, reason);
1397 desc->split_candidate = false;
1400 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1401 there is one. */
1403 static void
1404 disqualify_split_candidate (tree decl, const char *reason)
1406 gensum_param_desc *desc = get_gensum_param_desc (decl);
1407 if (desc)
1408 disqualify_split_candidate (desc, reason);
1411 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1412 first, check that there are not too many of them already. If so, do not
1413 allocate anything and return NULL. */
1415 static gensum_param_access *
1416 allocate_access (gensum_param_desc *desc,
1417 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1419 if (desc->access_count
1420 == (unsigned) param_ipa_sra_max_replacements)
1422 disqualify_split_candidate (desc, "Too many replacement candidates");
1423 return NULL;
1426 gensum_param_access *access
1427 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1428 sizeof (gensum_param_access));
1429 memset (access, 0, sizeof (*access));
1430 access->offset = offset;
1431 access->size = size;
1432 access->load_count = profile_count::zero ();
1433 return access;
1436 /* In what context scan_expr_access has been called, whether it deals with a
1437 load, a function argument, or a store. Please note that in rare
1438 circumstances when it is not clear if the access is a load or store,
1439 ISRA_CTX_STORE is used too. */
1441 enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1443 /* Return an access describing memory access to the variable described by DESC
1444 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1445 at a certain tree level FIRST. Attempt to create it and put into the
1446 appropriate place in the access tree if does not exist, but fail and return
1447 NULL if there are already too many accesses, if it would create a partially
1448 overlapping access or if an access would end up within a pre-existing
1449 non-call access. */
1451 static gensum_param_access *
1452 get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1453 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1455 gensum_param_access *access = *first, **ptr = first;
1457 if (!access)
1459 /* No pre-existing access at this level, just create it. */
1460 gensum_param_access *a = allocate_access (desc, offset, size);
1461 if (!a)
1462 return NULL;
1463 *first = a;
1464 return *first;
1467 if (access->offset >= offset + size)
1469 /* We want to squeeze it in front of the very first access, just do
1470 it. */
1471 gensum_param_access *r = allocate_access (desc, offset, size);
1472 if (!r)
1473 return NULL;
1474 r->next_sibling = access;
1475 *first = r;
1476 return r;
1479 /* Skip all accesses that have to come before us until the next sibling is
1480 already too far. */
1481 while (offset >= access->offset + access->size
1482 && access->next_sibling
1483 && access->next_sibling->offset < offset + size)
1485 ptr = &access->next_sibling;
1486 access = access->next_sibling;
1489 /* At this point we know we do not belong before access. */
1490 gcc_assert (access->offset < offset + size);
1492 if (access->offset == offset && access->size == size)
1493 /* We found what we were looking for. */
1494 return access;
1496 if (access->offset <= offset
1497 && access->offset + access->size >= offset + size)
1499 /* We fit into access which is larger than us. We need to find/create
1500 something below access. But we only allow nesting in call
1501 arguments. */
1502 if (access->nonarg)
1503 return NULL;
1505 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1508 if (offset <= access->offset
1509 && offset + size >= access->offset + access->size)
1510 /* We are actually bigger than access, which fully fits into us, take its
1511 place and make all accesses fitting into it its children. */
1513 /* But first, we only allow nesting in call arguments so check if that is
1514 what we are trying to represent. */
1515 if (ctx != ISRA_CTX_ARG)
1516 return NULL;
1518 gensum_param_access *r = allocate_access (desc, offset, size);
1519 if (!r)
1520 return NULL;
1521 r->first_child = access;
1523 while (access->next_sibling
1524 && access->next_sibling->offset < offset + size)
1525 access = access->next_sibling;
1526 if (access->offset + access->size > offset + size)
1528 /* This must be a different access, which are sorted, so the
1529 following must be true and this signals a partial overlap. */
1530 gcc_assert (access->offset > offset);
1531 return NULL;
1534 r->next_sibling = access->next_sibling;
1535 access->next_sibling = NULL;
1536 *ptr = r;
1537 return r;
1540 if (offset >= access->offset + access->size)
1542 /* We belong after access. */
1543 gensum_param_access *r = allocate_access (desc, offset, size);
1544 if (!r)
1545 return NULL;
1546 r->next_sibling = access->next_sibling;
1547 access->next_sibling = r;
1548 return r;
1551 if (offset < access->offset)
1553 /* We know the following, otherwise we would have created a
1554 super-access. */
1555 gcc_checking_assert (offset + size < access->offset + access->size);
1556 return NULL;
1559 if (offset + size > access->offset + access->size)
1561 /* Likewise. */
1562 gcc_checking_assert (offset > access->offset);
1563 return NULL;
1566 gcc_unreachable ();
1569 /* Return an access describing memory access to the variable described by DESC
1570 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1571 to create if it does not exist, but fail and return NULL if there are
1572 already too many accesses, if it would create a partially overlapping access
1573 or if an access would end up in a non-call access. */
1575 static gensum_param_access *
1576 get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1577 isra_scan_context ctx)
1579 gcc_checking_assert (desc->split_candidate);
1581 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1582 size, ctx);
1583 if (!access)
1585 disqualify_split_candidate (desc,
1586 "Bad access overlap or too many accesses");
1587 return NULL;
1590 switch (ctx)
1592 case ISRA_CTX_STORE:
1593 gcc_assert (!desc->by_ref);
1594 /* Fall-through */
1595 case ISRA_CTX_LOAD:
1596 access->nonarg = true;
1597 break;
1598 case ISRA_CTX_ARG:
1599 break;
1602 return access;
1605 /* Verify that parameter access tree starting with ACCESS is in good shape.
1606 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1607 ACCESS or zero if there is none. */
1609 static bool
1610 verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1611 HOST_WIDE_INT parent_size)
1613 while (access)
1615 gcc_assert (access->offset >= 0 && access->size >= 0);
1617 if (parent_size != 0)
1619 if (access->offset < parent_offset)
1621 error ("Access offset before parent offset");
1622 return true;
1624 if (access->size >= parent_size)
1626 error ("Access size greater or equal to its parent size");
1627 return true;
1629 if (access->offset + access->size > parent_offset + parent_size)
1631 error ("Access terminates outside of its parent");
1632 return true;
1636 if (verify_access_tree_1 (access->first_child, access->offset,
1637 access->size))
1638 return true;
1640 if (access->next_sibling
1641 && (access->next_sibling->offset < access->offset + access->size))
1643 error ("Access overlaps with its sibling");
1644 return true;
1647 access = access->next_sibling;
1649 return false;
1652 /* Verify that parameter access tree starting with ACCESS is in good shape,
1653 halt compilation and dump the tree to stderr if not. */
1655 DEBUG_FUNCTION void
1656 isra_verify_access_tree (gensum_param_access *access)
1658 if (verify_access_tree_1 (access, 0, 0))
1660 for (; access; access = access->next_sibling)
1661 dump_gensum_access (stderr, access, 2);
1662 internal_error ("IPA-SRA access verification failed");
1667 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1668 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1670 static bool
1671 asm_visit_addr (gimple *, tree op, tree, void *)
1673 op = get_base_address (op);
1674 if (op
1675 && TREE_CODE (op) == PARM_DECL)
1676 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1678 return false;
1681 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1682 basic block BB, unless the BB has already been marked as a potentially
1683 final. */
1685 static void
1686 mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1687 basic_block bb)
1689 gcc_assert (desc->by_ref);
1690 gcc_checking_assert (desc->split_candidate);
1692 if (desc->safe_ref
1693 || bitmap_bit_p (final_bbs, bb->index))
1694 return;
1696 int idx = bb->index * unsafe_by_ref_count + desc->deref_index;
1697 if (bb_dereferences[idx] < dist)
1698 bb_dereferences[idx] = dist;
1701 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1702 previously recorded OLD_TYPE. */
1704 static bool
1705 type_prevails_p (tree old_type, tree new_type)
1707 if (old_type == new_type)
1708 return false;
1710 /* Non-aggregates are always better. */
1711 if (!is_gimple_reg_type (old_type)
1712 && is_gimple_reg_type (new_type))
1713 return true;
1714 if (is_gimple_reg_type (old_type)
1715 && !is_gimple_reg_type (new_type))
1716 return false;
1718 /* Prefer any complex or vector type over any other scalar type. */
1719 if (TREE_CODE (old_type) != COMPLEX_TYPE
1720 && TREE_CODE (old_type) != VECTOR_TYPE
1721 && (TREE_CODE (new_type) == COMPLEX_TYPE
1722 || VECTOR_TYPE_P (new_type)))
1723 return true;
1724 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1725 || VECTOR_TYPE_P (old_type))
1726 && TREE_CODE (new_type) != COMPLEX_TYPE
1727 && TREE_CODE (new_type) != VECTOR_TYPE)
1728 return false;
1730 /* Use the integral type with the bigger precision. */
1731 if (INTEGRAL_TYPE_P (old_type)
1732 && INTEGRAL_TYPE_P (new_type))
1733 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1735 /* Attempt to disregard any integral type with non-full precision. */
1736 if (INTEGRAL_TYPE_P (old_type)
1737 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1738 != TYPE_PRECISION (old_type)))
1739 return true;
1740 if (INTEGRAL_TYPE_P (new_type)
1741 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1742 != TYPE_PRECISION (new_type)))
1743 return false;
1744 /* Stabilize the selection. */
1745 return TYPE_UID (old_type) < TYPE_UID (new_type);
1748 /* When scanning an expression which is a call argument, this structure
1749 specifies the call and the position of the argument. */
1751 struct scan_call_info
1753 /* Call graph edge representing the call. */
1754 cgraph_edge *cs;
1755 /* Total number of arguments in the call. */
1756 unsigned argument_count;
1757 /* Number of the actual argument being scanned. */
1758 unsigned arg_idx;
1761 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1762 call argument described by CALL_INFO. */
1764 static void
1765 record_nonregister_call_use (gensum_param_desc *desc,
1766 scan_call_info *call_info,
1767 unsigned unit_offset, unsigned unit_size)
1769 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1770 csum->init_inputs (call_info->argument_count);
1772 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1773 param_flow->aggregate_pass_through = true;
1774 set_single_param_flow_source (param_flow, desc->param_number);
1775 param_flow->unit_offset = unit_offset;
1776 param_flow->unit_size = unit_size;
1777 desc->call_uses++;
1780 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1781 modification. */
1783 static bool
1784 mark_maybe_modified (ao_ref *, tree, void *data)
1786 bool *maybe_modified = (bool *) data;
1787 *maybe_modified = true;
1788 return true;
1791 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1792 specifies whether EXPR is used in a load, store or as an argument call. BB
1793 must be the basic block in which expr resides. If CTX specifies call
1794 argument context, CALL_INFO must describe that call and argument position,
1795 otherwise it is ignored. */
1797 static void
1798 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1799 basic_block bb, scan_call_info *call_info = NULL)
1801 poly_int64 poffset, psize, pmax_size;
1802 HOST_WIDE_INT offset, size, max_size;
1803 tree base;
1804 bool deref = false;
1805 bool reverse;
1807 if (TREE_CODE (expr) == ADDR_EXPR)
1809 if (ctx == ISRA_CTX_ARG)
1810 return;
1811 tree t = get_base_address (TREE_OPERAND (expr, 0));
1812 if (VAR_P (t) && !TREE_STATIC (t))
1813 loaded_decls->add (t);
1814 return;
1816 if (TREE_CODE (expr) == SSA_NAME
1817 || CONSTANT_CLASS_P (expr))
1818 return;
1820 if (TREE_CODE (expr) == BIT_FIELD_REF
1821 || TREE_CODE (expr) == IMAGPART_EXPR
1822 || TREE_CODE (expr) == REALPART_EXPR)
1823 expr = TREE_OPERAND (expr, 0);
1825 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1827 if (TREE_CODE (base) == MEM_REF)
1829 tree op = TREE_OPERAND (base, 0);
1830 if (TREE_CODE (op) != SSA_NAME
1831 || !SSA_NAME_IS_DEFAULT_DEF (op))
1832 return;
1833 base = SSA_NAME_VAR (op);
1834 if (!base)
1835 return;
1836 deref = true;
1838 else if (VAR_P (base)
1839 && !TREE_STATIC (base)
1840 && (ctx == ISRA_CTX_ARG
1841 || ctx == ISRA_CTX_LOAD))
1842 loaded_decls->add (base);
1844 if (TREE_CODE (base) != PARM_DECL)
1845 return;
1847 gensum_param_desc *desc = get_gensum_param_desc (base);
1848 if (!desc || !desc->split_candidate)
1849 return;
1851 if (!poffset.is_constant (&offset)
1852 || !psize.is_constant (&size)
1853 || !pmax_size.is_constant (&max_size))
1855 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1856 "access.");
1857 return;
1859 if (size < 0 || size != max_size)
1861 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1862 return;
1864 if (TREE_CODE (expr) == COMPONENT_REF
1865 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1867 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1868 return;
1870 if (offset < 0)
1872 disqualify_split_candidate (desc, "Encountered an access at a "
1873 "negative offset.");
1874 return;
1876 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1877 gcc_assert ((size % BITS_PER_UNIT) == 0);
1878 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1879 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1881 disqualify_split_candidate (desc, "Encountered an access with too big "
1882 "offset or size");
1883 return;
1886 tree type = TREE_TYPE (expr);
1887 unsigned int exp_align = get_object_alignment (expr);
1889 if (exp_align < TYPE_ALIGN (type))
1891 disqualify_split_candidate (desc, "Underaligned access.");
1892 return;
1895 if (deref)
1897 if (!desc->by_ref)
1899 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1900 return;
1902 else if (ctx == ISRA_CTX_STORE)
1904 disqualify_split_candidate (desc, "Storing to data passed by "
1905 "reference.");
1906 return;
1909 if (!aa_walking_limit)
1911 disqualify_split_candidate (desc, "Out of alias analysis step "
1912 "limit.");
1913 return;
1916 gcc_checking_assert (gimple_vuse (stmt));
1917 bool maybe_modified = false;
1918 ao_ref ar;
1920 ao_ref_init (&ar, expr);
1921 bitmap visited = BITMAP_ALLOC (NULL);
1922 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1923 mark_maybe_modified, &maybe_modified,
1924 &visited, NULL, aa_walking_limit);
1925 BITMAP_FREE (visited);
1926 if (walked > 0)
1928 gcc_assert (aa_walking_limit > walked);
1929 aa_walking_limit = aa_walking_limit - walked;
1931 if (walked < 0)
1932 aa_walking_limit = 0;
1933 if (maybe_modified || walked < 0)
1935 disqualify_split_candidate (desc, "Data passed by reference possibly "
1936 "modified through an alias.");
1937 return;
1939 else
1940 mark_param_dereference (desc, offset + size, bb);
1942 else
1943 /* Pointer parameters with direct uses should have been ruled out by
1944 analyzing SSA default def when looking at the parameters. */
1945 gcc_assert (!desc->by_ref);
1947 gensum_param_access *access = get_access (desc, offset, size, ctx);
1948 if (!access)
1949 return;
1951 if (ctx == ISRA_CTX_ARG)
1953 gcc_checking_assert (call_info);
1955 if (!deref)
1956 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1957 size / BITS_PER_UNIT);
1958 else
1959 /* This is not a pass-through of a pointer, this is a use like any
1960 other. */
1961 access->nonarg = true;
1963 else if (ctx == ISRA_CTX_LOAD && bb->count.initialized_p ())
1964 access->load_count += bb->count;
1966 if (!access->type)
1968 access->type = type;
1969 access->alias_ptr_type = reference_alias_ptr_type (expr);
1970 access->reverse = reverse;
1972 else
1974 if (exp_align < TYPE_ALIGN (access->type))
1976 disqualify_split_candidate (desc, "Reference has lower alignment "
1977 "than a previous one.");
1978 return;
1980 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1982 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1983 return;
1985 if (access->reverse != reverse)
1987 disqualify_split_candidate (desc, "Both normal and reverse "
1988 "scalar storage order.");
1989 return;
1991 if (!deref
1992 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1993 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1995 /* We need the same aggregate type on all accesses to be able to
1996 distinguish transformation spots from pass-through arguments in
1997 the transformation phase. */
1998 disqualify_split_candidate (desc, "We do not support aggregate "
1999 "type punning.");
2000 return;
2003 if (type_prevails_p (access->type, type))
2004 access->type = type;
2008 /* Scan body function described by NODE and FUN and create access trees for
2009 parameters. */
2011 static void
2012 scan_function (cgraph_node *node, struct function *fun)
2014 basic_block bb;
2016 FOR_EACH_BB_FN (bb, fun)
2018 gimple_stmt_iterator gsi;
2019 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2021 gimple *stmt = gsi_stmt (gsi);
2023 if (final_bbs && stmt_can_throw_external (fun, stmt))
2024 bitmap_set_bit (final_bbs, bb->index);
2025 switch (gimple_code (stmt))
2027 case GIMPLE_RETURN:
2029 tree t = gimple_return_retval (as_a <greturn *> (stmt));
2030 if (t != NULL_TREE)
2031 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
2032 if (final_bbs)
2033 bitmap_set_bit (final_bbs, bb->index);
2035 break;
2037 case GIMPLE_ASSIGN:
2038 if (gimple_assign_single_p (stmt)
2039 && !gimple_clobber_p (stmt))
2041 tree rhs = gimple_assign_rhs1 (stmt);
2042 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
2043 tree lhs = gimple_assign_lhs (stmt);
2044 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
2046 break;
2048 case GIMPLE_CALL:
2050 unsigned argument_count = gimple_call_num_args (stmt);
2051 isra_scan_context ctx = ISRA_CTX_ARG;
2052 scan_call_info call_info, *call_info_p = &call_info;
2053 if (gimple_call_internal_p (stmt))
2055 call_info_p = NULL;
2056 ctx = ISRA_CTX_LOAD;
2057 internal_fn ifn = gimple_call_internal_fn (stmt);
2058 if (internal_store_fn_p (ifn))
2059 ctx = ISRA_CTX_STORE;
2061 else
2063 call_info.cs = node->get_edge (stmt);
2064 call_info.argument_count = argument_count;
2067 for (unsigned i = 0; i < argument_count; i++)
2069 call_info.arg_idx = i;
2070 scan_expr_access (gimple_call_arg (stmt, i), stmt,
2071 ctx, bb, call_info_p);
2074 tree lhs = gimple_call_lhs (stmt);
2075 if (lhs)
2076 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
2077 int flags = gimple_call_flags (stmt);
2078 if (final_bbs
2079 && (((flags & (ECF_CONST | ECF_PURE)) == 0)
2080 || (flags & ECF_LOOPING_CONST_OR_PURE)))
2081 bitmap_set_bit (final_bbs, bb->index);
2083 break;
2085 case GIMPLE_ASM:
2087 gasm *asm_stmt = as_a <gasm *> (stmt);
2088 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
2089 asm_visit_addr);
2090 if (final_bbs)
2091 bitmap_set_bit (final_bbs, bb->index);
2093 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
2095 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
2096 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
2098 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
2100 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
2101 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
2104 break;
2106 default:
2107 break;
2113 /* Return true if SSA_NAME NAME of function described by FUN is only used in
2114 return statements, or if results of any operations it is involved in are
2115 only used in return statements. ANALYZED is a bitmap that tracks which SSA
2116 names we have already started investigating. */
2118 static bool
2119 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
2121 bool res = true;
2122 imm_use_iterator imm_iter;
2123 gimple *stmt;
2125 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
2127 if (is_gimple_debug (stmt))
2128 continue;
2130 if (gimple_code (stmt) == GIMPLE_RETURN)
2132 tree t = gimple_return_retval (as_a <greturn *> (stmt));
2133 if (t != name)
2135 res = false;
2136 break;
2139 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
2140 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
2141 || gimple_code (stmt) == GIMPLE_PHI))
2143 /* TODO: And perhaps for const function calls too? */
2144 tree lhs;
2145 if (gimple_code (stmt) == GIMPLE_PHI)
2146 lhs = gimple_phi_result (stmt);
2147 else
2148 lhs = gimple_assign_lhs (stmt);
2150 if (TREE_CODE (lhs) != SSA_NAME)
2152 res = false;
2153 break;
2155 gcc_assert (!gimple_vdef (stmt));
2156 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2157 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2159 res = false;
2160 break;
2163 else
2165 res = false;
2166 break;
2169 return res;
2172 /* Inspect the uses of the return value of the call associated with CS, and if
2173 it is not used or if it is only used to construct the return value of the
2174 caller, mark it as such in call or caller summary. Also check for
2175 misaligned arguments. */
2177 static void
2178 isra_analyze_call (cgraph_edge *cs)
2180 gcall *call_stmt = cs->call_stmt;
2181 unsigned count = gimple_call_num_args (call_stmt);
2182 isra_call_summary *csum = call_sums->get_create (cs);
2184 for (unsigned i = 0; i < count; i++)
2186 tree arg = gimple_call_arg (call_stmt, i);
2187 if (TREE_CODE (arg) == ADDR_EXPR)
2189 poly_int64 poffset, psize, pmax_size;
2190 bool reverse;
2191 tree base = get_ref_base_and_extent (TREE_OPERAND (arg, 0), &poffset,
2192 &psize, &pmax_size, &reverse);
2193 HOST_WIDE_INT offset;
2194 unsigned HOST_WIDE_INT ds;
2195 if (DECL_P (base)
2196 && (poffset.is_constant (&offset))
2197 && tree_fits_uhwi_p (DECL_SIZE (base))
2198 && ((ds = tree_to_uhwi (DECL_SIZE (base)) - offset)
2199 < ISRA_ARG_SIZE_LIMIT * BITS_PER_UNIT))
2201 csum->init_inputs (count);
2202 gcc_assert (!csum->m_arg_flow[i].aggregate_pass_through);
2203 csum->m_arg_flow[i].unit_size = ds / BITS_PER_UNIT;
2206 if (TREE_CODE (base) == VAR_DECL
2207 && !TREE_STATIC (base)
2208 && !loaded_decls->contains (base))
2210 csum->init_inputs (count);
2211 csum->m_arg_flow[i].constructed_for_calls = true;
2215 if (is_gimple_reg (arg))
2216 continue;
2218 tree offset;
2219 poly_int64 bitsize, bitpos;
2220 machine_mode mode;
2221 int unsignedp, reversep, volatilep = 0;
2222 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2223 &unsignedp, &reversep, &volatilep);
2224 if (!multiple_p (bitpos, BITS_PER_UNIT))
2226 csum->m_bit_aligned_arg = true;
2227 break;
2231 tree lhs = gimple_call_lhs (call_stmt);
2232 if (lhs)
2234 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2235 from this function (without being read anywhere). */
2236 if (TREE_CODE (lhs) == SSA_NAME)
2238 bitmap analyzed = BITMAP_ALLOC (NULL);
2239 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2240 lhs, analyzed))
2241 csum->m_return_returned = true;
2242 BITMAP_FREE (analyzed);
2245 else
2246 csum->m_return_ignored = true;
2249 /* Look at all calls going out of NODE, described also by IFS and perform all
2250 analyses necessary for IPA-SRA that are not done at body scan time or done
2251 even when body is not scanned because the function is not a candidate. */
2253 static void
2254 isra_analyze_all_outgoing_calls (cgraph_node *node)
2256 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2257 isra_analyze_call (cs);
2258 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2259 isra_analyze_call (cs);
2262 /* Dump a dereferences table with heading STR to file F. */
2264 static void
2265 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2267 basic_block bb;
2269 fprintf (dump_file, "%s", str);
2270 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2271 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2273 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2274 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2276 int i;
2277 for (i = 0; i < unsafe_by_ref_count; i++)
2279 int idx = bb->index * unsafe_by_ref_count + i;
2280 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2283 fprintf (f, "\n");
2285 fprintf (dump_file, "\n");
2288 /* Propagate distances in bb_dereferences in the opposite direction than the
2289 control flow edges, in each step storing the maximum of the current value
2290 and the minimum of all successors. These steps are repeated until the table
2291 stabilizes. Note that BBs which might terminate the functions (according to
2292 final_bbs bitmap) never updated in this way. */
2294 static void
2295 propagate_dereference_distances (struct function *fun)
2297 basic_block bb;
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2300 dump_dereferences_table (dump_file, fun,
2301 "Dereference table before propagation:\n");
2303 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2304 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2305 FOR_EACH_BB_FN (bb, fun)
2307 queue.quick_push (bb);
2308 bb->aux = bb;
2311 while (!queue.is_empty ())
2313 edge_iterator ei;
2314 edge e;
2315 bool change = false;
2316 int i;
2318 bb = queue.pop ();
2319 bb->aux = NULL;
2321 if (bitmap_bit_p (final_bbs, bb->index))
2322 continue;
2324 for (i = 0; i < unsafe_by_ref_count; i++)
2326 int idx = bb->index * unsafe_by_ref_count + i;
2327 bool first = true;
2328 HOST_WIDE_INT inh = 0;
2330 FOR_EACH_EDGE (e, ei, bb->succs)
2332 int succ_idx = e->dest->index * unsafe_by_ref_count + i;
2334 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2335 continue;
2337 if (first)
2339 first = false;
2340 inh = bb_dereferences [succ_idx];
2342 else if (bb_dereferences [succ_idx] < inh)
2343 inh = bb_dereferences [succ_idx];
2346 if (!first && bb_dereferences[idx] < inh)
2348 bb_dereferences[idx] = inh;
2349 change = true;
2353 if (change)
2354 FOR_EACH_EDGE (e, ei, bb->preds)
2356 if (e->src->aux)
2357 continue;
2359 e->src->aux = e->src;
2360 queue.quick_push (e->src);
2364 if (dump_file && (dump_flags & TDF_DETAILS))
2365 dump_dereferences_table (dump_file, fun,
2366 "Dereference table after propagation:\n");
2369 /* Return true if the ACCESS loads happen frequently enough in FUN to risk
2370 moving them to the caller and only pass the result. */
2372 static bool
2373 dereference_probable_p (struct function *fun, gensum_param_access *access)
2375 int threshold = opt_for_fn (fun->decl, param_ipa_sra_deref_prob_threshold);
2376 return access->load_count
2377 >= ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (threshold, 100);
2380 /* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
2381 its children, return true if the parameter cannot be split, otherwise return
2382 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2383 the index of the entry BB in the function of PARM. */
2385 static bool
2386 check_gensum_access (struct function *fun, tree parm, gensum_param_desc *desc,
2387 gensum_param_access *access,
2388 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2389 int entry_bb_index)
2391 if (access->nonarg)
2393 *only_calls = false;
2394 *nonarg_acc_size += access->size;
2396 if (access->first_child)
2398 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2399 return true;
2402 /* Do not decompose a non-BLKmode param in a way that would create
2403 BLKmode params. Especially for by-reference passing (thus,
2404 pointer-type param) this is hardly worthwhile. */
2405 if (DECL_MODE (parm) != BLKmode
2406 && TYPE_MODE (access->type) == BLKmode)
2408 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2409 return true;
2412 if (desc->by_ref)
2414 if (desc->safe_ref)
2416 if (!dereference_probable_p (fun, access))
2418 disqualify_split_candidate (desc, "Dereferences in callers "
2419 "would happen much more frequently.");
2420 return true;
2423 else
2425 int idx = (entry_bb_index * unsafe_by_ref_count + desc->deref_index);
2426 if ((access->offset + access->size) > bb_dereferences[idx])
2428 if (!dereference_probable_p (fun, access))
2430 disqualify_split_candidate (desc, "Would create a possibly "
2431 "illegal dereference in a "
2432 "caller.");
2433 return true;
2435 desc->conditionally_dereferenceable = true;
2440 for (gensum_param_access *ch = access->first_child;
2442 ch = ch->next_sibling)
2443 if (check_gensum_access (fun, parm, desc, ch, nonarg_acc_size, only_calls,
2444 entry_bb_index))
2445 return true;
2447 return false;
2450 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2451 descriptor DESC. */
2453 static void
2454 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2456 param_access *to = ggc_cleared_alloc<param_access> ();
2457 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2458 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2459 to->unit_offset = from->offset / BITS_PER_UNIT;
2460 to->unit_size = from->size / BITS_PER_UNIT;
2461 to->type = from->type;
2462 to->alias_ptr_type = from->alias_ptr_type;
2463 to->certain = from->nonarg;
2464 to->reverse = from->reverse;
2465 vec_safe_push (desc->accesses, to);
2467 for (gensum_param_access *ch = from->first_child;
2469 ch = ch->next_sibling)
2470 copy_accesses_to_ipa_desc (ch, desc);
2473 /* Analyze function body scan results stored in param_accesses and
2474 param_accesses, detect possible transformations and store information of
2475 those in function summary. NODE, FUN and IFS are all various structures
2476 describing the currently analyzed function. */
2478 static void
2479 process_scan_results (cgraph_node *node, struct function *fun,
2480 isra_func_summary *ifs,
2481 vec<gensum_param_desc> *param_descriptions)
2483 bool check_pass_throughs = false;
2484 bool dereferences_propagated = false;
2485 tree parm = DECL_ARGUMENTS (node->decl);
2486 unsigned param_count = param_descriptions->length();
2488 for (unsigned desc_index = 0;
2489 desc_index < param_count;
2490 desc_index++, parm = DECL_CHAIN (parm))
2492 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2493 if (!desc->split_candidate)
2494 continue;
2496 if (flag_checking)
2497 isra_verify_access_tree (desc->accesses);
2499 if (!dereferences_propagated
2500 && desc->by_ref
2501 && !desc->safe_ref
2502 && desc->accesses)
2504 propagate_dereference_distances (fun);
2505 dereferences_propagated = true;
2508 HOST_WIDE_INT nonarg_acc_size = 0;
2509 bool only_calls = true;
2510 bool check_failed = false;
2512 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2513 for (gensum_param_access *acc = desc->accesses;
2514 acc;
2515 acc = acc->next_sibling)
2516 if (check_gensum_access (fun, parm, desc, acc, &nonarg_acc_size,
2517 &only_calls, entry_bb_index))
2519 check_failed = true;
2520 break;
2522 if (check_failed)
2523 continue;
2525 if (only_calls)
2526 desc->locally_unused = true;
2528 HOST_WIDE_INT cur_param_size
2529 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2530 HOST_WIDE_INT param_size_limit, optimistic_limit;
2531 if (!desc->by_ref || optimize_function_for_size_p (fun))
2533 param_size_limit = cur_param_size;
2534 optimistic_limit = cur_param_size;
2536 else
2538 param_size_limit
2539 = opt_for_fn (node->decl,
2540 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2541 optimistic_limit
2542 = (opt_for_fn (node->decl, param_ipa_sra_ptrwrap_growth_factor)
2543 * param_size_limit);
2546 if (nonarg_acc_size > optimistic_limit
2547 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2549 disqualify_split_candidate (desc, "Would result into a too big set "
2550 "of replacements even in best "
2551 "scenarios.");
2553 else
2555 /* create_parameter_descriptors makes sure unit sizes of all
2556 candidate parameters fit unsigned integers restricted to
2557 ISRA_ARG_SIZE_LIMIT. */
2558 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2559 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2560 if (desc->split_candidate && desc->ptr_pt_count)
2562 gcc_assert (desc->by_ref);
2563 check_pass_throughs = true;
2568 /* When a pointer parameter is passed-through to a callee, in which it is
2569 only used to read only one or a few items, we can attempt to transform it
2570 to obtaining and passing through the items instead of the pointer. But we
2571 must take extra care that 1) we do not introduce any segfault by moving
2572 dereferences above control flow and that 2) the data is not modified
2573 through an alias in this function. The IPA analysis must not introduce
2574 any accesses candidates unless it can prove both.
2576 The current solution is very crude as it consists of ensuring that the
2577 call postdominates entry BB and that the definition of VUSE of the call is
2578 default definition. TODO: For non-recursive callees in the same
2579 compilation unit we could do better by doing analysis in topological order
2580 an looking into access candidates of callees, using their alias_ptr_types
2581 to attempt real AA. We could also use the maximum known dereferenced
2582 offset in this function at IPA level.
2584 TODO: Measure the overhead and the effect of just being pessimistic.
2585 Maybe this is only -O3 material? */
2587 hash_map<gimple *, bool> analyzed_stmts;
2588 bitmap always_executed_bbs = NULL;
2589 if (check_pass_throughs)
2590 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2592 gcall *call_stmt = cs->call_stmt;
2593 tree vuse = gimple_vuse (call_stmt);
2595 /* If the callee is a const function, we don't get a VUSE. In such
2596 case there will be no memory accesses in the called function (or the
2597 const attribute is wrong) and then we just don't care. */
2598 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2600 unsigned count = gimple_call_num_args (call_stmt);
2601 isra_call_summary *csum = call_sums->get_create (cs);
2602 csum->init_inputs (count);
2603 csum->m_before_any_store = uses_memory_as_obtained;
2604 for (unsigned argidx = 0; argidx < count; argidx++)
2606 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2607 continue;
2608 unsigned pidx
2609 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2610 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2611 if (!desc->split_candidate)
2613 csum->m_arg_flow[argidx].pointer_pass_through = false;
2614 continue;
2616 if (!uses_memory_as_obtained)
2617 continue;
2619 if (desc->safe_ref)
2621 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2622 continue;
2625 /* Walk basic block and see if its execution can terminate earlier.
2626 Keep the info for later re-use to avoid quadratic behavoiur here. */
2627 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
2628 bool safe = true;
2629 int n = 0;
2630 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2632 bool *b = analyzed_stmts.get (gsi_stmt (gsi));
2633 if (b)
2635 safe = *b;
2636 gsi_next (&gsi);
2637 break;
2639 n++;
2640 if (stmt_may_terminate_function_p (fun, gsi_stmt (gsi), false))
2642 safe = false;
2643 break;
2646 if (n)
2648 if (gsi_end_p (gsi))
2649 gsi = gsi_start_bb (gimple_bb (call_stmt));
2650 for (; gsi_stmt (gsi) != call_stmt; gsi_next (&gsi))
2651 analyzed_stmts.get_or_insert (gsi_stmt (gsi)) = safe;
2654 if (safe && !always_executed_bbs)
2656 mark_dfs_back_edges ();
2657 always_executed_bbs = find_always_executed_bbs (fun, false);
2659 if (safe && bitmap_bit_p (always_executed_bbs, gimple_bb (call_stmt)->index))
2660 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2664 BITMAP_FREE (always_executed_bbs);
2666 /* TODO: Add early exit if we disqualified everything. This also requires
2667 that we either relax the restriction that
2668 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2669 or store the number of parameters to IPA-SRA function summary and use that
2670 when just removing params. */
2672 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2673 ifs->m_parameters->quick_grow_cleared (param_count);
2674 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2676 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2677 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2679 d->param_size_limit = s->param_size_limit;
2680 d->size_reached = s->nonarg_acc_size;
2681 d->locally_unused = s->locally_unused;
2682 d->split_candidate = s->split_candidate;
2683 d->by_ref = s->by_ref;
2684 d->remove_only_when_retval_removed = s->remove_only_when_retval_removed;
2685 d->split_only_when_retval_removed = s->split_only_when_retval_removed;
2686 d->conditionally_dereferenceable = s->conditionally_dereferenceable;
2688 for (gensum_param_access *acc = s->accesses;
2689 acc;
2690 acc = acc->next_sibling)
2691 copy_accesses_to_ipa_desc (acc, d);
2694 if (dump_file)
2695 dump_isra_param_descriptors (dump_file, node->decl, ifs, false);
2698 /* Return true if there are any overlaps among certain accesses of DESC. If
2699 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2700 too. DESC is assumed to be a split candidate that is not locally
2701 unused. */
2703 static bool
2704 overlapping_certain_accesses_p (isra_param_desc *desc,
2705 bool *certain_access_present_p)
2707 unsigned pclen = vec_safe_length (desc->accesses);
2708 for (unsigned i = 0; i < pclen; i++)
2710 param_access *a1 = (*desc->accesses)[i];
2712 if (!a1->certain)
2713 continue;
2714 if (certain_access_present_p)
2715 *certain_access_present_p = true;
2716 for (unsigned j = i + 1; j < pclen; j++)
2718 param_access *a2 = (*desc->accesses)[j];
2719 if (a2->certain
2720 && a1->unit_offset < a2->unit_offset + a2->unit_size
2721 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2722 return true;
2725 return false;
2728 /* Check for any overlaps of certain param accesses among splitting candidates
2729 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2730 check that used splitting candidates have at least one certain access. */
2732 static void
2733 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2735 isra_func_summary *ifs = func_sums->get (node);
2736 if (!ifs || !ifs->m_candidate)
2737 return;
2738 unsigned param_count = vec_safe_length (ifs->m_parameters);
2739 for (unsigned pidx = 0; pidx < param_count; pidx++)
2741 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2742 if (!desc->split_candidate || desc->locally_unused)
2743 continue;
2745 bool certain_access_present = !certain_must_exist;
2746 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2747 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2748 "which overlap", node->dump_name (), pidx);
2749 if (!certain_access_present)
2750 internal_error ("function %qs, parameter %u, is used but does not "
2751 "have any certain IPA-SRA access",
2752 node->dump_name (), pidx);
2756 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2757 this compilation unit and create summary structures describing IPA-SRA
2758 opportunities and constraints in them. */
2760 static void
2761 ipa_sra_generate_summary (void)
2763 struct cgraph_node *node;
2765 gcc_checking_assert (!func_sums);
2766 gcc_checking_assert (!call_sums);
2767 func_sums
2768 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2769 ipa_sra_function_summaries (symtab, true));
2770 call_sums = new ipa_sra_call_summaries (symtab);
2772 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2773 ipa_sra_summarize_function (node);
2774 return;
2777 /* Write intraprocedural analysis information about E and all of its outgoing
2778 edges into a stream for LTO WPA. */
2780 static void
2781 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2783 isra_call_summary *csum = call_sums->get (e);
2784 unsigned input_count = csum->m_arg_flow.length ();
2785 streamer_write_uhwi (ob, input_count);
2786 for (unsigned i = 0; i < input_count; i++)
2788 isra_param_flow *ipf = &csum->m_arg_flow[i];
2789 streamer_write_hwi (ob, ipf->length);
2790 bitpack_d bp = bitpack_create (ob->main_stream);
2791 for (int j = 0; j < ipf->length; j++)
2792 bp_pack_value (&bp, ipf->inputs[j], 8);
2793 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2794 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2795 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2796 bp_pack_value (&bp, ipf->constructed_for_calls, 1);
2797 streamer_write_bitpack (&bp);
2798 streamer_write_uhwi (ob, ipf->unit_offset);
2799 streamer_write_uhwi (ob, ipf->unit_size);
2801 bitpack_d bp = bitpack_create (ob->main_stream);
2802 bp_pack_value (&bp, csum->m_return_ignored, 1);
2803 bp_pack_value (&bp, csum->m_return_returned, 1);
2804 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2805 bp_pack_value (&bp, csum->m_before_any_store, 1);
2806 streamer_write_bitpack (&bp);
2809 /* Write intraprocedural analysis information about NODE and all of its outgoing
2810 edges into a stream for LTO WPA. */
2812 static void
2813 isra_write_node_summary (output_block *ob, cgraph_node *node)
2815 isra_func_summary *ifs = func_sums->get (node);
2816 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2817 int node_ref = lto_symtab_encoder_encode (encoder, node);
2818 streamer_write_uhwi (ob, node_ref);
2820 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2821 streamer_write_uhwi (ob, param_desc_count);
2822 for (unsigned i = 0; i < param_desc_count; i++)
2824 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2825 unsigned access_count = vec_safe_length (desc->accesses);
2826 streamer_write_uhwi (ob, access_count);
2827 for (unsigned j = 0; j < access_count; j++)
2829 param_access *acc = (*desc->accesses)[j];
2830 stream_write_tree (ob, acc->type, true);
2831 stream_write_tree (ob, acc->alias_ptr_type, true);
2832 streamer_write_uhwi (ob, acc->unit_offset);
2833 streamer_write_uhwi (ob, acc->unit_size);
2834 bitpack_d bp = bitpack_create (ob->main_stream);
2835 bp_pack_value (&bp, acc->certain, 1);
2836 bp_pack_value (&bp, acc->reverse, 1);
2837 streamer_write_bitpack (&bp);
2839 streamer_write_uhwi (ob, desc->param_size_limit);
2840 streamer_write_uhwi (ob, desc->size_reached);
2841 gcc_assert (desc->safe_size == 0);
2842 bitpack_d bp = bitpack_create (ob->main_stream);
2843 bp_pack_value (&bp, desc->locally_unused, 1);
2844 bp_pack_value (&bp, desc->split_candidate, 1);
2845 bp_pack_value (&bp, desc->by_ref, 1);
2846 gcc_assert (!desc->not_specially_constructed);
2847 bp_pack_value (&bp, desc->remove_only_when_retval_removed, 1);
2848 bp_pack_value (&bp, desc->split_only_when_retval_removed, 1);
2849 bp_pack_value (&bp, desc->conditionally_dereferenceable, 1);
2850 gcc_assert (!desc->safe_size_set);
2851 streamer_write_bitpack (&bp);
2853 bitpack_d bp = bitpack_create (ob->main_stream);
2854 bp_pack_value (&bp, ifs->m_candidate, 1);
2855 bp_pack_value (&bp, ifs->m_returns_value, 1);
2856 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2857 gcc_assert (!ifs->m_queued);
2858 streamer_write_bitpack (&bp);
2860 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2861 isra_write_edge_summary (ob, e);
2862 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2863 isra_write_edge_summary (ob, e);
2866 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2868 static void
2869 ipa_sra_write_summary (void)
2871 if (!func_sums || !call_sums)
2872 return;
2874 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2875 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2876 ob->symbol = NULL;
2878 unsigned int count = 0;
2879 lto_symtab_encoder_iterator lsei;
2880 for (lsei = lsei_start_function_in_partition (encoder);
2881 !lsei_end_p (lsei);
2882 lsei_next_function_in_partition (&lsei))
2884 cgraph_node *node = lsei_cgraph_node (lsei);
2885 if (node->has_gimple_body_p ()
2886 && func_sums->get (node) != NULL)
2887 count++;
2889 streamer_write_uhwi (ob, count);
2891 /* Process all of the functions. */
2892 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2893 lsei_next_function_in_partition (&lsei))
2895 cgraph_node *node = lsei_cgraph_node (lsei);
2896 if (node->has_gimple_body_p ()
2897 && func_sums->get (node) != NULL)
2898 isra_write_node_summary (ob, node);
2900 streamer_write_char_stream (ob->main_stream, 0);
2901 produce_asm (ob, NULL);
2902 destroy_output_block (ob);
2905 /* Read intraprocedural analysis information about E and all of its outgoing
2906 edges into a stream for LTO WPA. */
2908 static void
2909 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2911 isra_call_summary *csum = call_sums->get_create (cs);
2912 unsigned input_count = streamer_read_uhwi (ib);
2913 csum->init_inputs (input_count);
2914 for (unsigned i = 0; i < input_count; i++)
2916 isra_param_flow *ipf = &csum->m_arg_flow[i];
2917 ipf->length = streamer_read_hwi (ib);
2918 bitpack_d bp = streamer_read_bitpack (ib);
2919 for (int j = 0; j < ipf->length; j++)
2920 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2921 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2922 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2923 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2924 ipf->constructed_for_calls = bp_unpack_value (&bp, 1);
2925 ipf->unit_offset = streamer_read_uhwi (ib);
2926 ipf->unit_size = streamer_read_uhwi (ib);
2928 bitpack_d bp = streamer_read_bitpack (ib);
2929 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2930 csum->m_return_returned = bp_unpack_value (&bp, 1);
2931 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2932 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2935 /* Read intraprocedural analysis information about NODE and all of its outgoing
2936 edges into a stream for LTO WPA. */
2938 static void
2939 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2940 struct data_in *data_in)
2942 isra_func_summary *ifs = func_sums->get_create (node);
2943 unsigned param_desc_count = streamer_read_uhwi (ib);
2944 if (param_desc_count > 0)
2946 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2947 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2949 for (unsigned i = 0; i < param_desc_count; i++)
2951 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2952 unsigned access_count = streamer_read_uhwi (ib);
2953 for (unsigned j = 0; j < access_count; j++)
2955 param_access *acc = ggc_cleared_alloc<param_access> ();
2956 acc->type = stream_read_tree (ib, data_in);
2957 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2958 acc->unit_offset = streamer_read_uhwi (ib);
2959 acc->unit_size = streamer_read_uhwi (ib);
2960 bitpack_d bp = streamer_read_bitpack (ib);
2961 acc->certain = bp_unpack_value (&bp, 1);
2962 acc->reverse = bp_unpack_value (&bp, 1);
2963 vec_safe_push (desc->accesses, acc);
2965 desc->param_size_limit = streamer_read_uhwi (ib);
2966 desc->size_reached = streamer_read_uhwi (ib);
2967 desc->safe_size = 0;
2968 bitpack_d bp = streamer_read_bitpack (ib);
2969 desc->locally_unused = bp_unpack_value (&bp, 1);
2970 desc->split_candidate = bp_unpack_value (&bp, 1);
2971 desc->by_ref = bp_unpack_value (&bp, 1);
2972 desc->not_specially_constructed = 0;
2973 desc->remove_only_when_retval_removed = bp_unpack_value (&bp, 1);
2974 desc->split_only_when_retval_removed = bp_unpack_value (&bp, 1);
2975 desc->conditionally_dereferenceable = bp_unpack_value (&bp, 1);
2976 desc->safe_size_set = 0;
2978 bitpack_d bp = streamer_read_bitpack (ib);
2979 ifs->m_candidate = bp_unpack_value (&bp, 1);
2980 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2981 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2982 ifs->m_queued = 0;
2984 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2985 isra_read_edge_summary (ib, e);
2986 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2987 isra_read_edge_summary (ib, e);
2990 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2991 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
2992 it should be possible to unify them somehow. */
2994 static void
2995 isra_read_summary_section (struct lto_file_decl_data *file_data,
2996 const char *data, size_t len)
2998 const struct lto_function_header *header =
2999 (const struct lto_function_header *) data;
3000 const int cfg_offset = sizeof (struct lto_function_header);
3001 const int main_offset = cfg_offset + header->cfg_size;
3002 const int string_offset = main_offset + header->main_size;
3003 struct data_in *data_in;
3004 unsigned int i;
3005 unsigned int count;
3007 lto_input_block ib_main ((const char *) data + main_offset,
3008 header->main_size, file_data);
3010 data_in =
3011 lto_data_in_create (file_data, (const char *) data + string_offset,
3012 header->string_size, vNULL);
3013 count = streamer_read_uhwi (&ib_main);
3015 for (i = 0; i < count; i++)
3017 unsigned int index;
3018 struct cgraph_node *node;
3019 lto_symtab_encoder_t encoder;
3021 index = streamer_read_uhwi (&ib_main);
3022 encoder = file_data->symtab_node_encoder;
3023 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3024 index));
3025 gcc_assert (node->definition);
3026 isra_read_node_info (&ib_main, node, data_in);
3028 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
3029 len);
3030 lto_data_in_delete (data_in);
3033 /* Read intraprocedural analysis information into a stream for LTO WPA. */
3035 static void
3036 ipa_sra_read_summary (void)
3038 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3039 struct lto_file_decl_data *file_data;
3040 unsigned int j = 0;
3042 gcc_checking_assert (!func_sums);
3043 gcc_checking_assert (!call_sums);
3044 func_sums
3045 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
3046 ipa_sra_function_summaries (symtab, true));
3047 call_sums = new ipa_sra_call_summaries (symtab);
3049 while ((file_data = file_data_vec[j++]))
3051 size_t len;
3052 const char *data
3053 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
3054 if (data)
3055 isra_read_summary_section (file_data, data, len);
3059 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. If
3060 HINTS is true, also dump IPA-analysis computed hints. */
3062 static void
3063 ipa_sra_dump_all_summaries (FILE *f, bool hints)
3065 cgraph_node *node;
3066 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
3068 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
3070 isra_func_summary *ifs = func_sums->get (node);
3071 if (!ifs)
3072 fprintf (f, " Function does not have any associated IPA-SRA "
3073 "summary\n");
3074 else if (!ifs->m_candidate)
3075 fprintf (f, " Not a candidate function\n");
3076 else
3078 if (ifs->m_returns_value)
3079 fprintf (f, " Returns value\n");
3080 if (vec_safe_is_empty (ifs->m_parameters))
3081 fprintf (f, " No parameter information. \n");
3082 else
3083 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
3085 fprintf (f, " Descriptor for parameter %i:\n", i);
3086 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
3088 fprintf (f, "\n");
3091 struct cgraph_edge *cs;
3092 for (cs = node->callees; cs; cs = cs->next_callee)
3094 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
3095 cs->callee->dump_name ());
3096 isra_call_summary *csum = call_sums->get (cs);
3097 if (csum)
3098 csum->dump (f);
3099 else
3100 fprintf (f, " Call summary is MISSING!\n");
3104 fprintf (f, "\n\n");
3107 /* Perform function-scope viability tests that can be only made at IPA level
3108 and return false if the function is deemed unsuitable for IPA-SRA. */
3110 static bool
3111 ipa_sra_ipa_function_checks (cgraph_node *node)
3113 if (!node->can_be_local_p ())
3115 if (dump_file)
3116 fprintf (dump_file, "Function %s disqualified because it cannot be "
3117 "made local.\n", node->dump_name ());
3118 return false;
3120 if (!node->can_change_signature)
3122 if (dump_file)
3123 fprintf (dump_file, "Function can not change signature.\n");
3124 return false;
3127 return true;
3130 /* Issues found out by check_callers_for_issues. */
3132 struct caller_issues
3134 /* The candidate being considered. */
3135 cgraph_node *candidate;
3136 /* There is a thunk among callers. */
3137 bool thunk;
3138 /* Set if there is at least one caller that is OK. */
3139 bool there_is_one;
3140 /* Call site with no available information. */
3141 bool unknown_callsite;
3142 /* Call from outside the candidate's comdat group. */
3143 bool call_from_outside_comdat;
3144 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
3145 bool bit_aligned_aggregate_argument;
3148 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
3149 that apply. */
3151 static bool
3152 check_for_caller_issues (struct cgraph_node *node, void *data)
3154 struct caller_issues *issues = (struct caller_issues *) data;
3156 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3158 if (cs->caller->thunk)
3160 issues->thunk = true;
3161 /* TODO: We should be able to process at least some types of
3162 thunks. */
3163 return true;
3165 if (issues->candidate->calls_comdat_local
3166 && issues->candidate->same_comdat_group
3167 && !issues->candidate->in_same_comdat_group_p (cs->caller))
3169 issues->call_from_outside_comdat = true;
3170 return true;
3173 isra_call_summary *csum = call_sums->get (cs);
3174 if (!csum)
3176 issues->unknown_callsite = true;
3177 return true;
3180 if (csum->m_bit_aligned_arg)
3181 issues->bit_aligned_aggregate_argument = true;
3183 issues->there_is_one = true;
3185 return false;
3188 /* Look at all incoming edges to NODE, including aliases and thunks and look
3189 for problems. Return true if NODE type should not be modified at all. */
3191 static bool
3192 check_all_callers_for_issues (cgraph_node *node)
3194 struct caller_issues issues;
3195 memset (&issues, 0, sizeof (issues));
3196 issues.candidate = node;
3198 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
3199 if (issues.unknown_callsite)
3201 if (dump_file && (dump_flags & TDF_DETAILS))
3202 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
3203 "all modifications.\n", node->dump_name ());
3204 return true;
3206 /* TODO: We should be able to process at least some types of thunks. */
3207 if (issues.thunk)
3209 if (dump_file && (dump_flags & TDF_DETAILS))
3210 fprintf (dump_file, "A call of %s is through thunk, which are not"
3211 " handled yet. Disabling all modifications.\n",
3212 node->dump_name ());
3213 return true;
3215 if (issues.call_from_outside_comdat)
3217 if (dump_file)
3218 fprintf (dump_file, "Function would become private comdat called "
3219 "outside of its comdat group.\n");
3220 return true;
3223 if (issues.bit_aligned_aggregate_argument)
3225 /* Let's only remove parameters/return values from such functions.
3226 TODO: We could only prevent splitting the problematic parameters if
3227 anybody thinks it is worth it. */
3228 if (dump_file && (dump_flags & TDF_DETAILS))
3229 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
3230 " disabling parameter splitting.\n", node->dump_name ());
3232 isra_func_summary *ifs = func_sums->get (node);
3233 gcc_checking_assert (ifs);
3234 unsigned param_count = vec_safe_length (ifs->m_parameters);
3235 for (unsigned i = 0; i < param_count; i++)
3236 (*ifs->m_parameters)[i].split_candidate = false;
3238 if (!issues.there_is_one)
3240 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fprintf (dump_file, "There is no call to %s that we can modify. "
3242 "Disabling all modifications.\n", node->dump_name ());
3243 return true;
3245 return false;
3248 /* Find the access with corresponding OFFSET and SIZE among accesses in
3249 PARAM_DESC and return it or NULL if such an access is not there. */
3251 static param_access *
3252 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3254 unsigned pclen = vec_safe_length (param_desc->accesses);
3256 /* The search is linear but the number of stored accesses is bound by
3257 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3259 for (unsigned i = 0; i < pclen; i++)
3260 if ((*param_desc->accesses)[i]->unit_offset == offset
3261 && (*param_desc->accesses)[i]->unit_size == size)
3262 return (*param_desc->accesses)[i];
3264 return NULL;
3267 /* Return iff the total size of definite replacement SIZE would violate the
3268 limit set for it in PARAM. */
3270 static bool
3271 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3273 unsigned limit = desc->param_size_limit;
3274 if (size > limit
3275 || (!desc->by_ref && size == limit))
3276 return true;
3277 return false;
3280 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3281 the set limit. IDX is the parameter number which is dumped when
3282 disqualifying. */
3284 static void
3285 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3287 unsigned after = desc->size_reached + size;
3288 if (size_would_violate_limit_p (desc, after))
3290 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, " ...size limit reached, disqualifying "
3292 "candidate parameter %u\n", idx);
3293 desc->split_candidate = false;
3294 return;
3296 desc->size_reached = after;
3299 /* Take all actions required to deal with an edge CS that represents a call to
3300 an unknown or un-analyzed function, for both parameter removal and
3301 splitting. */
3303 static void
3304 process_edge_to_unknown_caller (cgraph_edge *cs)
3306 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3307 gcc_checking_assert (from_ifs);
3308 isra_call_summary *csum = call_sums->get (cs);
3310 if (dump_file && (dump_flags & TDF_DETAILS))
3311 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3312 cs->caller->dump_name ());
3314 unsigned args_count = csum->m_arg_flow.length ();
3315 for (unsigned i = 0; i < args_count; i++)
3317 isra_param_flow *ipf = &csum->m_arg_flow[i];
3319 if (ipf->pointer_pass_through)
3321 isra_param_desc *param_desc
3322 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3323 param_desc->locally_unused = false;
3324 param_desc->split_candidate = false;
3325 continue;
3327 if (ipf->aggregate_pass_through)
3329 unsigned idx = get_single_param_flow_source (ipf);
3330 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3332 param_desc->locally_unused = false;
3333 if (!param_desc->split_candidate)
3334 continue;
3335 gcc_assert (!param_desc->by_ref);
3336 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3337 ipf->unit_size);
3338 gcc_checking_assert (pacc);
3339 pacc->certain = true;
3340 if (overlapping_certain_accesses_p (param_desc, NULL))
3342 if (dump_file && (dump_flags & TDF_DETAILS))
3343 fprintf (dump_file, " ...leading to overlap, "
3344 " disqualifying candidate parameter %u\n",
3345 idx);
3346 param_desc->split_candidate = false;
3348 else
3349 bump_reached_size (param_desc, pacc->unit_size, idx);
3350 ipf->aggregate_pass_through = false;
3351 continue;
3354 for (int j = 0; j < ipf->length; j++)
3356 int input_idx = ipf->inputs[j];
3357 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3362 /* Propagate parameter removal information through cross-SCC edge CS,
3363 i.e. decrease the use count in the caller parameter descriptor for each use
3364 in this call. */
3366 static void
3367 param_removal_cross_scc_edge (cgraph_edge *cs)
3369 enum availability availability;
3370 cgraph_node *callee = cs->callee->function_symbol (&availability);
3371 isra_func_summary *to_ifs = func_sums->get (callee);
3372 if (!to_ifs || !to_ifs->m_candidate
3373 || (availability < AVAIL_AVAILABLE)
3374 || vec_safe_is_empty (to_ifs->m_parameters))
3376 process_edge_to_unknown_caller (cs);
3377 return;
3379 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3380 gcc_checking_assert (from_ifs);
3382 isra_call_summary *csum = call_sums->get (cs);
3383 unsigned args_count = csum->m_arg_flow.length ();
3384 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3386 for (unsigned i = 0; i < args_count; i++)
3388 bool unused_in_callee;
3389 if (i < param_count)
3390 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3391 else
3392 unused_in_callee = false;
3394 if (!unused_in_callee)
3396 isra_param_flow *ipf = &csum->m_arg_flow[i];
3397 for (int j = 0; j < ipf->length; j++)
3399 int input_idx = ipf->inputs[j];
3400 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3406 /* Unless it is already there, push NODE which is also described by IFS to
3407 STACK. */
3409 static void
3410 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3411 vec<cgraph_node *> *stack)
3413 if (!ifs->m_queued)
3415 ifs->m_queued = true;
3416 stack->safe_push (node);
3420 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3421 used and push CALLER on STACK. */
3423 static void
3424 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3425 cgraph_node *caller, vec<cgraph_node *> *stack)
3427 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3429 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3430 isra_push_node_to_stack (caller, from_ifs, stack);
3434 /* Combine safe_size of DESC with SIZE and return true if it has changed. */
3436 static bool
3437 update_safe_size (isra_param_desc *desc, unsigned size)
3439 if (!desc->safe_size_set)
3441 desc->safe_size_set = 1;
3442 desc->safe_size = size;
3443 return true;
3445 if (desc->safe_size <= size)
3446 return false;
3447 desc->safe_size = size;
3448 return true;
3451 /* Set all param hints in DESC to the pessimistic values. Return true if any
3452 hints that need to potentially trigger further propagation have changed. */
3454 static bool
3455 flip_all_hints_pessimistic (isra_param_desc *desc)
3457 desc->not_specially_constructed = true;
3458 return update_safe_size (desc, 0);
3461 /* Because we have not analyzed or otherwise problematic caller, go over all
3462 parameter int flags of IFS describing a call graph node of a calllee and
3463 turn them pessimistic. Return true if any hints that need to potentially
3464 trigger further propagation have changed. */
3466 static bool
3467 flip_all_param_hints_pessimistic (isra_func_summary *ifs)
3469 if (!ifs || !ifs->m_candidate)
3470 return false;
3472 bool ret = false;
3473 unsigned param_count = vec_safe_length (ifs->m_parameters);
3475 for (unsigned i = 0; i < param_count; i++)
3476 ret |= flip_all_hints_pessimistic (&(*ifs->m_parameters)[i]);
3478 return ret;
3481 /* Propagate hints accross edge CS which ultimately leads to a node described
3482 by TO_IFS. Return true if any hints of the callee which should potentially
3483 trigger further propagation have changed. */
3485 static bool
3486 propagate_param_hints_accross_call (cgraph_edge *cs, isra_func_summary *to_ifs)
3488 if (!to_ifs || !to_ifs->m_candidate)
3489 return false;
3491 isra_call_summary *csum = call_sums->get (cs);
3492 bool ret = false;
3493 unsigned args_count = csum->m_arg_flow.length ();
3494 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3496 for (unsigned i = 0; i < param_count; i++)
3498 isra_param_desc *desc = &(*to_ifs->m_parameters)[i];
3499 if (i >= args_count)
3501 ret |= flip_all_hints_pessimistic (desc);
3502 continue;
3505 if (desc->by_ref)
3507 isra_param_flow *ipf = &csum->m_arg_flow[i];
3509 if (!ipf->constructed_for_calls)
3510 desc->not_specially_constructed = true;
3512 if (ipf->pointer_pass_through)
3514 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3515 int srcidx = get_single_param_flow_source (ipf);
3516 if (vec_safe_length (from_ifs->m_parameters) > (unsigned) srcidx)
3518 isra_param_desc *src_d = &(*from_ifs->m_parameters)[srcidx];
3519 if (src_d->safe_size_set)
3520 ret |= update_safe_size (desc, src_d->safe_size);
3522 else
3523 ret |= update_safe_size (desc, 0);
3525 else if (!ipf->aggregate_pass_through)
3526 ret |= update_safe_size (desc, ipf->unit_size);
3527 else
3528 /* LTOing type-mismatched programs can end up here. */
3529 ret |= update_safe_size (desc, 0);
3532 return ret;
3535 /* Propagate hints from NODE described by FROM_IFS to all its (dorect) callees,
3536 push those that may need re-visiting onto STACK. */
3538 static void
3539 propagate_hints_to_all_callees (cgraph_node *node, isra_func_summary *from_ifs,
3540 vec<cgraph_node *> *stack)
3542 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3544 enum availability availability;
3545 cgraph_node *callee = cs->callee->function_symbol (&availability);
3546 isra_func_summary *to_ifs = func_sums->get (callee);
3547 if (!from_ifs)
3549 if (flip_all_param_hints_pessimistic (to_ifs)
3550 && ipa_edge_within_scc (cs))
3551 isra_push_node_to_stack (callee, to_ifs, stack);
3553 else if (propagate_param_hints_accross_call (cs, to_ifs)
3554 && ipa_edge_within_scc (cs))
3555 isra_push_node_to_stack (callee, to_ifs, stack);
3559 /* Propagate information that any parameter is not used only locally within a
3560 SCC across CS to the caller, which must be in the same SCC as the
3561 callee. Push any callers that need to be re-processed to STACK. */
3563 static void
3564 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3566 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3567 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3568 return;
3570 isra_call_summary *csum = call_sums->get (cs);
3571 gcc_checking_assert (csum);
3572 unsigned args_count = csum->m_arg_flow.length ();
3573 enum availability availability;
3574 cgraph_node *callee = cs->callee->function_symbol (&availability);
3575 isra_func_summary *to_ifs = func_sums->get (callee);
3577 unsigned param_count
3578 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3579 ? vec_safe_length (to_ifs->m_parameters) : 0;
3580 for (unsigned i = 0; i < args_count; i++)
3582 if (i < param_count
3583 && (*to_ifs->m_parameters)[i].locally_unused)
3584 continue;
3586 /* The argument is needed in the callee it, we must mark the parameter as
3587 used also in the caller and its callers within this SCC. */
3588 isra_param_flow *ipf = &csum->m_arg_flow[i];
3589 for (int j = 0; j < ipf->length; j++)
3591 int input_idx = ipf->inputs[j];
3592 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3597 /* Propagate information that any parameter is not used only locally within a
3598 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3599 same SCC. Push any callers that need to be re-processed to STACK. */
3601 static bool
3602 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3604 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3605 cgraph_edge *cs;
3606 for (cs = node->callers; cs; cs = cs->next_caller)
3607 if (ipa_edge_within_scc (cs))
3608 propagate_used_across_scc_edge (cs, stack);
3609 return false;
3612 /* Return true iff all certain accesses in ARG_DESC are also present as
3613 certain accesses in PARAM_DESC. */
3615 static bool
3616 all_callee_accesses_present_p (isra_param_desc *param_desc,
3617 isra_param_desc *arg_desc)
3619 unsigned aclen = vec_safe_length (arg_desc->accesses);
3620 for (unsigned j = 0; j < aclen; j++)
3622 param_access *argacc = (*arg_desc->accesses)[j];
3623 if (!argacc->certain)
3624 continue;
3625 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3626 argacc->unit_size);
3627 if (!pacc
3628 || !pacc->certain
3629 || !types_compatible_p (argacc->type, pacc->type))
3630 return false;
3632 return true;
3635 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3636 does not allow instantiating an auto_vec with a type defined within a
3637 function so it is a global type. */
3638 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3641 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3642 (which belongs to CALLER) if they would not violate some constraint there.
3643 If successful, return NULL, otherwise return the string reason for failure
3644 (which can be written to the dump file). DELTA_OFFSET is the known offset
3645 of the actual argument withing the formal parameter (so of ARG_DESCS within
3646 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3647 known. In case of success, set *CHANGE_P to true if propagation actually
3648 changed anything. */
3650 static const char *
3651 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3652 isra_param_desc *arg_desc,
3653 unsigned delta_offset, unsigned arg_size,
3654 bool *change_p)
3656 unsigned pclen = vec_safe_length (param_desc->accesses);
3657 unsigned aclen = vec_safe_length (arg_desc->accesses);
3658 unsigned prop_count = 0;
3659 unsigned prop_size = 0;
3660 bool change = false;
3662 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3663 for (unsigned j = 0; j < aclen; j++)
3665 param_access *argacc = (*arg_desc->accesses)[j];
3666 prop_kinds.safe_push (ACC_PROP_DONT);
3668 if (arg_size > 0
3669 && argacc->unit_offset + argacc->unit_size > arg_size)
3670 return "callee access outsize size boundary";
3672 if (!argacc->certain)
3673 continue;
3675 unsigned offset = argacc->unit_offset + delta_offset;
3676 /* Given that accesses are initially stored according to increasing
3677 offset and decreasing size in case of equal offsets, the following
3678 searches could be written more efficiently if we kept the ordering
3679 when copying. But the number of accesses is capped at
3680 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3681 messy quickly, so let's improve on that only if necessary. */
3683 bool exact_match = false;
3684 for (unsigned i = 0; i < pclen; i++)
3686 /* Check for overlaps. */
3687 param_access *pacc = (*param_desc->accesses)[i];
3688 if (pacc->unit_offset == offset
3689 && pacc->unit_size == argacc->unit_size)
3691 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3692 || !types_compatible_p (argacc->type, pacc->type)
3693 || argacc->reverse != pacc->reverse)
3694 return "propagated access types would not match existing ones";
3696 exact_match = true;
3697 if (!pacc->certain)
3699 prop_kinds[j] = ACC_PROP_CERTAIN;
3700 prop_size += argacc->unit_size;
3701 change = true;
3703 continue;
3706 if (offset < pacc->unit_offset + pacc->unit_size
3707 && offset + argacc->unit_size > pacc->unit_offset)
3709 /* None permissible with load accesses, possible to fit into
3710 argument ones. */
3711 if (pacc->certain
3712 || offset < pacc->unit_offset
3713 || (offset + argacc->unit_size
3714 > pacc->unit_offset + pacc->unit_size))
3715 return "a propagated access would conflict in caller";
3719 if (!exact_match)
3721 prop_kinds[j] = ACC_PROP_COPY;
3722 prop_count++;
3723 prop_size += argacc->unit_size;
3724 change = true;
3728 if (!change)
3729 return NULL;
3731 if ((prop_count + pclen
3732 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3733 || size_would_violate_limit_p (param_desc,
3734 param_desc->size_reached + prop_size))
3735 return "propagating accesses would violate the count or size limit";
3737 *change_p = true;
3738 for (unsigned j = 0; j < aclen; j++)
3740 if (prop_kinds[j] == ACC_PROP_COPY)
3742 param_access *argacc = (*arg_desc->accesses)[j];
3744 param_access *copy = ggc_cleared_alloc<param_access> ();
3745 copy->unit_offset = argacc->unit_offset + delta_offset;
3746 copy->unit_size = argacc->unit_size;
3747 copy->type = argacc->type;
3748 copy->alias_ptr_type = argacc->alias_ptr_type;
3749 copy->certain = true;
3750 copy->reverse = argacc->reverse;
3751 vec_safe_push (param_desc->accesses, copy);
3753 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3755 param_access *argacc = (*arg_desc->accesses)[j];
3756 param_access *csp
3757 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3758 argacc->unit_size);
3759 csp->certain = true;
3763 param_desc->size_reached += prop_size;
3765 return NULL;
3768 /* Propagate parameter splitting information through call graph edge CS.
3769 Return true if any changes that might need to be propagated within SCCs have
3770 been made. The function also clears the aggregate_pass_through and
3771 pointer_pass_through in call summaries which do not need to be processed
3772 again if this CS is revisited when iterating while changes are propagated
3773 within an SCC. */
3775 static bool
3776 param_splitting_across_edge (cgraph_edge *cs)
3778 bool res = false;
3779 bool cross_scc = !ipa_edge_within_scc (cs);
3780 enum availability availability;
3781 cgraph_node *callee = cs->callee->function_symbol (&availability);
3782 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3783 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3785 isra_call_summary *csum = call_sums->get (cs);
3786 gcc_checking_assert (csum);
3787 unsigned args_count = csum->m_arg_flow.length ();
3788 isra_func_summary *to_ifs = func_sums->get (callee);
3789 unsigned param_count
3790 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3791 ? vec_safe_length (to_ifs->m_parameters)
3792 : 0);
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3795 fprintf (dump_file, "Splitting across %s->%s:\n",
3796 cs->caller->dump_name (), callee->dump_name ());
3798 unsigned i;
3799 for (i = 0; (i < args_count) && (i < param_count); i++)
3801 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3802 isra_param_flow *ipf = &csum->m_arg_flow[i];
3804 if (arg_desc->locally_unused)
3806 if (dump_file && (dump_flags & TDF_DETAILS))
3807 fprintf (dump_file, " ->%u: unused in callee\n", i);
3808 ipf->pointer_pass_through = false;
3809 continue;
3812 if (ipf->pointer_pass_through)
3814 int idx = get_single_param_flow_source (ipf);
3815 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3816 if (!param_desc->split_candidate)
3817 continue;
3818 gcc_assert (param_desc->by_ref);
3820 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3822 if (dump_file && (dump_flags & TDF_DETAILS))
3823 fprintf (dump_file, " %u->%u: not candidate or not by "
3824 "reference in callee\n", idx, i);
3825 param_desc->split_candidate = false;
3826 ipf->pointer_pass_through = false;
3827 res = true;
3829 else if (!ipf->safe_to_import_accesses)
3831 if (!csum->m_before_any_store
3832 || !all_callee_accesses_present_p (param_desc, arg_desc))
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3835 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3836 idx, i);
3837 param_desc->split_candidate = false;
3838 ipf->pointer_pass_through = false;
3839 res = true;
3842 else
3844 if (dump_file && (dump_flags & TDF_DETAILS))
3845 fprintf (dump_file, " %u->%u: verified callee accesses "
3846 "present.\n", idx, i);
3847 if (cross_scc)
3848 ipf->pointer_pass_through = false;
3851 else
3853 const char *pull_failure
3854 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3855 0, 0, &res);
3856 if (pull_failure)
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3859 fprintf (dump_file, " %u->%u: by_ref access pull "
3860 "failed: %s.\n", idx, i, pull_failure);
3861 param_desc->split_candidate = false;
3862 ipf->pointer_pass_through = false;
3863 res = true;
3865 else
3867 if (dump_file && (dump_flags & TDF_DETAILS))
3868 fprintf (dump_file, " %u->%u: by_ref access pull "
3869 "succeeded.\n", idx, i);
3870 if (cross_scc)
3871 ipf->pointer_pass_through = false;
3875 else if (ipf->aggregate_pass_through)
3877 int idx = get_single_param_flow_source (ipf);
3878 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3879 if (!param_desc->split_candidate)
3880 continue;
3881 gcc_assert (!param_desc->by_ref);
3882 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3883 ipf->unit_size);
3884 gcc_checking_assert (pacc);
3886 if (pacc->certain)
3888 if (dump_file && (dump_flags & TDF_DETAILS))
3889 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3890 ipf->aggregate_pass_through = false;
3892 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3895 fprintf (dump_file, " %u->%u: not candidate or by "
3896 "reference in callee\n", idx, i);
3898 pacc->certain = true;
3899 if (overlapping_certain_accesses_p (param_desc, NULL))
3901 if (dump_file && (dump_flags & TDF_DETAILS))
3902 fprintf (dump_file, " ...leading to overlap, "
3903 " disqualifying candidate parameter %u\n",
3904 idx);
3905 param_desc->split_candidate = false;
3907 else
3908 bump_reached_size (param_desc, pacc->unit_size, idx);
3910 ipf->aggregate_pass_through = false;
3911 res = true;
3913 else
3915 const char *pull_failure
3916 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3917 ipf->unit_offset,
3918 ipf->unit_size, &res);
3919 if (pull_failure)
3921 if (dump_file && (dump_flags & TDF_DETAILS))
3922 fprintf (dump_file, " %u->%u: arg access pull "
3923 "failed: %s.\n", idx, i, pull_failure);
3925 ipf->aggregate_pass_through = false;
3926 pacc->certain = true;
3928 if (overlapping_certain_accesses_p (param_desc, NULL))
3930 if (dump_file && (dump_flags & TDF_DETAILS))
3931 fprintf (dump_file, " ...leading to overlap, "
3932 " disqualifying candidate parameter %u\n",
3933 idx);
3934 param_desc->split_candidate = false;
3936 else
3937 bump_reached_size (param_desc, pacc->unit_size, idx);
3939 res = true;
3941 else
3943 if (dump_file && (dump_flags & TDF_DETAILS))
3944 fprintf (dump_file, " %u->%u: arg access pull "
3945 "succeeded.\n", idx, i);
3946 if (cross_scc)
3947 ipf->aggregate_pass_through = false;
3953 /* Handle argument-parameter count mismatches. */
3954 for (; (i < args_count); i++)
3956 isra_param_flow *ipf = &csum->m_arg_flow[i];
3958 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3960 int idx = get_single_param_flow_source (ipf);
3961 ipf->pointer_pass_through = false;
3962 ipf->aggregate_pass_through = false;
3963 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3964 if (!param_desc->split_candidate)
3965 continue;
3967 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3969 idx, i);
3970 param_desc->split_candidate = false;
3971 res = true;
3974 return res;
3977 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3978 callers ignore the return value, or come from the same SCC and use the
3979 return value only to compute their return value, return false, otherwise
3980 return true. */
3982 static bool
3983 retval_used_p (cgraph_node *node, void *)
3985 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3987 isra_call_summary *csum = call_sums->get (cs);
3988 gcc_checking_assert (csum);
3989 if (csum->m_return_ignored)
3990 continue;
3991 if (!csum->m_return_returned)
3992 return true;
3994 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3995 if (!from_ifs || !from_ifs->m_candidate)
3996 return true;
3998 if (!ipa_edge_within_scc (cs)
3999 && !from_ifs->m_return_ignored)
4000 return true;
4003 return false;
4006 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
4007 modify parameter which originally had index BASE_INDEX, in the adjustment
4008 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
4009 PREV_ADJUSTMENT. If IPA-CP has created a transformation summary for the
4010 original node, it needs to be passed in IPCP_TS, otherwise it should be
4011 NULL. If the parent clone is the original function, PREV_ADJUSTMENT is NULL
4012 and PREV_CLONE_INDEX is equal to BASE_INDEX. */
4014 static void
4015 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
4016 unsigned prev_clone_index,
4017 ipa_adjusted_param *prev_adjustment,
4018 ipcp_transformation *ipcp_ts,
4019 vec<ipa_adjusted_param, va_gc> **new_params)
4021 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
4022 if (desc->locally_unused)
4024 if (dump_file)
4025 fprintf (dump_file, " Will remove parameter %u\n", base_index);
4026 return;
4029 if (!desc->split_candidate)
4031 ipa_adjusted_param adj;
4032 if (prev_adjustment)
4034 adj = *prev_adjustment;
4035 adj.prev_clone_adjustment = true;
4036 adj.prev_clone_index = prev_clone_index;
4038 else
4040 memset (&adj, 0, sizeof (adj));
4041 adj.op = IPA_PARAM_OP_COPY;
4042 adj.base_index = base_index;
4043 adj.prev_clone_index = prev_clone_index;
4045 vec_safe_push ((*new_params), adj);
4046 return;
4049 if (dump_file)
4050 fprintf (dump_file, " Will split parameter %u\n", base_index);
4052 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
4053 unsigned aclen = vec_safe_length (desc->accesses);
4054 for (unsigned j = 0; j < aclen; j++)
4056 param_access *pa = (*desc->accesses)[j];
4057 if (!pa->certain)
4058 continue;
4060 if (ipcp_ts)
4062 ipa_argagg_value_list avl (ipcp_ts);
4063 tree value = avl.get_value (base_index, pa->unit_offset);
4064 if (value && !AGGREGATE_TYPE_P (pa->type))
4066 if (dump_file)
4067 fprintf (dump_file, " - omitting component at byte "
4068 "offset %u which is known to have a constant value\n ",
4069 pa->unit_offset);
4070 continue;
4074 if (dump_file)
4075 fprintf (dump_file, " - component at byte offset %u, "
4076 "size %u\n", pa->unit_offset, pa->unit_size);
4078 ipa_adjusted_param adj;
4079 memset (&adj, 0, sizeof (adj));
4080 adj.op = IPA_PARAM_OP_SPLIT;
4081 adj.base_index = base_index;
4082 adj.prev_clone_index = prev_clone_index;
4083 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
4084 adj.reverse = pa->reverse;
4085 adj.type = pa->type;
4086 adj.alias_ptr_type = pa->alias_ptr_type;
4087 adj.unit_offset = pa->unit_offset;
4088 vec_safe_push ((*new_params), adj);
4092 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
4093 flag of all callers of NODE. */
4095 static bool
4096 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
4098 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4099 cs->caller->calls_comdat_local = true;
4100 return false;
4103 /* Remove any IPA-CP results stored in TS that are associated with removed
4104 parameters as marked in IFS. */
4106 static void
4107 zap_useless_ipcp_results (const isra_func_summary *ifs, ipcp_transformation *ts)
4109 ts->remove_argaggs_if ([ifs](const ipa_argagg_value &v)
4111 return (*ifs->m_parameters)[v.index].locally_unused;
4114 bool useful_vr = false;
4115 unsigned count = vec_safe_length (ts->m_vr);
4116 for (unsigned i = 0; i < count; i++)
4117 if ((*ts->m_vr)[i].known_p ())
4119 const isra_param_desc *desc = &(*ifs->m_parameters)[i];
4120 if (desc->locally_unused)
4121 (*ts->m_vr)[i].set_unknown ();
4122 else
4123 useful_vr = true;
4125 if (!useful_vr)
4126 ts->m_vr = NULL;
4129 /* Do final processing of results of IPA propagation regarding NODE, clone it
4130 if appropriate. */
4132 static void
4133 process_isra_node_results (cgraph_node *node,
4134 hash_map<const char *, unsigned> *clone_num_suffixes)
4136 isra_func_summary *ifs = func_sums->get (node);
4137 if (!ifs || !ifs->m_candidate)
4138 return;
4140 auto_vec<bool, 16> surviving_params;
4141 bool check_surviving = false;
4142 clone_info *cinfo = clone_info::get (node);
4143 if (cinfo && cinfo->param_adjustments)
4145 check_surviving = true;
4146 cinfo->param_adjustments->get_surviving_params (&surviving_params);
4149 unsigned param_count = vec_safe_length (ifs->m_parameters);
4150 bool will_change_function = false;
4151 if (ifs->m_returns_value && ifs->m_return_ignored)
4152 will_change_function = true;
4153 else
4154 for (unsigned i = 0; i < param_count; i++)
4156 isra_param_desc *desc = &(*ifs->m_parameters)[i];
4157 if ((desc->locally_unused || desc->split_candidate)
4158 /* Make sure we do not clone just to attempt to remove an already
4159 removed unused argument. */
4160 && (!check_surviving
4161 || (i < surviving_params.length ()
4162 && surviving_params[i])))
4164 will_change_function = true;
4165 break;
4168 if (!will_change_function)
4169 return;
4171 if (dump_file)
4173 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
4174 node->dump_name ());
4175 if (ifs->m_returns_value && ifs->m_return_ignored)
4176 fprintf (dump_file, " Will remove return value.\n");
4179 ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
4180 if (ipcp_ts)
4181 zap_useless_ipcp_results (ifs, ipcp_ts);
4182 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
4183 if (ipa_param_adjustments *old_adjustments
4184 = cinfo ? cinfo->param_adjustments : NULL)
4186 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
4187 for (unsigned i = 0; i < old_adj_len; i++)
4189 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
4190 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
4191 old_adj, ipcp_ts, &new_params);
4194 else
4195 for (unsigned i = 0; i < param_count; i++)
4196 push_param_adjustments_for_index (ifs, i, i, NULL, ipcp_ts, &new_params);
4198 ipa_param_adjustments *new_adjustments
4199 = (new (ggc_alloc <ipa_param_adjustments> ())
4200 ipa_param_adjustments (new_params, param_count,
4201 ifs->m_returns_value && ifs->m_return_ignored));
4203 if (dump_file && (dump_flags & TDF_DETAILS))
4205 fprintf (dump_file, "\n Created adjustments:\n");
4206 new_adjustments->dump (dump_file);
4209 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
4210 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4211 node->decl)));
4212 auto_vec<cgraph_edge *> callers = node->collect_callers ();
4213 cgraph_node *new_node
4214 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
4215 suffix_counter);
4216 suffix_counter++;
4217 if (node->calls_comdat_local && node->same_comdat_group)
4219 new_node->add_to_same_comdat_group (node);
4220 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
4221 NULL, true);
4223 new_node->calls_comdat_local = node->calls_comdat_local;
4225 if (dump_file)
4226 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
4227 callers.release ();
4230 /* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
4231 followed by the list of numbers in INDICES. */
4233 static void
4234 dump_list_of_param_indices (const cgraph_node *node, const char* msg,
4235 const vec<unsigned> &indices)
4237 if (indices.is_empty ())
4238 return;
4239 fprintf (dump_file, "The following parameters of %s %s:", node->dump_name (),
4240 msg);
4241 for (unsigned i : indices)
4242 fprintf (dump_file, " %u", i);
4243 fprintf (dump_file, "\n");
4246 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
4247 and disable transformations for those which have not or which should not
4248 transformed because the associated debug counter reached its limit. Return
4249 true if none survived or if there were no candidates to begin with.
4250 Additionally, also adjust parameter descriptions based on debug counters and
4251 hints propagated earlier. */
4253 static bool
4254 adjust_parameter_descriptions (cgraph_node *node, isra_func_summary *ifs)
4256 bool ret = true;
4257 unsigned len = vec_safe_length (ifs->m_parameters);
4258 if (!len)
4259 return true;
4261 auto_vec<bool, 16> surviving_params;
4262 bool check_surviving = false;
4263 clone_info *cinfo = clone_info::get (node);
4264 if (cinfo && cinfo->param_adjustments)
4266 check_surviving = true;
4267 cinfo->param_adjustments->get_surviving_params (&surviving_params);
4269 ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
4270 auto_vec <unsigned> dump_dead_indices;
4271 auto_vec <unsigned> dump_bad_cond_indices;
4272 for (unsigned i = 0; i < len; i++)
4274 isra_param_desc *desc = &(*ifs->m_parameters)[i];
4275 if (!dbg_cnt (ipa_sra_params))
4277 desc->locally_unused = false;
4278 desc->split_candidate = false;
4279 continue;
4282 if (desc->split_only_when_retval_removed
4283 && !ifs->m_return_ignored)
4285 if (dump_file && (dump_flags & TDF_DETAILS)
4286 && (desc->locally_unused || desc->split_candidate))
4287 dump_bad_cond_indices.safe_push (i);
4289 gcc_checking_assert (!desc->locally_unused
4290 || desc->remove_only_when_retval_removed);
4291 desc->locally_unused = false;
4292 desc->split_candidate = false;
4293 continue;
4295 if (desc->remove_only_when_retval_removed
4296 && !ifs->m_return_ignored)
4298 if (dump_file && (dump_flags & TDF_DETAILS)
4299 && (desc->locally_unused || desc->split_candidate))
4300 dump_bad_cond_indices.safe_push (i);
4302 desc->locally_unused = false;
4304 if (check_surviving
4305 && (i >= surviving_params.length ()
4306 || !surviving_params[i]))
4308 /* Even if the parameter was removed by a previous IPA pass, we do
4309 not clear locally_unused because if it really is unused, this
4310 information might be useful in callers. */
4311 desc->split_candidate = false;
4313 if (dump_file && (dump_flags & TDF_DETAILS))
4314 dump_dead_indices.safe_push (i);
4317 if (desc->split_candidate && desc->conditionally_dereferenceable)
4319 gcc_assert (desc->safe_size_set);
4320 for (param_access *pa : *desc->accesses)
4321 if ((pa->unit_offset + pa->unit_size) > desc->safe_size)
4323 if (dump_file && (dump_flags & TDF_DETAILS))
4324 dump_bad_cond_indices.safe_push (i);
4325 desc->split_candidate = false;
4326 break;
4330 if (desc->split_candidate)
4332 if (desc->by_ref && !desc->not_specially_constructed)
4334 int extra_factor
4335 = opt_for_fn (node->decl,
4336 param_ipa_sra_ptrwrap_growth_factor);
4337 desc->param_size_limit = extra_factor * desc->param_size_limit;
4339 if (size_would_violate_limit_p (desc, desc->size_reached))
4340 desc->split_candidate = false;
4343 /* Avoid ICEs on size-mismatched VIEW_CONVERT_EXPRs when callers and
4344 callees don't agree on types in aggregates and we try to do both
4345 IPA-CP and IPA-SRA. */
4346 if (ipcp_ts && desc->split_candidate)
4348 ipa_argagg_value_list avl (ipcp_ts);
4349 for (const param_access *pa : desc->accesses)
4351 if (!pa->certain)
4352 continue;
4353 tree value = avl.get_value (i, pa->unit_offset);
4354 if (value
4355 && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value)))
4356 / BITS_PER_UNIT)
4357 != pa->unit_size))
4359 desc->split_candidate = false;
4360 if (dump_file && (dump_flags & TDF_DETAILS))
4361 dump_dead_indices.safe_push (i);
4362 break;
4367 if (desc->locally_unused || desc->split_candidate)
4368 ret = false;
4371 dump_list_of_param_indices (node, "are dead on arrival or have a type "
4372 "mismatch with IPA-CP", dump_dead_indices);
4373 dump_list_of_param_indices (node, "fail additional requirements ",
4374 dump_bad_cond_indices);
4376 return ret;
4380 /* Run the interprocedural part of IPA-SRA. */
4382 static unsigned int
4383 ipa_sra_analysis (void)
4385 if (dump_file)
4387 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
4388 ipa_sra_dump_all_summaries (dump_file, false);
4391 gcc_checking_assert (func_sums);
4392 gcc_checking_assert (call_sums);
4393 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
4394 auto_vec <cgraph_node *, 16> stack;
4395 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
4397 /* One sweep from callers to callees for return value removal. */
4398 for (int i = node_scc_count - 1; i >= 0 ; i--)
4400 cgraph_node *scc_rep = order[i];
4401 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
4403 /* Preliminary IPA function level checks. */
4404 for (cgraph_node *v : cycle_nodes)
4406 isra_func_summary *ifs = func_sums->get (v);
4407 if (!ifs || !ifs->m_candidate)
4408 continue;
4409 if (!ipa_sra_ipa_function_checks (v)
4410 || check_all_callers_for_issues (v))
4411 ifs->zap ();
4414 for (cgraph_node *v : cycle_nodes)
4416 isra_func_summary *ifs = func_sums->get (v);
4417 if (!ifs || !ifs->m_candidate)
4418 continue;
4419 bool return_needed
4420 = (ifs->m_returns_value
4421 && (!dbg_cnt (ipa_sra_retvalues)
4422 || v->call_for_symbol_and_aliases (retval_used_p,
4423 NULL, true)));
4424 ifs->m_return_ignored = !return_needed;
4425 if (return_needed)
4426 isra_push_node_to_stack (v, ifs, &stack);
4429 while (!stack.is_empty ())
4431 cgraph_node *node = stack.pop ();
4432 isra_func_summary *ifs = func_sums->get (node);
4433 gcc_checking_assert (ifs && ifs->m_queued);
4434 ifs->m_queued = false;
4436 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4437 if (ipa_edge_within_scc (cs)
4438 && call_sums->get (cs)->m_return_returned)
4440 enum availability av;
4441 cgraph_node *callee = cs->callee->function_symbol (&av);
4442 isra_func_summary *to_ifs = func_sums->get (callee);
4443 if (to_ifs && to_ifs->m_return_ignored)
4445 to_ifs->m_return_ignored = false;
4446 isra_push_node_to_stack (callee, to_ifs, &stack);
4451 /* Parameter hint propagation. */
4452 for (cgraph_node *v : cycle_nodes)
4454 isra_func_summary *ifs = func_sums->get (v);
4455 propagate_hints_to_all_callees (v, ifs, &stack);
4458 while (!stack.is_empty ())
4460 cgraph_node *node = stack.pop ();
4461 isra_func_summary *ifs = func_sums->get (node);
4462 gcc_checking_assert (ifs && ifs->m_queued);
4463 ifs->m_queued = false;
4464 propagate_hints_to_all_callees (node, ifs, &stack);
4467 cycle_nodes.release ();
4470 /* One sweep from callees to callers for parameter removal and splitting. */
4471 for (int i = 0; i < node_scc_count; i++)
4473 cgraph_node *scc_rep = order[i];
4474 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
4476 /* First step of parameter removal. */
4477 for (cgraph_node *v : cycle_nodes)
4479 isra_func_summary *ifs = func_sums->get (v);
4480 if (!ifs || !ifs->m_candidate)
4481 continue;
4482 if (adjust_parameter_descriptions (v, ifs))
4483 continue;
4484 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
4485 process_edge_to_unknown_caller (cs);
4486 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4487 if (!ipa_edge_within_scc (cs))
4488 param_removal_cross_scc_edge (cs);
4491 /* Look at edges within the current SCC and propagate used-ness across
4492 them, pushing onto the stack all notes which might need to be
4493 revisited. */
4494 for (cgraph_node *v : cycle_nodes)
4495 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
4496 &stack, true);
4498 /* Keep revisiting and pushing until nothing changes. */
4499 while (!stack.is_empty ())
4501 cgraph_node *v = stack.pop ();
4502 isra_func_summary *ifs = func_sums->get (v);
4503 gcc_checking_assert (ifs && ifs->m_queued);
4504 ifs->m_queued = false;
4506 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
4507 &stack, true);
4510 /* Parameter splitting. */
4511 bool repeat_scc_access_propagation;
4514 repeat_scc_access_propagation = false;
4515 for (cgraph_node *v : cycle_nodes)
4517 isra_func_summary *ifs = func_sums->get (v);
4518 if (!ifs
4519 || !ifs->m_candidate
4520 || vec_safe_is_empty (ifs->m_parameters))
4521 continue;
4522 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4523 if (param_splitting_across_edge (cs))
4524 repeat_scc_access_propagation = true;
4527 while (repeat_scc_access_propagation);
4529 if (flag_checking)
4530 for (cgraph_node *v : cycle_nodes)
4531 verify_splitting_accesses (v, true);
4533 cycle_nodes.release ();
4536 ipa_free_postorder_info ();
4537 free (order);
4539 if (dump_file)
4541 if (dump_flags & TDF_DETAILS)
4543 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
4544 " ==========\n");
4545 ipa_sra_dump_all_summaries (dump_file, true);
4547 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4550 hash_map<const char *, unsigned> *clone_num_suffixes
4551 = new hash_map<const char *, unsigned>;
4553 cgraph_node *node;
4554 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4555 process_isra_node_results (node, clone_num_suffixes);
4557 delete clone_num_suffixes;
4558 ggc_delete (func_sums);
4559 func_sums = NULL;
4560 delete call_sums;
4561 call_sums = NULL;
4563 if (dump_file)
4564 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4565 "==========\n\n");
4566 return 0;
4570 const pass_data pass_data_ipa_sra =
4572 IPA_PASS, /* type */
4573 "sra", /* name */
4574 OPTGROUP_NONE, /* optinfo_flags */
4575 TV_IPA_SRA, /* tv_id */
4576 0, /* properties_required */
4577 0, /* properties_provided */
4578 0, /* properties_destroyed */
4579 0, /* todo_flags_start */
4580 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4583 class pass_ipa_sra : public ipa_opt_pass_d
4585 public:
4586 pass_ipa_sra (gcc::context *ctxt)
4587 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4588 ipa_sra_generate_summary, /* generate_summary */
4589 ipa_sra_write_summary, /* write_summary */
4590 ipa_sra_read_summary, /* read_summary */
4591 NULL , /* write_optimization_summary */
4592 NULL, /* read_optimization_summary */
4593 NULL, /* stmt_fixup */
4594 0, /* function_transform_todo_flags_start */
4595 NULL, /* function_transform */
4596 NULL) /* variable_transform */
4599 /* opt_pass methods: */
4600 bool gate (function *) final override
4602 /* TODO: We should remove the optimize check after we ensure we never run
4603 IPA passes when not optimizing. */
4604 return (flag_ipa_sra && optimize);
4607 unsigned int execute (function *) final override
4609 return ipa_sra_analysis ();
4612 }; // class pass_ipa_sra
4614 } // anon namespace
4616 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4617 create a summary structure describing IPA-SRA opportunities and constraints
4618 in it. */
4620 static void
4621 ipa_sra_summarize_function (cgraph_node *node)
4623 if (dump_file)
4624 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4625 node->order);
4626 gcc_obstack_init (&gensum_obstack);
4627 loaded_decls = new hash_set<tree>;
4629 isra_func_summary *ifs = NULL;
4630 unsigned count = 0;
4631 if (ipa_sra_preliminary_function_checks (node))
4633 ifs = func_sums->get_create (node);
4634 ifs->m_candidate = true;
4635 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4636 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4637 for (tree parm = DECL_ARGUMENTS (node->decl);
4638 parm;
4639 parm = DECL_CHAIN (parm))
4640 count++;
4642 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4644 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4645 bool cfun_pushed = false;
4646 if (count > 0)
4648 decl2desc = new hash_map<tree, gensum_param_desc *>;
4649 param_descriptions.reserve_exact (count);
4650 param_descriptions.quick_grow_cleared (count);
4652 if (create_parameter_descriptors (node, &param_descriptions))
4654 push_cfun (fun);
4655 cfun_pushed = true;
4656 final_bbs = BITMAP_ALLOC (NULL);
4657 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4658 unsafe_by_ref_count
4659 * last_basic_block_for_fn (fun));
4660 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4663 /* Scan function is run even when there are no removal or splitting
4664 candidates so that we can calculate hints on call edges which can be
4665 useful in callees. */
4666 scan_function (node, fun);
4668 if (count > 0)
4670 if (dump_file)
4672 dump_gensum_param_descriptors (dump_file, node->decl,
4673 &param_descriptions);
4674 fprintf (dump_file, "----------------------------------------\n");
4677 process_scan_results (node, fun, ifs, &param_descriptions);
4679 if (cfun_pushed)
4680 pop_cfun ();
4681 if (bb_dereferences)
4683 free (bb_dereferences);
4684 bb_dereferences = NULL;
4685 BITMAP_FREE (final_bbs);
4686 final_bbs = NULL;
4689 isra_analyze_all_outgoing_calls (node);
4691 delete loaded_decls;
4692 loaded_decls = NULL;
4693 if (decl2desc)
4695 delete decl2desc;
4696 decl2desc = NULL;
4698 obstack_free (&gensum_obstack, NULL);
4699 if (dump_file)
4700 fprintf (dump_file, "\n\n");
4701 if (flag_checking)
4702 verify_splitting_accesses (node, false);
4703 return;
4706 ipa_opt_pass_d *
4707 make_pass_ipa_sra (gcc::context *ctxt)
4709 return new pass_ipa_sra (ctxt);
4712 /* Reset all state within ipa-sra.cc so that we can rerun the compiler
4713 within the same process. For use by toplev::finalize. */
4715 void
4716 ipa_sra_cc_finalize (void)
4718 if (func_sums)
4719 ggc_delete (func_sums);
4720 func_sums = NULL;
4721 delete call_sums;
4722 call_sums = NULL;
4725 #include "gt-ipa-sra.h"