compiler: only build thunk struct type when it is needed
[official-gcc.git] / gcc / ipa-sra.cc
blob2237ac6d92f2f37fe6a77d09069ab434fe40d389
1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2019-2022 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* IPA-SRA is an interprocedural pass that removes unused function return
22 values (turning functions returning a value which is never used into void
23 functions) and removes unused function parameters. It can also replace an
24 aggregate parameter by a set of other parameters representing part of the
25 original, turning those passed by reference into new ones which pass the
26 value directly.
28 The pass is a true IPA one, which means that it works in three stages in
29 order to be able to take advantage of LTO. First, summaries about functions
30 and each calls are generated. Function summaries (often called call graph
31 node summaries) contain mainly information about which parameters are
32 potential transformation candidates and which bits of candidates are
33 accessed. We differentiate between accesses done as a part of a call
34 statement (which might be not necessary if the callee is also transformed)
35 and others (which are mandatory). Call summaries (often called call graph
36 edge summaries) contain information about which function formal parameters
37 feed into which actual call arguments so that if two parameters are only
38 used in a sum which is then passed to another function which then however
39 does not use this parameter, all three parameters of the two functions can
40 be eliminated. Edge summaries also have flags whether the return value is
41 used or if it is only returned in the caller too. In LTO mode these
42 summaries are then streamed to the object file in the compilation phase and
43 streamed back in in the WPA analysis stage.
45 The interprocedural analysis phase traverses the graph in topological order
46 in two sweeps, one in each direction. First, from callees to callers for
47 parameter removal and splitting. Each strongly-connected component is
48 processed iteratively until the situation in it stabilizes. The pass from
49 callers to callees is then carried out to remove unused return values in a
50 very similar fashion.
52 Because parameter manipulation has big implications for call redirection
53 which is done only after all call graph nodes materialize, the
54 transformation phase is not part of this patch but is carried out by the
55 clone materialization and edge redirection itself, see comments in
56 ipa-param-manipulation.h for more details. */
59 #include "config.h"
60 #include "system.h"
61 #include "coretypes.h"
62 #include "backend.h"
63 #include "tree.h"
64 #include "gimple.h"
65 #include "predict.h"
66 #include "tree-pass.h"
67 #include "ssa.h"
68 #include "cgraph.h"
69 #include "gimple-pretty-print.h"
70 #include "alias.h"
71 #include "tree-eh.h"
72 #include "gimple-iterator.h"
73 #include "gimple-walk.h"
74 #include "tree-dfa.h"
75 #include "tree-sra.h"
76 #include "alloc-pool.h"
77 #include "symbol-summary.h"
78 #include "dbgcnt.h"
79 #include "tree-inline.h"
80 #include "ipa-utils.h"
81 #include "builtins.h"
82 #include "cfganal.h"
83 #include "tree-streamer.h"
84 #include "internal-fn.h"
85 #include "symtab-clones.h"
86 #include "attribs.h"
88 static void ipa_sra_summarize_function (cgraph_node *);
90 /* Bits used to track size of an aggregate in bytes interprocedurally. */
91 #define ISRA_ARG_SIZE_LIMIT_BITS 16
92 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
93 /* How many parameters can feed into a call actual argument and still be
94 tracked. */
95 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
97 /* Structure describing accesses to a specific portion of an aggregate
98 parameter, as given by the offset and size. Any smaller accesses that occur
99 within a function that fall within another access form a tree. The pass
100 cannot analyze parameters with only partially overlapping accesses. */
102 struct GTY(()) param_access
104 /* Type that a potential replacement should have. This field only has
105 meaning in the summary building and transformation phases, when it is
106 reconstructed from the body. Must not be touched in IPA analysis
107 stage. */
108 tree type;
110 /* Alias reference type to be used in MEM_REFs when adjusting caller
111 arguments. */
112 tree alias_ptr_type;
114 /* Values returned by get_ref_base_and_extent but converted to bytes and
115 stored as unsigned ints. */
116 unsigned unit_offset;
117 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
119 /* Set once we are sure that the access will really end up in a potentially
120 transformed function - initially not set for portions of formal parameters
121 that are only used as actual function arguments passed to callees. */
122 unsigned certain : 1;
123 /* Set if the access has reverse scalar storage order. */
124 unsigned reverse : 1;
127 /* This structure has the same purpose as the one above and additionally it
128 contains some fields that are only necessary in the summary generation
129 phase. */
131 struct gensum_param_access
133 /* Values returned by get_ref_base_and_extent. */
134 HOST_WIDE_INT offset;
135 HOST_WIDE_INT size;
137 /* if this access has any children (in terms of the definition above), this
138 points to the first one. */
139 struct gensum_param_access *first_child;
140 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
141 described above. */
142 struct gensum_param_access *next_sibling;
144 /* Type that a potential replacement should have. This field only has
145 meaning in the summary building and transformation phases, when it is
146 reconstructed from the body. Must not be touched in IPA analysis
147 stage. */
148 tree type;
149 /* Alias reference type to be used in MEM_REFs when adjusting caller
150 arguments. */
151 tree alias_ptr_type;
153 /* Have there been writes to or reads from this exact location except for as
154 arguments to a function call that can be tracked. */
155 bool nonarg;
157 /* Set if the access has reverse scalar storage order. */
158 bool reverse;
161 /* Summary describing a parameter in the IPA stages. */
163 struct GTY(()) isra_param_desc
165 /* List of access representatives to the parameters, sorted according to
166 their offset. */
167 vec <param_access *, va_gc> *accesses;
169 /* Unit size limit of total size of all replacements. */
170 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
171 /* Sum of unit sizes of all certain replacements. */
172 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
174 /* A parameter that is used only in call arguments and can be removed if all
175 concerned actual arguments are removed. */
176 unsigned locally_unused : 1;
177 /* An aggregate that is a candidate for breaking up or complete removal. */
178 unsigned split_candidate : 1;
179 /* Is this a parameter passing stuff by reference? */
180 unsigned by_ref : 1;
183 /* Structure used when generating summaries that describes a parameter. */
185 struct gensum_param_desc
187 /* Roots of param_accesses. */
188 gensum_param_access *accesses;
189 /* Number of accesses in the access tree rooted in field accesses. */
190 unsigned access_count;
192 /* If the below is non-zero, this is the number of uses as actual arguments. */
193 int call_uses;
194 /* Number of times this parameter has been directly passed to. */
195 unsigned ptr_pt_count;
197 /* Size limit of total size of all replacements. */
198 unsigned param_size_limit;
199 /* Sum of sizes of nonarg accesses. */
200 unsigned nonarg_acc_size;
202 /* A parameter that is used only in call arguments and can be removed if all
203 concerned actual arguments are removed. */
204 bool locally_unused;
205 /* An aggregate that is a candidate for breaking up or a pointer passing data
206 by reference that is a candidate for being converted to a set of
207 parameters passing those data by value. */
208 bool split_candidate;
209 /* Is this a parameter passing stuff by reference? */
210 bool by_ref;
212 /* The number of this parameter as they are ordered in function decl. */
213 int param_number;
214 /* For parameters passing data by reference, this is parameter index to
215 compute indices to bb_dereferences. */
216 int deref_index;
219 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
220 allocated in GC memory, this is not necessary and we can consider removing
221 the function. */
223 static void
224 free_param_decl_accesses (isra_param_desc *desc)
226 unsigned len = vec_safe_length (desc->accesses);
227 for (unsigned i = 0; i < len; ++i)
228 ggc_free ((*desc->accesses)[i]);
229 vec_free (desc->accesses);
232 /* Class used to convey information about functions from the
233 intra-procedural analysis stage to inter-procedural one. */
235 class GTY((for_user)) isra_func_summary
237 public:
238 /* initialize the object. */
240 isra_func_summary ()
241 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
242 m_return_ignored (false), m_queued (false)
245 /* Destroy m_parameters. */
247 ~isra_func_summary ();
249 /* Mark the function as not a candidate for any IPA-SRA transformation.
250 Return true if it was a candidate until now. */
252 bool zap ();
254 /* Vector of parameter descriptors corresponding to the function being
255 analyzed. */
256 vec<isra_param_desc, va_gc> *m_parameters;
258 /* Whether the node is even a candidate for any IPA-SRA transformation at
259 all. */
260 unsigned m_candidate : 1;
262 /* Whether the original function returns any value. */
263 unsigned m_returns_value : 1;
265 /* Set to true if all call statements do not actually use the returned
266 value. */
268 unsigned m_return_ignored : 1;
270 /* Whether the node is already queued in IPA SRA stack during processing of
271 call graphs SCCs. */
273 unsigned m_queued : 1;
276 /* Deallocate the memory pointed to by isra_func_summary. TODO: Since this
277 data structure is allocated in GC memory, this is not necessary and we can
278 consider removing the destructor. */
280 isra_func_summary::~isra_func_summary ()
282 unsigned len = vec_safe_length (m_parameters);
283 for (unsigned i = 0; i < len; ++i)
284 free_param_decl_accesses (&(*m_parameters)[i]);
285 vec_free (m_parameters);
288 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
289 true if it was a candidate until now. */
291 bool
292 isra_func_summary::zap ()
294 bool ret = m_candidate;
295 m_candidate = false;
297 /* TODO: see the destructor above. */
298 unsigned len = vec_safe_length (m_parameters);
299 for (unsigned i = 0; i < len; ++i)
300 free_param_decl_accesses (&(*m_parameters)[i]);
301 vec_free (m_parameters);
303 return ret;
306 /* Structure to describe which formal parameters feed into a particular actual
307 argument. */
309 struct isra_param_flow
311 /* Number of elements in array inputs that contain valid data. */
312 char length;
313 /* Indices of formal parameters that feed into the described actual argument.
314 If aggregate_pass_through or pointer_pass_through below are true, it must
315 contain exactly one element which is passed through from a formal
316 parameter if the given number. Otherwise, the array contains indices of
317 callee's formal parameters which are used to calculate value of this
318 actual argument. */
319 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
321 /* Offset within the formal parameter. */
322 unsigned unit_offset;
323 /* Size of the portion of the formal parameter that is being passed. */
324 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
326 /* True when the value of this actual copy is a portion of a formal
327 parameter. */
328 unsigned aggregate_pass_through : 1;
329 /* True when the value of this actual copy is a verbatim pass through of an
330 obtained pointer. */
331 unsigned pointer_pass_through : 1;
332 /* True when it is safe to copy access candidates here from the callee, which
333 would mean introducing dereferences into callers of the caller. */
334 unsigned safe_to_import_accesses : 1;
337 /* Structure used to convey information about calls from the intra-procedural
338 analysis stage to inter-procedural one. */
340 class isra_call_summary
342 public:
343 isra_call_summary ()
344 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
345 m_bit_aligned_arg (false), m_before_any_store (false)
348 void init_inputs (unsigned arg_count);
349 void dump (FILE *f);
351 /* Information about what formal parameters of the caller are used to compute
352 individual actual arguments of this call. */
353 auto_vec <isra_param_flow> m_arg_flow;
355 /* Set to true if the call statement does not have a LHS. */
356 unsigned m_return_ignored : 1;
358 /* Set to true if the LHS of call statement is only used to construct the
359 return value of the caller. */
360 unsigned m_return_returned : 1;
362 /* Set when any of the call arguments are not byte-aligned. */
363 unsigned m_bit_aligned_arg : 1;
365 /* Set to true if the call happend before any (other) store to memory in the
366 caller. */
367 unsigned m_before_any_store : 1;
370 /* Class to manage function summaries. */
372 class GTY((user)) ipa_sra_function_summaries
373 : public function_summary <isra_func_summary *>
375 public:
376 ipa_sra_function_summaries (symbol_table *table, bool ggc):
377 function_summary<isra_func_summary *> (table, ggc) { }
379 void duplicate (cgraph_node *, cgraph_node *,
380 isra_func_summary *old_sum,
381 isra_func_summary *new_sum) final override;
382 void insert (cgraph_node *, isra_func_summary *) final override;
385 /* Hook that is called by summary when a node is duplicated. */
387 void
388 ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
389 isra_func_summary *old_sum,
390 isra_func_summary *new_sum)
392 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
393 useless. */
394 new_sum->m_candidate = old_sum->m_candidate;
395 new_sum->m_returns_value = old_sum->m_returns_value;
396 new_sum->m_return_ignored = old_sum->m_return_ignored;
397 gcc_assert (!old_sum->m_queued);
398 new_sum->m_queued = false;
400 unsigned param_count = vec_safe_length (old_sum->m_parameters);
401 if (!param_count)
402 return;
403 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
404 new_sum->m_parameters->quick_grow_cleared (param_count);
405 for (unsigned i = 0; i < param_count; i++)
407 isra_param_desc *s = &(*old_sum->m_parameters)[i];
408 isra_param_desc *d = &(*new_sum->m_parameters)[i];
410 d->param_size_limit = s->param_size_limit;
411 d->size_reached = s->size_reached;
412 d->locally_unused = s->locally_unused;
413 d->split_candidate = s->split_candidate;
414 d->by_ref = s->by_ref;
416 unsigned acc_count = vec_safe_length (s->accesses);
417 vec_safe_reserve_exact (d->accesses, acc_count);
418 for (unsigned j = 0; j < acc_count; j++)
420 param_access *from = (*s->accesses)[j];
421 param_access *to = ggc_cleared_alloc<param_access> ();
422 to->type = from->type;
423 to->alias_ptr_type = from->alias_ptr_type;
424 to->unit_offset = from->unit_offset;
425 to->unit_size = from->unit_size;
426 to->certain = from->certain;
427 to->reverse = from->reverse;
428 d->accesses->quick_push (to);
433 /* Pointer to the pass function summary holder. */
435 static GTY(()) ipa_sra_function_summaries *func_sums;
437 /* Hook that is called by summary when new node appears. */
439 void
440 ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
442 if (opt_for_fn (node->decl, flag_ipa_sra))
444 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
445 ipa_sra_summarize_function (node);
446 pop_cfun ();
448 else
449 func_sums->remove (node);
452 /* Class to manage call summaries. */
454 class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
456 public:
457 ipa_sra_call_summaries (symbol_table *table):
458 call_summary<isra_call_summary *> (table) { }
460 /* Duplicate info when an edge is cloned. */
461 void duplicate (cgraph_edge *, cgraph_edge *,
462 isra_call_summary *old_sum,
463 isra_call_summary *new_sum) final override;
466 static ipa_sra_call_summaries *call_sums;
469 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
470 ARG_COUNT is the number of actual arguments passed. */
472 void
473 isra_call_summary::init_inputs (unsigned arg_count)
475 if (arg_count == 0)
477 gcc_checking_assert (m_arg_flow.length () == 0);
478 return;
480 if (m_arg_flow.length () == 0)
482 m_arg_flow.reserve_exact (arg_count);
483 m_arg_flow.quick_grow_cleared (arg_count);
485 else
486 gcc_checking_assert (arg_count == m_arg_flow.length ());
489 /* Dump all information in call summary to F. */
491 void
492 isra_call_summary::dump (FILE *f)
494 if (m_return_ignored)
495 fprintf (f, " return value ignored\n");
496 if (m_return_returned)
497 fprintf (f, " return value used only to compute caller return value\n");
498 if (m_before_any_store)
499 fprintf (f, " happens before any store to memory\n");
500 for (unsigned i = 0; i < m_arg_flow.length (); i++)
502 fprintf (f, " Parameter %u:\n", i);
503 isra_param_flow *ipf = &m_arg_flow[i];
505 if (ipf->length)
507 bool first = true;
508 fprintf (f, " Scalar param sources: ");
509 for (int j = 0; j < ipf->length; j++)
511 if (!first)
512 fprintf (f, ", ");
513 else
514 first = false;
515 fprintf (f, "%i", (int) ipf->inputs[j]);
517 fprintf (f, "\n");
519 if (ipf->aggregate_pass_through)
520 fprintf (f, " Aggregate pass through from the param given above, "
521 "unit offset: %u , unit size: %u\n",
522 ipf->unit_offset, ipf->unit_size);
523 if (ipf->pointer_pass_through)
524 fprintf (f, " Pointer pass through from the param given above, "
525 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
529 /* Duplicate edge summary when an edge is cloned. */
531 void
532 ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
533 isra_call_summary *old_sum,
534 isra_call_summary *new_sum)
536 unsigned arg_count = old_sum->m_arg_flow.length ();
537 new_sum->init_inputs (arg_count);
538 for (unsigned i = 0; i < arg_count; i++)
539 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
541 new_sum->m_return_ignored = old_sum->m_return_ignored;
542 new_sum->m_return_returned = old_sum->m_return_returned;
543 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
544 new_sum->m_before_any_store = old_sum->m_before_any_store;
548 /* With all GTY stuff done, we can move to anonymous namespace. */
549 namespace {
550 /* Quick mapping from a decl to its param descriptor. */
552 hash_map<tree, gensum_param_desc *> *decl2desc;
554 /* Countdown of allowed Alias Analysis steps during summary building. */
556 int aa_walking_limit;
558 /* This is a table in which for each basic block and parameter there is a
559 distance (offset + size) in that parameter which is dereferenced and
560 accessed in that BB. */
561 HOST_WIDE_INT *bb_dereferences = NULL;
562 /* How many by-reference parameters there are in the current function. */
563 int by_ref_count;
565 /* Bitmap of BBs that can cause the function to "stop" progressing by
566 returning, throwing externally, looping infinitely or calling a function
567 which might abort etc.. */
568 bitmap final_bbs;
570 /* Obstack to allocate various small structures required only when generating
571 summary for a function. */
572 struct obstack gensum_obstack;
574 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
575 attributes, return true otherwise. NODE is the cgraph node of the current
576 function. */
578 static bool
579 ipa_sra_preliminary_function_checks (cgraph_node *node)
581 if (!node->can_change_signature)
583 if (dump_file)
584 fprintf (dump_file, "Function cannot change signature.\n");
585 return false;
588 if (!tree_versionable_function_p (node->decl))
590 if (dump_file)
591 fprintf (dump_file, "Function is not versionable.\n");
592 return false;
595 if (!opt_for_fn (node->decl, optimize)
596 || !opt_for_fn (node->decl, flag_ipa_sra))
598 if (dump_file)
599 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
600 "function.\n");
601 return false;
604 if (DECL_VIRTUAL_P (node->decl))
606 if (dump_file)
607 fprintf (dump_file, "Function is a virtual method.\n");
608 return false;
611 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
612 if (fun->stdarg)
614 if (dump_file)
615 fprintf (dump_file, "Function uses stdarg. \n");
616 return false;
619 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
621 if (dump_file)
622 fprintf (dump_file, "Always inline function will be inlined "
623 "anyway. \n");
624 return false;
627 return true;
630 /* Print access tree starting at ACCESS to F. */
632 static void
633 dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
635 fprintf (f, " ");
636 for (unsigned i = 0; i < indent; i++)
637 fprintf (f, " ");
638 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
639 access->offset);
640 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
641 fprintf (f, ", type: ");
642 print_generic_expr (f, access->type);
643 fprintf (f, ", alias_ptr_type: ");
644 print_generic_expr (f, access->alias_ptr_type);
645 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
646 for (gensum_param_access *ch = access->first_child;
648 ch = ch->next_sibling)
649 dump_gensum_access (f, ch, indent + 2);
653 /* Print access tree starting at ACCESS to F. */
655 static void
656 dump_isra_access (FILE *f, param_access *access)
658 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
659 fprintf (f, ", unit size: %u", access->unit_size);
660 fprintf (f, ", type: ");
661 print_generic_expr (f, access->type);
662 fprintf (f, ", alias_ptr_type: ");
663 print_generic_expr (f, access->alias_ptr_type);
664 if (access->certain)
665 fprintf (f, ", certain");
666 else
667 fprintf (f, ", not certain");
668 if (access->reverse)
669 fprintf (f, ", reverse");
670 fprintf (f, "\n");
673 /* Dump access tree starting at ACCESS to stderr. */
675 DEBUG_FUNCTION void
676 debug_isra_access (param_access *access)
678 dump_isra_access (stderr, access);
681 /* Dump DESC to F. */
683 static void
684 dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
686 if (desc->locally_unused)
687 fprintf (f, " unused with %i call_uses\n", desc->call_uses);
688 if (!desc->split_candidate)
690 fprintf (f, " not a candidate\n");
691 return;
693 if (desc->by_ref)
694 fprintf (f, " by_ref with %u pass throughs\n", desc->ptr_pt_count);
696 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
697 dump_gensum_access (f, acc, 2);
700 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
701 F. */
703 static void
704 dump_gensum_param_descriptors (FILE *f, tree fndecl,
705 vec<gensum_param_desc> *param_descriptions)
707 tree parm = DECL_ARGUMENTS (fndecl);
708 for (unsigned i = 0;
709 i < param_descriptions->length ();
710 ++i, parm = DECL_CHAIN (parm))
712 fprintf (f, " Descriptor for parameter %i ", i);
713 print_generic_expr (f, parm, TDF_UID);
714 fprintf (f, "\n");
715 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
720 /* Dump DESC to F. */
722 static void
723 dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
725 if (desc->locally_unused)
727 fprintf (f, " (locally) unused\n");
729 if (!desc->split_candidate)
731 fprintf (f, " not a candidate for splitting\n");
732 return;
734 fprintf (f, " param_size_limit: %u, size_reached: %u%s\n",
735 desc->param_size_limit, desc->size_reached,
736 desc->by_ref ? ", by_ref" : "");
738 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
740 param_access *access = (*desc->accesses)[i];
741 dump_isra_access (f, access);
745 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
746 F. */
748 static void
749 dump_isra_param_descriptors (FILE *f, tree fndecl,
750 isra_func_summary *ifs)
752 tree parm = DECL_ARGUMENTS (fndecl);
753 if (!ifs->m_parameters)
755 fprintf (f, " parameter descriptors not available\n");
756 return;
759 for (unsigned i = 0;
760 i < ifs->m_parameters->length ();
761 ++i, parm = DECL_CHAIN (parm))
763 fprintf (f, " Descriptor for parameter %i ", i);
764 print_generic_expr (f, parm, TDF_UID);
765 fprintf (f, "\n");
766 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
770 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
771 function fails return false, otherwise return true. SRC must fit into an
772 unsigned char. Used for purposes of transitive unused parameter
773 removal. */
775 static bool
776 add_src_to_param_flow (isra_param_flow *param_flow, int src)
778 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
779 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
780 return false;
782 param_flow->inputs[(int) param_flow->length] = src;
783 param_flow->length++;
784 return true;
787 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
788 it is the only input. Used for purposes of transitive parameter
789 splitting. */
791 static void
792 set_single_param_flow_source (isra_param_flow *param_flow, int src)
794 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
795 if (param_flow->length == 0)
797 param_flow->inputs[0] = src;
798 param_flow->length = 1;
800 else if (param_flow->length == 1)
801 gcc_assert (param_flow->inputs[0] == src);
802 else
803 gcc_unreachable ();
806 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
807 it. */
809 static unsigned
810 get_single_param_flow_source (const isra_param_flow *param_flow)
812 gcc_assert (param_flow->length == 1);
813 return param_flow->inputs[0];
816 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
817 in FUN represented with NODE and return a negative number if any of them is
818 used for something else than either an actual call argument, simple
819 arithmetic operation or debug statement. If there are no such uses, return
820 the number of actual arguments that this parameter eventually feeds to (or
821 zero if there is none). For any such parameter, mark PARM_NUM as one of its
822 sources. ANALYZED is a bitmap that tracks which SSA names we have already
823 started investigating. */
825 static int
826 isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
827 int parm_num, bitmap analyzed)
829 int res = 0;
830 imm_use_iterator imm_iter;
831 gimple *stmt;
833 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
835 if (is_gimple_debug (stmt))
836 continue;
838 /* TODO: We could handle at least const builtin functions like arithmetic
839 operations below. */
840 if (is_gimple_call (stmt))
842 int all_uses = 0;
843 use_operand_p use_p;
844 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
845 all_uses++;
847 gcall *call = as_a <gcall *> (stmt);
848 unsigned arg_count;
849 if (gimple_call_internal_p (call)
850 || (arg_count = gimple_call_num_args (call)) == 0)
852 res = -1;
853 break;
856 cgraph_edge *cs = node->get_edge (stmt);
857 gcc_checking_assert (cs);
858 isra_call_summary *csum = call_sums->get_create (cs);
859 csum->init_inputs (arg_count);
861 int simple_uses = 0;
862 for (unsigned i = 0; i < arg_count; i++)
863 if (gimple_call_arg (call, i) == name)
865 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
867 simple_uses = -1;
868 break;
870 simple_uses++;
873 if (simple_uses < 0
874 || all_uses != simple_uses)
876 res = -1;
877 break;
879 res += all_uses;
881 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
882 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
883 || gimple_code (stmt) == GIMPLE_PHI))
885 tree lhs;
886 if (gimple_code (stmt) == GIMPLE_PHI)
887 lhs = gimple_phi_result (stmt);
888 else
889 lhs = gimple_assign_lhs (stmt);
891 if (TREE_CODE (lhs) != SSA_NAME)
893 res = -1;
894 break;
896 gcc_assert (!gimple_vdef (stmt));
897 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
899 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
900 analyzed);
901 if (tmp < 0)
903 res = tmp;
904 break;
906 res += tmp;
909 else
911 res = -1;
912 break;
915 return res;
918 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
919 also described by NODE) and simple arithmetic calculations involving PARM
920 and return false if any of them is used for something else than either an
921 actual call argument, simple arithmetic operation or debug statement. If
922 there are no such uses, return true and store the number of actual arguments
923 that this parameter eventually feeds to (or zero if there is none) to
924 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
925 sources.
927 This function is similar to ptr_parm_has_nonarg_uses but its results are
928 meant for unused parameter removal, as opposed to splitting of parameters
929 passed by reference or converting them to passed by value. */
931 static bool
932 isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
933 int parm_num, int *call_uses_p)
935 gcc_checking_assert (is_gimple_reg (parm));
937 tree name = ssa_default_def (fun, parm);
938 if (!name || has_zero_uses (name))
940 *call_uses_p = 0;
941 return false;
944 /* Edge summaries can only handle callers with fewer than 256 parameters. */
945 if (parm_num > UCHAR_MAX)
946 return true;
948 bitmap analyzed = BITMAP_ALLOC (NULL);
949 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
950 analyzed);
951 BITMAP_FREE (analyzed);
952 if (call_uses < 0)
953 return true;
954 *call_uses_p = call_uses;
955 return false;
958 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
959 examine whether there are any nonarg uses that are not actual arguments or
960 otherwise infeasible uses. If so, return true, otherwise return false.
961 Create pass-through IPA flow records for any direct uses as argument calls
962 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
963 must represent the function that is currently analyzed, PARM_NUM must be the
964 index of the analyzed parameter.
966 This function is similar to isra_track_scalar_param_local_uses but its
967 results are meant for splitting of parameters passed by reference or turning
968 them into bits passed by value, as opposed to generic unused parameter
969 removal. */
971 static bool
972 ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
973 int parm_num, unsigned *pt_count_p)
975 imm_use_iterator ui;
976 gimple *stmt;
977 tree name = ssa_default_def (fun, parm);
978 bool ret = false;
979 unsigned pt_count = 0;
981 if (!name || has_zero_uses (name))
982 return false;
984 /* Edge summaries can only handle callers with fewer than 256 parameters. */
985 if (parm_num > UCHAR_MAX)
986 return true;
988 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
990 unsigned uses_ok = 0;
991 use_operand_p use_p;
993 if (is_gimple_debug (stmt))
994 continue;
996 if (gimple_assign_single_p (stmt))
998 tree rhs = gimple_assign_rhs1 (stmt);
999 if (!TREE_THIS_VOLATILE (rhs))
1001 while (handled_component_p (rhs))
1002 rhs = TREE_OPERAND (rhs, 0);
1003 if (TREE_CODE (rhs) == MEM_REF
1004 && TREE_OPERAND (rhs, 0) == name
1005 && integer_zerop (TREE_OPERAND (rhs, 1))
1006 && types_compatible_p (TREE_TYPE (rhs),
1007 TREE_TYPE (TREE_TYPE (name))))
1008 uses_ok++;
1011 else if (is_gimple_call (stmt))
1013 gcall *call = as_a <gcall *> (stmt);
1014 unsigned arg_count;
1015 if (gimple_call_internal_p (call)
1016 || (arg_count = gimple_call_num_args (call)) == 0)
1018 ret = true;
1019 break;
1022 cgraph_edge *cs = node->get_edge (stmt);
1023 gcc_checking_assert (cs);
1024 isra_call_summary *csum = call_sums->get_create (cs);
1025 csum->init_inputs (arg_count);
1027 for (unsigned i = 0; i < arg_count; ++i)
1029 tree arg = gimple_call_arg (stmt, i);
1031 if (arg == name)
1033 /* TODO: Allow &MEM_REF[name + offset] here,
1034 ipa_param_body_adjustments::modify_call_stmt has to be
1035 adjusted too. */
1036 csum->m_arg_flow[i].pointer_pass_through = true;
1037 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1038 pt_count++;
1039 uses_ok++;
1040 continue;
1043 if (!TREE_THIS_VOLATILE (arg))
1045 while (handled_component_p (arg))
1046 arg = TREE_OPERAND (arg, 0);
1047 if (TREE_CODE (arg) == MEM_REF
1048 && TREE_OPERAND (arg, 0) == name
1049 && integer_zerop (TREE_OPERAND (arg, 1))
1050 && types_compatible_p (TREE_TYPE (arg),
1051 TREE_TYPE (TREE_TYPE (name))))
1052 uses_ok++;
1057 /* If the number of valid uses does not match the number of
1058 uses in this stmt there is an unhandled use. */
1059 unsigned all_uses = 0;
1060 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1061 all_uses++;
1063 gcc_checking_assert (uses_ok <= all_uses);
1064 if (uses_ok != all_uses)
1066 ret = true;
1067 break;
1071 *pt_count_p = pt_count;
1072 return ret;
1075 /* Initialize vector of parameter descriptors of NODE. Return true if there
1076 are any candidates for splitting or unused aggregate parameter removal (the
1077 function may return false if there are candidates for removal of register
1078 parameters) and function body must be scanned. */
1080 static bool
1081 create_parameter_descriptors (cgraph_node *node,
1082 vec<gensum_param_desc> *param_descriptions)
1084 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1085 bool ret = false;
1087 int num = 0;
1088 for (tree parm = DECL_ARGUMENTS (node->decl);
1089 parm;
1090 parm = DECL_CHAIN (parm), num++)
1092 const char *msg;
1093 gensum_param_desc *desc = &(*param_descriptions)[num];
1094 /* param_descriptions vector is grown cleared in the caller. */
1095 desc->param_number = num;
1096 decl2desc->put (parm, desc);
1098 if (dump_file && (dump_flags & TDF_DETAILS))
1099 print_generic_expr (dump_file, parm, TDF_UID);
1101 int scalar_call_uses;
1102 tree type = TREE_TYPE (parm);
1103 if (TREE_THIS_VOLATILE (parm))
1105 if (dump_file && (dump_flags & TDF_DETAILS))
1106 fprintf (dump_file, " not a candidate, is volatile\n");
1107 continue;
1109 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1111 if (dump_file && (dump_flags & TDF_DETAILS))
1112 fprintf (dump_file, " not a candidate, is a va_list type\n");
1113 continue;
1115 if (TREE_ADDRESSABLE (parm))
1117 if (dump_file && (dump_flags & TDF_DETAILS))
1118 fprintf (dump_file, " not a candidate, is addressable\n");
1119 continue;
1121 if (TREE_ADDRESSABLE (type))
1123 if (dump_file && (dump_flags & TDF_DETAILS))
1124 fprintf (dump_file, " not a candidate, type cannot be split\n");
1125 continue;
1128 if (is_gimple_reg (parm)
1129 && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1130 &scalar_call_uses))
1132 if (dump_file && (dump_flags & TDF_DETAILS))
1133 fprintf (dump_file, " is a scalar with only %i call uses\n",
1134 scalar_call_uses);
1136 desc->locally_unused = true;
1137 desc->call_uses = scalar_call_uses;
1140 if (POINTER_TYPE_P (type))
1142 type = TREE_TYPE (type);
1144 if (TREE_CODE (type) == FUNCTION_TYPE
1145 || TREE_CODE (type) == METHOD_TYPE)
1147 if (dump_file && (dump_flags & TDF_DETAILS))
1148 fprintf (dump_file, " not a candidate, reference to "
1149 "a function\n");
1150 continue;
1152 if (TYPE_VOLATILE (type))
1154 if (dump_file && (dump_flags & TDF_DETAILS))
1155 fprintf (dump_file, " not a candidate, reference to "
1156 "a volatile type\n");
1157 continue;
1159 if (TREE_CODE (type) == ARRAY_TYPE
1160 && TYPE_NONALIASED_COMPONENT (type))
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 fprintf (dump_file, " not a candidate, reference to "
1164 "a nonaliased component array\n");
1165 continue;
1167 if (!is_gimple_reg (parm))
1169 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, " not a candidate, a reference which is "
1171 "not a gimple register (probably addressable)\n");
1172 continue;
1174 if (is_va_list_type (type))
1176 if (dump_file && (dump_flags & TDF_DETAILS))
1177 fprintf (dump_file, " not a candidate, reference to "
1178 "a va list\n");
1179 continue;
1181 if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1182 &desc->ptr_pt_count))
1184 if (dump_file && (dump_flags & TDF_DETAILS))
1185 fprintf (dump_file, " not a candidate, reference has "
1186 "nonarg uses\n");
1187 continue;
1189 desc->by_ref = true;
1191 else if (!AGGREGATE_TYPE_P (type))
1193 /* This is in an else branch because scalars passed by reference are
1194 still candidates to be passed by value. */
1195 if (dump_file && (dump_flags & TDF_DETAILS))
1196 fprintf (dump_file, " not a candidate, not an aggregate\n");
1197 continue;
1200 if (!COMPLETE_TYPE_P (type))
1202 if (dump_file && (dump_flags & TDF_DETAILS))
1203 fprintf (dump_file, " not a candidate, not a complete type\n");
1204 continue;
1206 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1208 if (dump_file && (dump_flags & TDF_DETAILS))
1209 fprintf (dump_file, " not a candidate, size not representable\n");
1210 continue;
1212 unsigned HOST_WIDE_INT type_size
1213 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1214 if (type_size == 0
1215 || type_size >= ISRA_ARG_SIZE_LIMIT)
1217 if (dump_file && (dump_flags & TDF_DETAILS))
1218 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1219 continue;
1221 if (type_internals_preclude_sra_p (type, &msg))
1223 if (dump_file && (dump_flags & TDF_DETAILS))
1224 fprintf (dump_file, " not a candidate, %s\n", msg);
1225 continue;
1228 if (dump_file && (dump_flags & TDF_DETAILS))
1229 fprintf (dump_file, " is a candidate\n");
1231 ret = true;
1232 desc->split_candidate = true;
1233 if (desc->by_ref)
1234 desc->deref_index = by_ref_count++;
1236 return ret;
1239 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1240 found, which happens if DECL is for a static chain. */
1242 static gensum_param_desc *
1243 get_gensum_param_desc (tree decl)
1245 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1246 gensum_param_desc **slot = decl2desc->get (decl);
1247 if (!slot)
1248 /* This can happen for static chains which we cannot handle so far. */
1249 return NULL;
1250 gcc_checking_assert (*slot);
1251 return *slot;
1255 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1256 write REASON to the dump file if there is one. */
1258 static void
1259 disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1261 if (!desc->split_candidate)
1262 return;
1264 if (dump_file && (dump_flags & TDF_DETAILS))
1265 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1266 desc->param_number, reason);
1268 desc->split_candidate = false;
1271 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1272 there is one. */
1274 static void
1275 disqualify_split_candidate (tree decl, const char *reason)
1277 gensum_param_desc *desc = get_gensum_param_desc (decl);
1278 if (desc)
1279 disqualify_split_candidate (desc, reason);
1282 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1283 first, check that there are not too many of them already. If so, do not
1284 allocate anything and return NULL. */
1286 static gensum_param_access *
1287 allocate_access (gensum_param_desc *desc,
1288 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1290 if (desc->access_count
1291 == (unsigned) param_ipa_sra_max_replacements)
1293 disqualify_split_candidate (desc, "Too many replacement candidates");
1294 return NULL;
1297 gensum_param_access *access
1298 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1299 sizeof (gensum_param_access));
1300 memset (access, 0, sizeof (*access));
1301 access->offset = offset;
1302 access->size = size;
1303 return access;
1306 /* In what context scan_expr_access has been called, whether it deals with a
1307 load, a function argument, or a store. Please note that in rare
1308 circumstances when it is not clear if the access is a load or store,
1309 ISRA_CTX_STORE is used too. */
1311 enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1313 /* Return an access describing memory access to the variable described by DESC
1314 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1315 at a certain tree level FIRST. Attempt to create it and put into the
1316 appropriate place in the access tree if does not exist, but fail and return
1317 NULL if there are already too many accesses, if it would create a partially
1318 overlapping access or if an access would end up within a pre-existing
1319 non-call access. */
1321 static gensum_param_access *
1322 get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1323 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1325 gensum_param_access *access = *first, **ptr = first;
1327 if (!access)
1329 /* No pre-existing access at this level, just create it. */
1330 gensum_param_access *a = allocate_access (desc, offset, size);
1331 if (!a)
1332 return NULL;
1333 *first = a;
1334 return *first;
1337 if (access->offset >= offset + size)
1339 /* We want to squeeze it in front of the very first access, just do
1340 it. */
1341 gensum_param_access *r = allocate_access (desc, offset, size);
1342 if (!r)
1343 return NULL;
1344 r->next_sibling = access;
1345 *first = r;
1346 return r;
1349 /* Skip all accesses that have to come before us until the next sibling is
1350 already too far. */
1351 while (offset >= access->offset + access->size
1352 && access->next_sibling
1353 && access->next_sibling->offset < offset + size)
1355 ptr = &access->next_sibling;
1356 access = access->next_sibling;
1359 /* At this point we know we do not belong before access. */
1360 gcc_assert (access->offset < offset + size);
1362 if (access->offset == offset && access->size == size)
1363 /* We found what we were looking for. */
1364 return access;
1366 if (access->offset <= offset
1367 && access->offset + access->size >= offset + size)
1369 /* We fit into access which is larger than us. We need to find/create
1370 something below access. But we only allow nesting in call
1371 arguments. */
1372 if (access->nonarg)
1373 return NULL;
1375 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1378 if (offset <= access->offset
1379 && offset + size >= access->offset + access->size)
1380 /* We are actually bigger than access, which fully fits into us, take its
1381 place and make all accesses fitting into it its children. */
1383 /* But first, we only allow nesting in call arguments so check if that is
1384 what we are trying to represent. */
1385 if (ctx != ISRA_CTX_ARG)
1386 return NULL;
1388 gensum_param_access *r = allocate_access (desc, offset, size);
1389 if (!r)
1390 return NULL;
1391 r->first_child = access;
1393 while (access->next_sibling
1394 && access->next_sibling->offset < offset + size)
1395 access = access->next_sibling;
1396 if (access->offset + access->size > offset + size)
1398 /* This must be a different access, which are sorted, so the
1399 following must be true and this signals a partial overlap. */
1400 gcc_assert (access->offset > offset);
1401 return NULL;
1404 r->next_sibling = access->next_sibling;
1405 access->next_sibling = NULL;
1406 *ptr = r;
1407 return r;
1410 if (offset >= access->offset + access->size)
1412 /* We belong after access. */
1413 gensum_param_access *r = allocate_access (desc, offset, size);
1414 if (!r)
1415 return NULL;
1416 r->next_sibling = access->next_sibling;
1417 access->next_sibling = r;
1418 return r;
1421 if (offset < access->offset)
1423 /* We know the following, otherwise we would have created a
1424 super-access. */
1425 gcc_checking_assert (offset + size < access->offset + access->size);
1426 return NULL;
1429 if (offset + size > access->offset + access->size)
1431 /* Likewise. */
1432 gcc_checking_assert (offset > access->offset);
1433 return NULL;
1436 gcc_unreachable ();
1439 /* Return an access describing memory access to the variable described by DESC
1440 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1441 to create if it does not exist, but fail and return NULL if there are
1442 already too many accesses, if it would create a partially overlapping access
1443 or if an access would end up in a non-call access. */
1445 static gensum_param_access *
1446 get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1447 isra_scan_context ctx)
1449 gcc_checking_assert (desc->split_candidate);
1451 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1452 size, ctx);
1453 if (!access)
1455 disqualify_split_candidate (desc,
1456 "Bad access overlap or too many accesses");
1457 return NULL;
1460 switch (ctx)
1462 case ISRA_CTX_STORE:
1463 gcc_assert (!desc->by_ref);
1464 /* Fall-through */
1465 case ISRA_CTX_LOAD:
1466 access->nonarg = true;
1467 break;
1468 case ISRA_CTX_ARG:
1469 break;
1472 return access;
1475 /* Verify that parameter access tree starting with ACCESS is in good shape.
1476 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1477 ACCESS or zero if there is none. */
1479 static bool
1480 verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1481 HOST_WIDE_INT parent_size)
1483 while (access)
1485 gcc_assert (access->offset >= 0 && access->size >= 0);
1487 if (parent_size != 0)
1489 if (access->offset < parent_offset)
1491 error ("Access offset before parent offset");
1492 return true;
1494 if (access->size >= parent_size)
1496 error ("Access size greater or equal to its parent size");
1497 return true;
1499 if (access->offset + access->size > parent_offset + parent_size)
1501 error ("Access terminates outside of its parent");
1502 return true;
1506 if (verify_access_tree_1 (access->first_child, access->offset,
1507 access->size))
1508 return true;
1510 if (access->next_sibling
1511 && (access->next_sibling->offset < access->offset + access->size))
1513 error ("Access overlaps with its sibling");
1514 return true;
1517 access = access->next_sibling;
1519 return false;
1522 /* Verify that parameter access tree starting with ACCESS is in good shape,
1523 halt compilation and dump the tree to stderr if not. */
1525 DEBUG_FUNCTION void
1526 isra_verify_access_tree (gensum_param_access *access)
1528 if (verify_access_tree_1 (access, 0, 0))
1530 for (; access; access = access->next_sibling)
1531 dump_gensum_access (stderr, access, 2);
1532 internal_error ("IPA-SRA access verification failed");
1537 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1538 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1540 static bool
1541 asm_visit_addr (gimple *, tree op, tree, void *)
1543 op = get_base_address (op);
1544 if (op
1545 && TREE_CODE (op) == PARM_DECL)
1546 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1548 return false;
1551 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1552 basic block BB, unless the BB has already been marked as a potentially
1553 final. */
1555 static void
1556 mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1557 basic_block bb)
1559 gcc_assert (desc->by_ref);
1560 gcc_checking_assert (desc->split_candidate);
1562 if (bitmap_bit_p (final_bbs, bb->index))
1563 return;
1565 int idx = bb->index * by_ref_count + desc->deref_index;
1566 if (bb_dereferences[idx] < dist)
1567 bb_dereferences[idx] = dist;
1570 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1571 previously recorded OLD_TYPE. */
1573 static bool
1574 type_prevails_p (tree old_type, tree new_type)
1576 if (old_type == new_type)
1577 return false;
1579 /* Non-aggregates are always better. */
1580 if (!is_gimple_reg_type (old_type)
1581 && is_gimple_reg_type (new_type))
1582 return true;
1583 if (is_gimple_reg_type (old_type)
1584 && !is_gimple_reg_type (new_type))
1585 return false;
1587 /* Prefer any complex or vector type over any other scalar type. */
1588 if (TREE_CODE (old_type) != COMPLEX_TYPE
1589 && TREE_CODE (old_type) != VECTOR_TYPE
1590 && (TREE_CODE (new_type) == COMPLEX_TYPE
1591 || TREE_CODE (new_type) == VECTOR_TYPE))
1592 return true;
1593 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1594 || TREE_CODE (old_type) == VECTOR_TYPE)
1595 && TREE_CODE (new_type) != COMPLEX_TYPE
1596 && TREE_CODE (new_type) != VECTOR_TYPE)
1597 return false;
1599 /* Use the integral type with the bigger precision. */
1600 if (INTEGRAL_TYPE_P (old_type)
1601 && INTEGRAL_TYPE_P (new_type))
1602 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1604 /* Attempt to disregard any integral type with non-full precision. */
1605 if (INTEGRAL_TYPE_P (old_type)
1606 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1607 != TYPE_PRECISION (old_type)))
1608 return true;
1609 if (INTEGRAL_TYPE_P (new_type)
1610 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1611 != TYPE_PRECISION (new_type)))
1612 return false;
1613 /* Stabilize the selection. */
1614 return TYPE_UID (old_type) < TYPE_UID (new_type);
1617 /* When scanning an expression which is a call argument, this structure
1618 specifies the call and the position of the argument. */
1620 struct scan_call_info
1622 /* Call graph edge representing the call. */
1623 cgraph_edge *cs;
1624 /* Total number of arguments in the call. */
1625 unsigned argument_count;
1626 /* Number of the actual argument being scanned. */
1627 unsigned arg_idx;
1630 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1631 call argument described by CALL_INFO. */
1633 static void
1634 record_nonregister_call_use (gensum_param_desc *desc,
1635 scan_call_info *call_info,
1636 unsigned unit_offset, unsigned unit_size)
1638 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1639 csum->init_inputs (call_info->argument_count);
1641 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1642 param_flow->aggregate_pass_through = true;
1643 set_single_param_flow_source (param_flow, desc->param_number);
1644 param_flow->unit_offset = unit_offset;
1645 param_flow->unit_size = unit_size;
1646 desc->call_uses++;
1649 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1650 modification. */
1652 static bool
1653 mark_maybe_modified (ao_ref *, tree, void *data)
1655 bool *maybe_modified = (bool *) data;
1656 *maybe_modified = true;
1657 return true;
1660 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1661 specifies whether EXPR is used in a load, store or as an argument call. BB
1662 must be the basic block in which expr resides. If CTX specifies call
1663 argument context, CALL_INFO must describe that call and argument position,
1664 otherwise it is ignored. */
1666 static void
1667 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1668 basic_block bb, scan_call_info *call_info = NULL)
1670 poly_int64 poffset, psize, pmax_size;
1671 HOST_WIDE_INT offset, size, max_size;
1672 tree base;
1673 bool deref = false;
1674 bool reverse;
1676 if (TREE_CODE (expr) == BIT_FIELD_REF
1677 || TREE_CODE (expr) == IMAGPART_EXPR
1678 || TREE_CODE (expr) == REALPART_EXPR)
1679 expr = TREE_OPERAND (expr, 0);
1681 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1683 if (TREE_CODE (base) == MEM_REF)
1685 tree op = TREE_OPERAND (base, 0);
1686 if (TREE_CODE (op) != SSA_NAME
1687 || !SSA_NAME_IS_DEFAULT_DEF (op))
1688 return;
1689 base = SSA_NAME_VAR (op);
1690 if (!base)
1691 return;
1692 deref = true;
1694 if (TREE_CODE (base) != PARM_DECL)
1695 return;
1697 gensum_param_desc *desc = get_gensum_param_desc (base);
1698 if (!desc || !desc->split_candidate)
1699 return;
1701 if (!poffset.is_constant (&offset)
1702 || !psize.is_constant (&size)
1703 || !pmax_size.is_constant (&max_size))
1705 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1706 "access.");
1707 return;
1709 if (size < 0 || size != max_size)
1711 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1712 return;
1714 if (TREE_CODE (expr) == COMPONENT_REF
1715 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1717 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1718 return;
1720 if (offset < 0)
1722 disqualify_split_candidate (desc, "Encountered an access at a "
1723 "negative offset.");
1724 return;
1726 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1727 gcc_assert ((size % BITS_PER_UNIT) == 0);
1728 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1729 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1731 disqualify_split_candidate (desc, "Encountered an access with too big "
1732 "offset or size");
1733 return;
1736 tree type = TREE_TYPE (expr);
1737 unsigned int exp_align = get_object_alignment (expr);
1739 if (exp_align < TYPE_ALIGN (type))
1741 disqualify_split_candidate (desc, "Underaligned access.");
1742 return;
1745 if (deref)
1747 if (!desc->by_ref)
1749 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1750 return;
1752 else if (ctx == ISRA_CTX_STORE)
1754 disqualify_split_candidate (desc, "Storing to data passed by "
1755 "reference.");
1756 return;
1759 if (!aa_walking_limit)
1761 disqualify_split_candidate (desc, "Out of alias analysis step "
1762 "limit.");
1763 return;
1766 gcc_checking_assert (gimple_vuse (stmt));
1767 bool maybe_modified = false;
1768 ao_ref ar;
1770 ao_ref_init (&ar, expr);
1771 bitmap visited = BITMAP_ALLOC (NULL);
1772 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1773 mark_maybe_modified, &maybe_modified,
1774 &visited, NULL, aa_walking_limit);
1775 BITMAP_FREE (visited);
1776 if (walked > 0)
1778 gcc_assert (aa_walking_limit > walked);
1779 aa_walking_limit = aa_walking_limit - walked;
1781 if (walked < 0)
1782 aa_walking_limit = 0;
1783 if (maybe_modified || walked < 0)
1785 disqualify_split_candidate (desc, "Data passed by reference possibly "
1786 "modified through an alias.");
1787 return;
1789 else
1790 mark_param_dereference (desc, offset + size, bb);
1792 else
1793 /* Pointer parameters with direct uses should have been ruled out by
1794 analyzing SSA default def when looking at the parameters. */
1795 gcc_assert (!desc->by_ref);
1797 gensum_param_access *access = get_access (desc, offset, size, ctx);
1798 if (!access)
1799 return;
1801 if (ctx == ISRA_CTX_ARG)
1803 gcc_checking_assert (call_info);
1805 if (!deref)
1806 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1807 size / BITS_PER_UNIT);
1808 else
1809 /* This is not a pass-through of a pointer, this is a use like any
1810 other. */
1811 access->nonarg = true;
1814 if (!access->type)
1816 access->type = type;
1817 access->alias_ptr_type = reference_alias_ptr_type (expr);
1818 access->reverse = reverse;
1820 else
1822 if (exp_align < TYPE_ALIGN (access->type))
1824 disqualify_split_candidate (desc, "Reference has lower alignment "
1825 "than a previous one.");
1826 return;
1828 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1830 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1831 return;
1833 if (access->reverse != reverse)
1835 disqualify_split_candidate (desc, "Both normal and reverse "
1836 "scalar storage order.");
1837 return;
1839 if (!deref
1840 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1841 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1843 /* We need the same aggregate type on all accesses to be able to
1844 distinguish transformation spots from pass-through arguments in
1845 the transformation phase. */
1846 disqualify_split_candidate (desc, "We do not support aggregate "
1847 "type punning.");
1848 return;
1851 if (type_prevails_p (access->type, type))
1852 access->type = type;
1856 /* Scan body function described by NODE and FUN and create access trees for
1857 parameters. */
1859 static void
1860 scan_function (cgraph_node *node, struct function *fun)
1862 basic_block bb;
1864 FOR_EACH_BB_FN (bb, fun)
1866 gimple_stmt_iterator gsi;
1867 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1869 gimple *stmt = gsi_stmt (gsi);
1871 if (stmt_can_throw_external (fun, stmt))
1872 bitmap_set_bit (final_bbs, bb->index);
1873 switch (gimple_code (stmt))
1875 case GIMPLE_RETURN:
1877 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1878 if (t != NULL_TREE)
1879 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1880 bitmap_set_bit (final_bbs, bb->index);
1882 break;
1884 case GIMPLE_ASSIGN:
1885 if (gimple_assign_single_p (stmt)
1886 && !gimple_clobber_p (stmt))
1888 tree rhs = gimple_assign_rhs1 (stmt);
1889 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1890 tree lhs = gimple_assign_lhs (stmt);
1891 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1893 break;
1895 case GIMPLE_CALL:
1897 unsigned argument_count = gimple_call_num_args (stmt);
1898 isra_scan_context ctx = ISRA_CTX_ARG;
1899 scan_call_info call_info, *call_info_p = &call_info;
1900 if (gimple_call_internal_p (stmt))
1902 call_info_p = NULL;
1903 ctx = ISRA_CTX_LOAD;
1904 internal_fn ifn = gimple_call_internal_fn (stmt);
1905 if (internal_store_fn_p (ifn))
1906 ctx = ISRA_CTX_STORE;
1908 else
1910 call_info.cs = node->get_edge (stmt);
1911 call_info.argument_count = argument_count;
1914 for (unsigned i = 0; i < argument_count; i++)
1916 call_info.arg_idx = i;
1917 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1918 ctx, bb, call_info_p);
1921 tree lhs = gimple_call_lhs (stmt);
1922 if (lhs)
1923 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1924 int flags = gimple_call_flags (stmt);
1925 if (((flags & (ECF_CONST | ECF_PURE)) == 0)
1926 || (flags & ECF_LOOPING_CONST_OR_PURE))
1927 bitmap_set_bit (final_bbs, bb->index);
1929 break;
1931 case GIMPLE_ASM:
1933 gasm *asm_stmt = as_a <gasm *> (stmt);
1934 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1935 asm_visit_addr);
1936 bitmap_set_bit (final_bbs, bb->index);
1938 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1940 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1941 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1943 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1945 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1946 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1949 break;
1951 default:
1952 break;
1958 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1959 return statements, or if results of any operations it is involved in are
1960 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1961 names we have already started investigating. */
1963 static bool
1964 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1966 bool res = true;
1967 imm_use_iterator imm_iter;
1968 gimple *stmt;
1970 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1972 if (is_gimple_debug (stmt))
1973 continue;
1975 if (gimple_code (stmt) == GIMPLE_RETURN)
1977 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1978 if (t != name)
1980 res = false;
1981 break;
1984 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1985 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1986 || gimple_code (stmt) == GIMPLE_PHI))
1988 /* TODO: And perhaps for const function calls too? */
1989 tree lhs;
1990 if (gimple_code (stmt) == GIMPLE_PHI)
1991 lhs = gimple_phi_result (stmt);
1992 else
1993 lhs = gimple_assign_lhs (stmt);
1995 if (TREE_CODE (lhs) != SSA_NAME)
1997 res = false;
1998 break;
2000 gcc_assert (!gimple_vdef (stmt));
2001 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2002 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2004 res = false;
2005 break;
2008 else
2010 res = false;
2011 break;
2014 return res;
2017 /* Inspect the uses of the return value of the call associated with CS, and if
2018 it is not used or if it is only used to construct the return value of the
2019 caller, mark it as such in call or caller summary. Also check for
2020 misaligned arguments. */
2022 static void
2023 isra_analyze_call (cgraph_edge *cs)
2025 gcall *call_stmt = cs->call_stmt;
2026 unsigned count = gimple_call_num_args (call_stmt);
2027 isra_call_summary *csum = call_sums->get_create (cs);
2029 for (unsigned i = 0; i < count; i++)
2031 tree arg = gimple_call_arg (call_stmt, i);
2032 if (is_gimple_reg (arg))
2033 continue;
2035 tree offset;
2036 poly_int64 bitsize, bitpos;
2037 machine_mode mode;
2038 int unsignedp, reversep, volatilep = 0;
2039 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2040 &unsignedp, &reversep, &volatilep);
2041 if (!multiple_p (bitpos, BITS_PER_UNIT))
2043 csum->m_bit_aligned_arg = true;
2044 break;
2048 tree lhs = gimple_call_lhs (call_stmt);
2049 if (lhs)
2051 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2052 from this function (without being read anywhere). */
2053 if (TREE_CODE (lhs) == SSA_NAME)
2055 bitmap analyzed = BITMAP_ALLOC (NULL);
2056 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2057 lhs, analyzed))
2058 csum->m_return_returned = true;
2059 BITMAP_FREE (analyzed);
2062 else
2063 csum->m_return_ignored = true;
2066 /* Look at all calls going out of NODE, described also by IFS and perform all
2067 analyses necessary for IPA-SRA that are not done at body scan time or done
2068 even when body is not scanned because the function is not a candidate. */
2070 static void
2071 isra_analyze_all_outgoing_calls (cgraph_node *node)
2073 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2074 isra_analyze_call (cs);
2075 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2076 isra_analyze_call (cs);
2079 /* Dump a dereferences table with heading STR to file F. */
2081 static void
2082 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2084 basic_block bb;
2086 fprintf (dump_file, "%s", str);
2087 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2088 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2090 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2091 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2093 int i;
2094 for (i = 0; i < by_ref_count; i++)
2096 int idx = bb->index * by_ref_count + i;
2097 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2100 fprintf (f, "\n");
2102 fprintf (dump_file, "\n");
2105 /* Propagate distances in bb_dereferences in the opposite direction than the
2106 control flow edges, in each step storing the maximum of the current value
2107 and the minimum of all successors. These steps are repeated until the table
2108 stabilizes. Note that BBs which might terminate the functions (according to
2109 final_bbs bitmap) never updated in this way. */
2111 static void
2112 propagate_dereference_distances (struct function *fun)
2114 basic_block bb;
2116 if (dump_file && (dump_flags & TDF_DETAILS))
2117 dump_dereferences_table (dump_file, fun,
2118 "Dereference table before propagation:\n");
2120 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2121 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2122 FOR_EACH_BB_FN (bb, fun)
2124 queue.quick_push (bb);
2125 bb->aux = bb;
2128 while (!queue.is_empty ())
2130 edge_iterator ei;
2131 edge e;
2132 bool change = false;
2133 int i;
2135 bb = queue.pop ();
2136 bb->aux = NULL;
2138 if (bitmap_bit_p (final_bbs, bb->index))
2139 continue;
2141 for (i = 0; i < by_ref_count; i++)
2143 int idx = bb->index * by_ref_count + i;
2144 bool first = true;
2145 HOST_WIDE_INT inh = 0;
2147 FOR_EACH_EDGE (e, ei, bb->succs)
2149 int succ_idx = e->dest->index * by_ref_count + i;
2151 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2152 continue;
2154 if (first)
2156 first = false;
2157 inh = bb_dereferences [succ_idx];
2159 else if (bb_dereferences [succ_idx] < inh)
2160 inh = bb_dereferences [succ_idx];
2163 if (!first && bb_dereferences[idx] < inh)
2165 bb_dereferences[idx] = inh;
2166 change = true;
2170 if (change)
2171 FOR_EACH_EDGE (e, ei, bb->preds)
2173 if (e->src->aux)
2174 continue;
2176 e->src->aux = e->src;
2177 queue.quick_push (e->src);
2181 if (dump_file && (dump_flags & TDF_DETAILS))
2182 dump_dereferences_table (dump_file, fun,
2183 "Dereference table after propagation:\n");
2186 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2187 children, return true if the parameter cannot be split, otherwise return
2188 true and update *TOTAL_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be the
2189 index of the entry BB in the function of PARM. */
2191 static bool
2192 check_gensum_access (tree parm, gensum_param_desc *desc,
2193 gensum_param_access *access,
2194 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2195 int entry_bb_index)
2197 if (access->nonarg)
2199 *only_calls = false;
2200 *nonarg_acc_size += access->size;
2202 if (access->first_child)
2204 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2205 return true;
2208 /* Do not decompose a non-BLKmode param in a way that would create
2209 BLKmode params. Especially for by-reference passing (thus,
2210 pointer-type param) this is hardly worthwhile. */
2211 if (DECL_MODE (parm) != BLKmode
2212 && TYPE_MODE (access->type) == BLKmode)
2214 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2215 return true;
2218 if (desc->by_ref)
2220 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2221 if ((access->offset + access->size) > bb_dereferences[idx])
2223 disqualify_split_candidate (desc, "Would create a possibly "
2224 "illegal dereference in a caller.");
2225 return true;
2229 for (gensum_param_access *ch = access->first_child;
2231 ch = ch->next_sibling)
2232 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2233 entry_bb_index))
2234 return true;
2236 return false;
2239 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2240 descriptor DESC. */
2242 static void
2243 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2245 param_access *to = ggc_cleared_alloc<param_access> ();
2246 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2247 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2248 to->unit_offset = from->offset / BITS_PER_UNIT;
2249 to->unit_size = from->size / BITS_PER_UNIT;
2250 to->type = from->type;
2251 to->alias_ptr_type = from->alias_ptr_type;
2252 to->certain = from->nonarg;
2253 to->reverse = from->reverse;
2254 vec_safe_push (desc->accesses, to);
2256 for (gensum_param_access *ch = from->first_child;
2258 ch = ch->next_sibling)
2259 copy_accesses_to_ipa_desc (ch, desc);
2262 /* Analyze function body scan results stored in param_accesses and
2263 param_accesses, detect possible transformations and store information of
2264 those in function summary. NODE, FUN and IFS are all various structures
2265 describing the currently analyzed function. */
2267 static void
2268 process_scan_results (cgraph_node *node, struct function *fun,
2269 isra_func_summary *ifs,
2270 vec<gensum_param_desc> *param_descriptions)
2272 bool check_pass_throughs = false;
2273 bool dereferences_propagated = false;
2274 tree parm = DECL_ARGUMENTS (node->decl);
2275 unsigned param_count = param_descriptions->length();
2277 for (unsigned desc_index = 0;
2278 desc_index < param_count;
2279 desc_index++, parm = DECL_CHAIN (parm))
2281 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2282 if (!desc->split_candidate)
2283 continue;
2285 if (flag_checking)
2286 isra_verify_access_tree (desc->accesses);
2288 if (!dereferences_propagated
2289 && desc->by_ref
2290 && desc->accesses)
2292 propagate_dereference_distances (fun);
2293 dereferences_propagated = true;
2296 HOST_WIDE_INT nonarg_acc_size = 0;
2297 bool only_calls = true;
2298 bool check_failed = false;
2300 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2301 for (gensum_param_access *acc = desc->accesses;
2302 acc;
2303 acc = acc->next_sibling)
2304 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2305 entry_bb_index))
2307 check_failed = true;
2308 break;
2310 if (check_failed)
2311 continue;
2313 if (only_calls)
2314 desc->locally_unused = true;
2316 HOST_WIDE_INT cur_param_size
2317 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2318 HOST_WIDE_INT param_size_limit;
2319 if (!desc->by_ref || optimize_function_for_size_p (fun))
2320 param_size_limit = cur_param_size;
2321 else
2322 param_size_limit
2323 = opt_for_fn (node->decl,
2324 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2325 if (nonarg_acc_size > param_size_limit
2326 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2328 disqualify_split_candidate (desc, "Would result into a too big set "
2329 "of replacements.");
2331 else
2333 /* create_parameter_descriptors makes sure unit sizes of all
2334 candidate parameters fit unsigned integers restricted to
2335 ISRA_ARG_SIZE_LIMIT. */
2336 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2337 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2338 if (desc->split_candidate && desc->ptr_pt_count)
2340 gcc_assert (desc->by_ref);
2341 check_pass_throughs = true;
2346 /* When a pointer parameter is passed-through to a callee, in which it is
2347 only used to read only one or a few items, we can attempt to transform it
2348 to obtaining and passing through the items instead of the pointer. But we
2349 must take extra care that 1) we do not introduce any segfault by moving
2350 dereferences above control flow and that 2) the data is not modified
2351 through an alias in this function. The IPA analysis must not introduce
2352 any accesses candidates unless it can prove both.
2354 The current solution is very crude as it consists of ensuring that the
2355 call postdominates entry BB and that the definition of VUSE of the call is
2356 default definition. TODO: For non-recursive callees in the same
2357 compilation unit we could do better by doing analysis in topological order
2358 an looking into access candidates of callees, using their alias_ptr_types
2359 to attempt real AA. We could also use the maximum known dereferenced
2360 offset in this function at IPA level.
2362 TODO: Measure the overhead and the effect of just being pessimistic.
2363 Maybe this is only -O3 material? */
2365 bool pdoms_calculated = false;
2366 if (check_pass_throughs)
2367 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2369 gcall *call_stmt = cs->call_stmt;
2370 tree vuse = gimple_vuse (call_stmt);
2372 /* If the callee is a const function, we don't get a VUSE. In such
2373 case there will be no memory accesses in the called function (or the
2374 const attribute is wrong) and then we just don't care. */
2375 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2377 unsigned count = gimple_call_num_args (call_stmt);
2378 isra_call_summary *csum = call_sums->get_create (cs);
2379 csum->init_inputs (count);
2380 csum->m_before_any_store = uses_memory_as_obtained;
2381 for (unsigned argidx = 0; argidx < count; argidx++)
2383 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2384 continue;
2385 unsigned pidx
2386 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2387 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2388 if (!desc->split_candidate)
2390 csum->m_arg_flow[argidx].pointer_pass_through = false;
2391 continue;
2393 if (!uses_memory_as_obtained)
2394 continue;
2396 /* Post-dominator check placed last, hoping that it usually won't
2397 be needed. */
2398 if (!pdoms_calculated)
2400 gcc_checking_assert (cfun);
2401 connect_infinite_loops_to_exit ();
2402 calculate_dominance_info (CDI_POST_DOMINATORS);
2403 pdoms_calculated = true;
2405 if (dominated_by_p (CDI_POST_DOMINATORS,
2406 gimple_bb (call_stmt),
2407 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2408 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2412 if (pdoms_calculated)
2414 free_dominance_info (CDI_POST_DOMINATORS);
2415 remove_fake_exit_edges ();
2418 /* TODO: Add early exit if we disqualified everything. This also requires
2419 that we either relax the restriction that
2420 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2421 or store the number of parameters to IPA-SRA function summary and use that
2422 when just removing params. */
2424 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2425 ifs->m_parameters->quick_grow_cleared (param_count);
2426 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2428 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2429 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2431 d->param_size_limit = s->param_size_limit;
2432 d->size_reached = s->nonarg_acc_size;
2433 d->locally_unused = s->locally_unused;
2434 d->split_candidate = s->split_candidate;
2435 d->by_ref = s->by_ref;
2437 for (gensum_param_access *acc = s->accesses;
2438 acc;
2439 acc = acc->next_sibling)
2440 copy_accesses_to_ipa_desc (acc, d);
2443 if (dump_file)
2444 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2447 /* Return true if there are any overlaps among certain accesses of DESC. If
2448 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2449 too. DESC is assumed to be a split candidate that is not locally
2450 unused. */
2452 static bool
2453 overlapping_certain_accesses_p (isra_param_desc *desc,
2454 bool *certain_access_present_p)
2456 unsigned pclen = vec_safe_length (desc->accesses);
2457 for (unsigned i = 0; i < pclen; i++)
2459 param_access *a1 = (*desc->accesses)[i];
2461 if (!a1->certain)
2462 continue;
2463 if (certain_access_present_p)
2464 *certain_access_present_p = true;
2465 for (unsigned j = i + 1; j < pclen; j++)
2467 param_access *a2 = (*desc->accesses)[j];
2468 if (a2->certain
2469 && a1->unit_offset < a2->unit_offset + a2->unit_size
2470 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2471 return true;
2474 return false;
2477 /* Check for any overlaps of certain param accesses among splitting candidates
2478 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2479 check that used splitting candidates have at least one certain access. */
2481 static void
2482 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2484 isra_func_summary *ifs = func_sums->get (node);
2485 if (!ifs || !ifs->m_candidate)
2486 return;
2487 unsigned param_count = vec_safe_length (ifs->m_parameters);
2488 for (unsigned pidx = 0; pidx < param_count; pidx++)
2490 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2491 if (!desc->split_candidate || desc->locally_unused)
2492 continue;
2494 bool certain_access_present = !certain_must_exist;
2495 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2496 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2497 "which overlap", node->dump_name (), pidx);
2498 if (!certain_access_present)
2499 internal_error ("function %qs, parameter %u, is used but does not "
2500 "have any certain IPA-SRA access",
2501 node->dump_name (), pidx);
2505 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2506 this compilation unit and create summary structures describing IPA-SRA
2507 opportunities and constraints in them. */
2509 static void
2510 ipa_sra_generate_summary (void)
2512 struct cgraph_node *node;
2514 gcc_checking_assert (!func_sums);
2515 gcc_checking_assert (!call_sums);
2516 func_sums
2517 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2518 ipa_sra_function_summaries (symtab, true));
2519 call_sums = new ipa_sra_call_summaries (symtab);
2521 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2522 ipa_sra_summarize_function (node);
2523 return;
2526 /* Write intraprocedural analysis information about E and all of its outgoing
2527 edges into a stream for LTO WPA. */
2529 static void
2530 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2532 isra_call_summary *csum = call_sums->get (e);
2533 unsigned input_count = csum->m_arg_flow.length ();
2534 streamer_write_uhwi (ob, input_count);
2535 for (unsigned i = 0; i < input_count; i++)
2537 isra_param_flow *ipf = &csum->m_arg_flow[i];
2538 streamer_write_hwi (ob, ipf->length);
2539 bitpack_d bp = bitpack_create (ob->main_stream);
2540 for (int j = 0; j < ipf->length; j++)
2541 bp_pack_value (&bp, ipf->inputs[j], 8);
2542 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2543 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2544 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2545 streamer_write_bitpack (&bp);
2546 streamer_write_uhwi (ob, ipf->unit_offset);
2547 streamer_write_uhwi (ob, ipf->unit_size);
2549 bitpack_d bp = bitpack_create (ob->main_stream);
2550 bp_pack_value (&bp, csum->m_return_ignored, 1);
2551 bp_pack_value (&bp, csum->m_return_returned, 1);
2552 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2553 bp_pack_value (&bp, csum->m_before_any_store, 1);
2554 streamer_write_bitpack (&bp);
2557 /* Write intraprocedural analysis information about NODE and all of its outgoing
2558 edges into a stream for LTO WPA. */
2560 static void
2561 isra_write_node_summary (output_block *ob, cgraph_node *node)
2563 isra_func_summary *ifs = func_sums->get (node);
2564 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2565 int node_ref = lto_symtab_encoder_encode (encoder, node);
2566 streamer_write_uhwi (ob, node_ref);
2568 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2569 streamer_write_uhwi (ob, param_desc_count);
2570 for (unsigned i = 0; i < param_desc_count; i++)
2572 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2573 unsigned access_count = vec_safe_length (desc->accesses);
2574 streamer_write_uhwi (ob, access_count);
2575 for (unsigned j = 0; j < access_count; j++)
2577 param_access *acc = (*desc->accesses)[j];
2578 stream_write_tree (ob, acc->type, true);
2579 stream_write_tree (ob, acc->alias_ptr_type, true);
2580 streamer_write_uhwi (ob, acc->unit_offset);
2581 streamer_write_uhwi (ob, acc->unit_size);
2582 bitpack_d bp = bitpack_create (ob->main_stream);
2583 bp_pack_value (&bp, acc->certain, 1);
2584 bp_pack_value (&bp, acc->reverse, 1);
2585 streamer_write_bitpack (&bp);
2587 streamer_write_uhwi (ob, desc->param_size_limit);
2588 streamer_write_uhwi (ob, desc->size_reached);
2589 bitpack_d bp = bitpack_create (ob->main_stream);
2590 bp_pack_value (&bp, desc->locally_unused, 1);
2591 bp_pack_value (&bp, desc->split_candidate, 1);
2592 bp_pack_value (&bp, desc->by_ref, 1);
2593 streamer_write_bitpack (&bp);
2595 bitpack_d bp = bitpack_create (ob->main_stream);
2596 bp_pack_value (&bp, ifs->m_candidate, 1);
2597 bp_pack_value (&bp, ifs->m_returns_value, 1);
2598 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2599 gcc_assert (!ifs->m_queued);
2600 streamer_write_bitpack (&bp);
2602 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2603 isra_write_edge_summary (ob, e);
2604 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2605 isra_write_edge_summary (ob, e);
2608 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2610 static void
2611 ipa_sra_write_summary (void)
2613 if (!func_sums || !call_sums)
2614 return;
2616 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2617 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2618 ob->symbol = NULL;
2620 unsigned int count = 0;
2621 lto_symtab_encoder_iterator lsei;
2622 for (lsei = lsei_start_function_in_partition (encoder);
2623 !lsei_end_p (lsei);
2624 lsei_next_function_in_partition (&lsei))
2626 cgraph_node *node = lsei_cgraph_node (lsei);
2627 if (node->has_gimple_body_p ()
2628 && func_sums->get (node) != NULL)
2629 count++;
2631 streamer_write_uhwi (ob, count);
2633 /* Process all of the functions. */
2634 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2635 lsei_next_function_in_partition (&lsei))
2637 cgraph_node *node = lsei_cgraph_node (lsei);
2638 if (node->has_gimple_body_p ()
2639 && func_sums->get (node) != NULL)
2640 isra_write_node_summary (ob, node);
2642 streamer_write_char_stream (ob->main_stream, 0);
2643 produce_asm (ob, NULL);
2644 destroy_output_block (ob);
2647 /* Read intraprocedural analysis information about E and all of its outgoing
2648 edges into a stream for LTO WPA. */
2650 static void
2651 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2653 isra_call_summary *csum = call_sums->get_create (cs);
2654 unsigned input_count = streamer_read_uhwi (ib);
2655 csum->init_inputs (input_count);
2656 for (unsigned i = 0; i < input_count; i++)
2658 isra_param_flow *ipf = &csum->m_arg_flow[i];
2659 ipf->length = streamer_read_hwi (ib);
2660 bitpack_d bp = streamer_read_bitpack (ib);
2661 for (int j = 0; j < ipf->length; j++)
2662 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2663 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2664 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2665 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2666 ipf->unit_offset = streamer_read_uhwi (ib);
2667 ipf->unit_size = streamer_read_uhwi (ib);
2669 bitpack_d bp = streamer_read_bitpack (ib);
2670 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2671 csum->m_return_returned = bp_unpack_value (&bp, 1);
2672 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2673 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2676 /* Read intraprocedural analysis information about NODE and all of its outgoing
2677 edges into a stream for LTO WPA. */
2679 static void
2680 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2681 struct data_in *data_in)
2683 isra_func_summary *ifs = func_sums->get_create (node);
2684 unsigned param_desc_count = streamer_read_uhwi (ib);
2685 if (param_desc_count > 0)
2687 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2688 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2690 for (unsigned i = 0; i < param_desc_count; i++)
2692 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2693 unsigned access_count = streamer_read_uhwi (ib);
2694 for (unsigned j = 0; j < access_count; j++)
2696 param_access *acc = ggc_cleared_alloc<param_access> ();
2697 acc->type = stream_read_tree (ib, data_in);
2698 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2699 acc->unit_offset = streamer_read_uhwi (ib);
2700 acc->unit_size = streamer_read_uhwi (ib);
2701 bitpack_d bp = streamer_read_bitpack (ib);
2702 acc->certain = bp_unpack_value (&bp, 1);
2703 acc->reverse = bp_unpack_value (&bp, 1);
2704 vec_safe_push (desc->accesses, acc);
2706 desc->param_size_limit = streamer_read_uhwi (ib);
2707 desc->size_reached = streamer_read_uhwi (ib);
2708 bitpack_d bp = streamer_read_bitpack (ib);
2709 desc->locally_unused = bp_unpack_value (&bp, 1);
2710 desc->split_candidate = bp_unpack_value (&bp, 1);
2711 desc->by_ref = bp_unpack_value (&bp, 1);
2713 bitpack_d bp = streamer_read_bitpack (ib);
2714 ifs->m_candidate = bp_unpack_value (&bp, 1);
2715 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2716 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2717 ifs->m_queued = 0;
2719 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2720 isra_read_edge_summary (ib, e);
2721 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2722 isra_read_edge_summary (ib, e);
2725 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2726 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
2727 it should be possible to unify them somehow. */
2729 static void
2730 isra_read_summary_section (struct lto_file_decl_data *file_data,
2731 const char *data, size_t len)
2733 const struct lto_function_header *header =
2734 (const struct lto_function_header *) data;
2735 const int cfg_offset = sizeof (struct lto_function_header);
2736 const int main_offset = cfg_offset + header->cfg_size;
2737 const int string_offset = main_offset + header->main_size;
2738 struct data_in *data_in;
2739 unsigned int i;
2740 unsigned int count;
2742 lto_input_block ib_main ((const char *) data + main_offset,
2743 header->main_size, file_data->mode_table);
2745 data_in =
2746 lto_data_in_create (file_data, (const char *) data + string_offset,
2747 header->string_size, vNULL);
2748 count = streamer_read_uhwi (&ib_main);
2750 for (i = 0; i < count; i++)
2752 unsigned int index;
2753 struct cgraph_node *node;
2754 lto_symtab_encoder_t encoder;
2756 index = streamer_read_uhwi (&ib_main);
2757 encoder = file_data->symtab_node_encoder;
2758 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2759 index));
2760 gcc_assert (node->definition);
2761 isra_read_node_info (&ib_main, node, data_in);
2763 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2764 len);
2765 lto_data_in_delete (data_in);
2768 /* Read intraprocedural analysis information into a stream for LTO WPA. */
2770 static void
2771 ipa_sra_read_summary (void)
2773 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2774 struct lto_file_decl_data *file_data;
2775 unsigned int j = 0;
2777 gcc_checking_assert (!func_sums);
2778 gcc_checking_assert (!call_sums);
2779 func_sums
2780 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2781 ipa_sra_function_summaries (symtab, true));
2782 call_sums = new ipa_sra_call_summaries (symtab);
2784 while ((file_data = file_data_vec[j++]))
2786 size_t len;
2787 const char *data
2788 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2789 if (data)
2790 isra_read_summary_section (file_data, data, len);
2794 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2796 static void
2797 ipa_sra_dump_all_summaries (FILE *f)
2799 cgraph_node *node;
2800 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2802 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2804 isra_func_summary *ifs = func_sums->get (node);
2805 if (!ifs)
2806 fprintf (f, " Function does not have any associated IPA-SRA "
2807 "summary\n");
2808 else
2810 if (!ifs->m_candidate)
2812 fprintf (f, " Not a candidate function\n");
2813 continue;
2815 if (ifs->m_returns_value)
2816 fprintf (f, " Returns value\n");
2817 if (vec_safe_is_empty (ifs->m_parameters))
2818 fprintf (f, " No parameter information. \n");
2819 else
2820 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2822 fprintf (f, " Descriptor for parameter %i:\n", i);
2823 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2825 fprintf (f, "\n");
2828 struct cgraph_edge *cs;
2829 for (cs = node->callees; cs; cs = cs->next_callee)
2831 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2832 cs->callee->dump_name ());
2833 isra_call_summary *csum = call_sums->get (cs);
2834 if (csum)
2835 csum->dump (f);
2836 else
2837 fprintf (f, " Call summary is MISSING!\n");
2841 fprintf (f, "\n\n");
2844 /* Perform function-scope viability tests that can be only made at IPA level
2845 and return false if the function is deemed unsuitable for IPA-SRA. */
2847 static bool
2848 ipa_sra_ipa_function_checks (cgraph_node *node)
2850 if (!node->can_be_local_p ())
2852 if (dump_file)
2853 fprintf (dump_file, "Function %s disqualified because it cannot be "
2854 "made local.\n", node->dump_name ());
2855 return false;
2857 if (!node->can_change_signature)
2859 if (dump_file)
2860 fprintf (dump_file, "Function can not change signature.\n");
2861 return false;
2864 return true;
2867 /* Issues found out by check_callers_for_issues. */
2869 struct caller_issues
2871 /* The candidate being considered. */
2872 cgraph_node *candidate;
2873 /* There is a thunk among callers. */
2874 bool thunk;
2875 /* Call site with no available information. */
2876 bool unknown_callsite;
2877 /* Call from outside the candidate's comdat group. */
2878 bool call_from_outside_comdat;
2879 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2880 bool bit_aligned_aggregate_argument;
2883 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2884 that apply. */
2886 static bool
2887 check_for_caller_issues (struct cgraph_node *node, void *data)
2889 struct caller_issues *issues = (struct caller_issues *) data;
2891 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2893 if (cs->caller->thunk)
2895 issues->thunk = true;
2896 /* TODO: We should be able to process at least some types of
2897 thunks. */
2898 return true;
2900 if (issues->candidate->calls_comdat_local
2901 && issues->candidate->same_comdat_group
2902 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2904 issues->call_from_outside_comdat = true;
2905 return true;
2908 isra_call_summary *csum = call_sums->get (cs);
2909 if (!csum)
2911 issues->unknown_callsite = true;
2912 return true;
2915 if (csum->m_bit_aligned_arg)
2916 issues->bit_aligned_aggregate_argument = true;
2918 return false;
2921 /* Look at all incoming edges to NODE, including aliases and thunks and look
2922 for problems. Return true if NODE type should not be modified at all. */
2924 static bool
2925 check_all_callers_for_issues (cgraph_node *node)
2927 struct caller_issues issues;
2928 memset (&issues, 0, sizeof (issues));
2929 issues.candidate = node;
2931 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2932 if (issues.unknown_callsite)
2934 if (dump_file && (dump_flags & TDF_DETAILS))
2935 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2936 "all modifications.\n", node->dump_name ());
2937 return true;
2939 /* TODO: We should be able to process at least some types of thunks. */
2940 if (issues.thunk)
2942 if (dump_file && (dump_flags & TDF_DETAILS))
2943 fprintf (dump_file, "A call of %s is through thunk, which are not"
2944 " handled yet. Disabling all modifications.\n",
2945 node->dump_name ());
2946 return true;
2948 if (issues.call_from_outside_comdat)
2950 if (dump_file)
2951 fprintf (dump_file, "Function would become private comdat called "
2952 "outside of its comdat group.\n");
2953 return true;
2956 if (issues.bit_aligned_aggregate_argument)
2958 /* Let's only remove parameters/return values from such functions.
2959 TODO: We could only prevent splitting the problematic parameters if
2960 anybody thinks it is worth it. */
2961 if (dump_file && (dump_flags & TDF_DETAILS))
2962 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2963 " disabling parameter splitting.\n", node->dump_name ());
2965 isra_func_summary *ifs = func_sums->get (node);
2966 gcc_checking_assert (ifs);
2967 unsigned param_count = vec_safe_length (ifs->m_parameters);
2968 for (unsigned i = 0; i < param_count; i++)
2969 (*ifs->m_parameters)[i].split_candidate = false;
2971 return false;
2974 /* Find the access with corresponding OFFSET and SIZE among accesses in
2975 PARAM_DESC and return it or NULL if such an access is not there. */
2977 static param_access *
2978 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2980 unsigned pclen = vec_safe_length (param_desc->accesses);
2982 /* The search is linear but the number of stored accesses is bound by
2983 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2985 for (unsigned i = 0; i < pclen; i++)
2986 if ((*param_desc->accesses)[i]->unit_offset == offset
2987 && (*param_desc->accesses)[i]->unit_size == size)
2988 return (*param_desc->accesses)[i];
2990 return NULL;
2993 /* Return iff the total size of definite replacement SIZE would violate the
2994 limit set for it in PARAM. */
2996 static bool
2997 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
2999 unsigned limit = desc->param_size_limit;
3000 if (size > limit
3001 || (!desc->by_ref && size == limit))
3002 return true;
3003 return false;
3006 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3007 the set limit. IDX is the parameter number which is dumped when
3008 disqualifying. */
3010 static void
3011 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3013 unsigned after = desc->size_reached + size;
3014 if (size_would_violate_limit_p (desc, after))
3016 if (dump_file && (dump_flags & TDF_DETAILS))
3017 fprintf (dump_file, " ...size limit reached, disqualifying "
3018 "candidate parameter %u\n", idx);
3019 desc->split_candidate = false;
3020 return;
3022 desc->size_reached = after;
3025 /* Take all actions required to deal with an edge CS that represents a call to
3026 an unknown or un-analyzed function, for both parameter removal and
3027 splitting. */
3029 static void
3030 process_edge_to_unknown_caller (cgraph_edge *cs)
3032 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3033 gcc_checking_assert (from_ifs);
3034 isra_call_summary *csum = call_sums->get (cs);
3036 if (dump_file && (dump_flags & TDF_DETAILS))
3037 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3038 cs->caller->dump_name ());
3040 unsigned args_count = csum->m_arg_flow.length ();
3041 for (unsigned i = 0; i < args_count; i++)
3043 isra_param_flow *ipf = &csum->m_arg_flow[i];
3045 if (ipf->pointer_pass_through)
3047 isra_param_desc *param_desc
3048 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3049 param_desc->locally_unused = false;
3050 param_desc->split_candidate = false;
3051 continue;
3053 if (ipf->aggregate_pass_through)
3055 unsigned idx = get_single_param_flow_source (ipf);
3056 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3058 param_desc->locally_unused = false;
3059 if (!param_desc->split_candidate)
3060 continue;
3061 gcc_assert (!param_desc->by_ref);
3062 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3063 ipf->unit_size);
3064 gcc_checking_assert (pacc);
3065 pacc->certain = true;
3066 if (overlapping_certain_accesses_p (param_desc, NULL))
3068 if (dump_file && (dump_flags & TDF_DETAILS))
3069 fprintf (dump_file, " ...leading to overlap, "
3070 " disqualifying candidate parameter %u\n",
3071 idx);
3072 param_desc->split_candidate = false;
3074 else
3075 bump_reached_size (param_desc, pacc->unit_size, idx);
3076 ipf->aggregate_pass_through = false;
3077 continue;
3080 for (int j = 0; j < ipf->length; j++)
3082 int input_idx = ipf->inputs[j];
3083 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3088 /* Propagate parameter removal information through cross-SCC edge CS,
3089 i.e. decrease the use count in the caller parameter descriptor for each use
3090 in this call. */
3092 static void
3093 param_removal_cross_scc_edge (cgraph_edge *cs)
3095 enum availability availability;
3096 cgraph_node *callee = cs->callee->function_symbol (&availability);
3097 isra_func_summary *to_ifs = func_sums->get (callee);
3098 if (!to_ifs || !to_ifs->m_candidate
3099 || (availability < AVAIL_AVAILABLE)
3100 || vec_safe_is_empty (to_ifs->m_parameters))
3102 process_edge_to_unknown_caller (cs);
3103 return;
3105 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3106 gcc_checking_assert (from_ifs);
3108 isra_call_summary *csum = call_sums->get (cs);
3109 unsigned args_count = csum->m_arg_flow.length ();
3110 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3112 for (unsigned i = 0; i < args_count; i++)
3114 bool unused_in_callee;
3115 if (i < param_count)
3116 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3117 else
3118 unused_in_callee = false;
3120 if (!unused_in_callee)
3122 isra_param_flow *ipf = &csum->m_arg_flow[i];
3123 for (int j = 0; j < ipf->length; j++)
3125 int input_idx = ipf->inputs[j];
3126 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3132 /* Unless it is already there, push NODE which is also described by IFS to
3133 STACK. */
3135 static void
3136 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3137 vec<cgraph_node *> *stack)
3139 if (!ifs->m_queued)
3141 ifs->m_queued = true;
3142 stack->safe_push (node);
3146 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3147 used and push CALLER on STACK. */
3149 static void
3150 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3151 cgraph_node *caller, vec<cgraph_node *> *stack)
3153 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3155 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3156 isra_push_node_to_stack (caller, from_ifs, stack);
3161 /* Propagate information that any parameter is not used only locally within a
3162 SCC across CS to the caller, which must be in the same SCC as the
3163 callee. Push any callers that need to be re-processed to STACK. */
3165 static void
3166 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3168 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3169 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3170 return;
3172 isra_call_summary *csum = call_sums->get (cs);
3173 gcc_checking_assert (csum);
3174 unsigned args_count = csum->m_arg_flow.length ();
3175 enum availability availability;
3176 cgraph_node *callee = cs->callee->function_symbol (&availability);
3177 isra_func_summary *to_ifs = func_sums->get (callee);
3179 unsigned param_count
3180 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3181 ? vec_safe_length (to_ifs->m_parameters) : 0;
3182 for (unsigned i = 0; i < args_count; i++)
3184 if (i < param_count
3185 && (*to_ifs->m_parameters)[i].locally_unused)
3186 continue;
3188 /* The argument is needed in the callee it, we must mark the parameter as
3189 used also in the caller and its callers within this SCC. */
3190 isra_param_flow *ipf = &csum->m_arg_flow[i];
3191 for (int j = 0; j < ipf->length; j++)
3193 int input_idx = ipf->inputs[j];
3194 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3199 /* Propagate information that any parameter is not used only locally within a
3200 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3201 same SCC. Push any callers that need to be re-processed to STACK. */
3203 static bool
3204 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3206 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3207 cgraph_edge *cs;
3208 for (cs = node->callers; cs; cs = cs->next_caller)
3209 if (ipa_edge_within_scc (cs))
3210 propagate_used_across_scc_edge (cs, stack);
3211 return false;
3214 /* Return true iff all certain accesses in ARG_DESC are also present as
3215 certain accesses in PARAM_DESC. */
3217 static bool
3218 all_callee_accesses_present_p (isra_param_desc *param_desc,
3219 isra_param_desc *arg_desc)
3221 unsigned aclen = vec_safe_length (arg_desc->accesses);
3222 for (unsigned j = 0; j < aclen; j++)
3224 param_access *argacc = (*arg_desc->accesses)[j];
3225 if (!argacc->certain)
3226 continue;
3227 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3228 argacc->unit_size);
3229 if (!pacc
3230 || !pacc->certain
3231 || !types_compatible_p (argacc->type, pacc->type))
3232 return false;
3234 return true;
3237 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3238 does not allow instantiating an auto_vec with a type defined within a
3239 function so it is a global type. */
3240 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3243 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3244 (which belongs to CALLER) if they would not violate some constraint there.
3245 If successful, return NULL, otherwise return the string reason for failure
3246 (which can be written to the dump file). DELTA_OFFSET is the known offset
3247 of the actual argument withing the formal parameter (so of ARG_DESCS within
3248 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3249 known. In case of success, set *CHANGE_P to true if propagation actually
3250 changed anything. */
3252 static const char *
3253 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3254 isra_param_desc *arg_desc,
3255 unsigned delta_offset, unsigned arg_size,
3256 bool *change_p)
3258 unsigned pclen = vec_safe_length (param_desc->accesses);
3259 unsigned aclen = vec_safe_length (arg_desc->accesses);
3260 unsigned prop_count = 0;
3261 unsigned prop_size = 0;
3262 bool change = false;
3264 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3265 for (unsigned j = 0; j < aclen; j++)
3267 param_access *argacc = (*arg_desc->accesses)[j];
3268 prop_kinds.safe_push (ACC_PROP_DONT);
3270 if (arg_size > 0
3271 && argacc->unit_offset + argacc->unit_size > arg_size)
3272 return "callee access outsize size boundary";
3274 if (!argacc->certain)
3275 continue;
3277 unsigned offset = argacc->unit_offset + delta_offset;
3278 /* Given that accesses are initially stored according to increasing
3279 offset and decreasing size in case of equal offsets, the following
3280 searches could be written more efficiently if we kept the ordering
3281 when copying. But the number of accesses is capped at
3282 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3283 messy quickly, so let's improve on that only if necessary. */
3285 bool exact_match = false;
3286 for (unsigned i = 0; i < pclen; i++)
3288 /* Check for overlaps. */
3289 param_access *pacc = (*param_desc->accesses)[i];
3290 if (pacc->unit_offset == offset
3291 && pacc->unit_size == argacc->unit_size)
3293 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3294 || !types_compatible_p (argacc->type, pacc->type)
3295 || argacc->reverse != pacc->reverse)
3296 return "propagated access types would not match existing ones";
3298 exact_match = true;
3299 if (!pacc->certain)
3301 prop_kinds[j] = ACC_PROP_CERTAIN;
3302 prop_size += argacc->unit_size;
3303 change = true;
3305 continue;
3308 if (offset < pacc->unit_offset + pacc->unit_size
3309 && offset + argacc->unit_size > pacc->unit_offset)
3311 /* None permissible with load accesses, possible to fit into
3312 argument ones. */
3313 if (pacc->certain
3314 || offset < pacc->unit_offset
3315 || (offset + argacc->unit_size
3316 > pacc->unit_offset + pacc->unit_size))
3317 return "a propagated access would conflict in caller";
3321 if (!exact_match)
3323 prop_kinds[j] = ACC_PROP_COPY;
3324 prop_count++;
3325 prop_size += argacc->unit_size;
3326 change = true;
3330 if (!change)
3331 return NULL;
3333 if ((prop_count + pclen
3334 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3335 || size_would_violate_limit_p (param_desc,
3336 param_desc->size_reached + prop_size))
3337 return "propagating accesses would violate the count or size limit";
3339 *change_p = true;
3340 for (unsigned j = 0; j < aclen; j++)
3342 if (prop_kinds[j] == ACC_PROP_COPY)
3344 param_access *argacc = (*arg_desc->accesses)[j];
3346 param_access *copy = ggc_cleared_alloc<param_access> ();
3347 copy->unit_offset = argacc->unit_offset + delta_offset;
3348 copy->unit_size = argacc->unit_size;
3349 copy->type = argacc->type;
3350 copy->alias_ptr_type = argacc->alias_ptr_type;
3351 copy->certain = true;
3352 copy->reverse = argacc->reverse;
3353 vec_safe_push (param_desc->accesses, copy);
3355 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3357 param_access *argacc = (*arg_desc->accesses)[j];
3358 param_access *csp
3359 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3360 argacc->unit_size);
3361 csp->certain = true;
3365 param_desc->size_reached += prop_size;
3367 return NULL;
3370 /* Propagate parameter splitting information through call graph edge CS.
3371 Return true if any changes that might need to be propagated within SCCs have
3372 been made. The function also clears the aggregate_pass_through and
3373 pointer_pass_through in call summaries which do not need to be processed
3374 again if this CS is revisited when iterating while changes are propagated
3375 within an SCC. */
3377 static bool
3378 param_splitting_across_edge (cgraph_edge *cs)
3380 bool res = false;
3381 bool cross_scc = !ipa_edge_within_scc (cs);
3382 enum availability availability;
3383 cgraph_node *callee = cs->callee->function_symbol (&availability);
3384 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3385 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3387 isra_call_summary *csum = call_sums->get (cs);
3388 gcc_checking_assert (csum);
3389 unsigned args_count = csum->m_arg_flow.length ();
3390 isra_func_summary *to_ifs = func_sums->get (callee);
3391 unsigned param_count
3392 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3393 ? vec_safe_length (to_ifs->m_parameters)
3394 : 0);
3396 if (dump_file && (dump_flags & TDF_DETAILS))
3397 fprintf (dump_file, "Splitting across %s->%s:\n",
3398 cs->caller->dump_name (), callee->dump_name ());
3400 unsigned i;
3401 for (i = 0; (i < args_count) && (i < param_count); i++)
3403 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3404 isra_param_flow *ipf = &csum->m_arg_flow[i];
3406 if (arg_desc->locally_unused)
3408 if (dump_file && (dump_flags & TDF_DETAILS))
3409 fprintf (dump_file, " ->%u: unused in callee\n", i);
3410 ipf->pointer_pass_through = false;
3411 continue;
3414 if (ipf->pointer_pass_through)
3416 int idx = get_single_param_flow_source (ipf);
3417 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3418 if (!param_desc->split_candidate)
3419 continue;
3420 gcc_assert (param_desc->by_ref);
3422 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3424 if (dump_file && (dump_flags & TDF_DETAILS))
3425 fprintf (dump_file, " %u->%u: not candidate or not by "
3426 "reference in callee\n", idx, i);
3427 param_desc->split_candidate = false;
3428 ipf->pointer_pass_through = false;
3429 res = true;
3431 else if (!ipf->safe_to_import_accesses)
3433 if (!csum->m_before_any_store
3434 || !all_callee_accesses_present_p (param_desc, arg_desc))
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3437 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3438 idx, i);
3439 param_desc->split_candidate = false;
3440 ipf->pointer_pass_through = false;
3441 res = true;
3444 else
3446 if (dump_file && (dump_flags & TDF_DETAILS))
3447 fprintf (dump_file, " %u->%u: verified callee accesses "
3448 "present.\n", idx, i);
3449 if (cross_scc)
3450 ipf->pointer_pass_through = false;
3453 else
3455 const char *pull_failure
3456 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3457 0, 0, &res);
3458 if (pull_failure)
3460 if (dump_file && (dump_flags & TDF_DETAILS))
3461 fprintf (dump_file, " %u->%u: by_ref access pull "
3462 "failed: %s.\n", idx, i, pull_failure);
3463 param_desc->split_candidate = false;
3464 ipf->pointer_pass_through = false;
3465 res = true;
3467 else
3469 if (dump_file && (dump_flags & TDF_DETAILS))
3470 fprintf (dump_file, " %u->%u: by_ref access pull "
3471 "succeeded.\n", idx, i);
3472 if (cross_scc)
3473 ipf->pointer_pass_through = false;
3477 else if (ipf->aggregate_pass_through)
3479 int idx = get_single_param_flow_source (ipf);
3480 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3481 if (!param_desc->split_candidate)
3482 continue;
3483 gcc_assert (!param_desc->by_ref);
3484 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3485 ipf->unit_size);
3486 gcc_checking_assert (pacc);
3488 if (pacc->certain)
3490 if (dump_file && (dump_flags & TDF_DETAILS))
3491 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3492 ipf->aggregate_pass_through = false;
3494 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3496 if (dump_file && (dump_flags & TDF_DETAILS))
3497 fprintf (dump_file, " %u->%u: not candidate or by "
3498 "reference in callee\n", idx, i);
3500 pacc->certain = true;
3501 if (overlapping_certain_accesses_p (param_desc, NULL))
3503 if (dump_file && (dump_flags & TDF_DETAILS))
3504 fprintf (dump_file, " ...leading to overlap, "
3505 " disqualifying candidate parameter %u\n",
3506 idx);
3507 param_desc->split_candidate = false;
3509 else
3510 bump_reached_size (param_desc, pacc->unit_size, idx);
3512 ipf->aggregate_pass_through = false;
3513 res = true;
3515 else
3517 const char *pull_failure
3518 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3519 ipf->unit_offset,
3520 ipf->unit_size, &res);
3521 if (pull_failure)
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3524 fprintf (dump_file, " %u->%u: arg access pull "
3525 "failed: %s.\n", idx, i, pull_failure);
3527 ipf->aggregate_pass_through = false;
3528 pacc->certain = true;
3530 if (overlapping_certain_accesses_p (param_desc, NULL))
3532 if (dump_file && (dump_flags & TDF_DETAILS))
3533 fprintf (dump_file, " ...leading to overlap, "
3534 " disqualifying candidate parameter %u\n",
3535 idx);
3536 param_desc->split_candidate = false;
3538 else
3539 bump_reached_size (param_desc, pacc->unit_size, idx);
3541 res = true;
3543 else
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 fprintf (dump_file, " %u->%u: arg access pull "
3547 "succeeded.\n", idx, i);
3548 if (cross_scc)
3549 ipf->aggregate_pass_through = false;
3555 /* Handle argument-parameter count mismatches. */
3556 for (; (i < args_count); i++)
3558 isra_param_flow *ipf = &csum->m_arg_flow[i];
3560 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3562 int idx = get_single_param_flow_source (ipf);
3563 ipf->pointer_pass_through = false;
3564 ipf->aggregate_pass_through = false;
3565 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3566 if (!param_desc->split_candidate)
3567 continue;
3569 if (dump_file && (dump_flags & TDF_DETAILS))
3570 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3571 idx, i);
3572 param_desc->split_candidate = false;
3573 res = true;
3576 return res;
3579 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3580 callers ignore the return value, or come from the same SCC and use the
3581 return value only to compute their return value, return false, otherwise
3582 return true. */
3584 static bool
3585 retval_used_p (cgraph_node *node, void *)
3587 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3589 isra_call_summary *csum = call_sums->get (cs);
3590 gcc_checking_assert (csum);
3591 if (csum->m_return_ignored)
3592 continue;
3593 if (!csum->m_return_returned)
3594 return true;
3596 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3597 if (!from_ifs || !from_ifs->m_candidate)
3598 return true;
3600 if (!ipa_edge_within_scc (cs)
3601 && !from_ifs->m_return_ignored)
3602 return true;
3605 return false;
3608 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3609 modify parameter which originally had index BASE_INDEX, in the adjustment
3610 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3611 PREV_ADJUSTMENT. If the parent clone is the original function,
3612 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3614 static void
3615 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3616 unsigned prev_clone_index,
3617 ipa_adjusted_param *prev_adjustment,
3618 vec<ipa_adjusted_param, va_gc> **new_params)
3620 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3621 if (desc->locally_unused)
3623 if (dump_file)
3624 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3625 return;
3628 if (!desc->split_candidate)
3630 ipa_adjusted_param adj;
3631 if (prev_adjustment)
3633 adj = *prev_adjustment;
3634 adj.prev_clone_adjustment = true;
3635 adj.prev_clone_index = prev_clone_index;
3637 else
3639 memset (&adj, 0, sizeof (adj));
3640 adj.op = IPA_PARAM_OP_COPY;
3641 adj.base_index = base_index;
3642 adj.prev_clone_index = prev_clone_index;
3644 vec_safe_push ((*new_params), adj);
3645 return;
3648 if (dump_file)
3649 fprintf (dump_file, " Will split parameter %u\n", base_index);
3651 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3652 unsigned aclen = vec_safe_length (desc->accesses);
3653 for (unsigned j = 0; j < aclen; j++)
3655 param_access *pa = (*desc->accesses)[j];
3656 if (!pa->certain)
3657 continue;
3658 if (dump_file)
3659 fprintf (dump_file, " - component at byte offset %u, "
3660 "size %u\n", pa->unit_offset, pa->unit_size);
3662 ipa_adjusted_param adj;
3663 memset (&adj, 0, sizeof (adj));
3664 adj.op = IPA_PARAM_OP_SPLIT;
3665 adj.base_index = base_index;
3666 adj.prev_clone_index = prev_clone_index;
3667 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3668 adj.reverse = pa->reverse;
3669 adj.type = pa->type;
3670 adj.alias_ptr_type = pa->alias_ptr_type;
3671 adj.unit_offset = pa->unit_offset;
3672 vec_safe_push ((*new_params), adj);
3676 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
3677 flag of all callers of NODE. */
3679 static bool
3680 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3682 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3683 cs->caller->calls_comdat_local = true;
3684 return false;
3688 /* Do final processing of results of IPA propagation regarding NODE, clone it
3689 if appropriate. */
3691 static void
3692 process_isra_node_results (cgraph_node *node,
3693 hash_map<const char *, unsigned> *clone_num_suffixes)
3695 isra_func_summary *ifs = func_sums->get (node);
3696 if (!ifs || !ifs->m_candidate)
3697 return;
3699 auto_vec<bool, 16> surviving_params;
3700 bool check_surviving = false;
3701 clone_info *cinfo = clone_info::get (node);
3702 if (cinfo && cinfo->param_adjustments)
3704 check_surviving = true;
3705 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3708 unsigned param_count = vec_safe_length (ifs->m_parameters);
3709 bool will_change_function = false;
3710 if (ifs->m_returns_value && ifs->m_return_ignored)
3711 will_change_function = true;
3712 else
3713 for (unsigned i = 0; i < param_count; i++)
3715 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3716 if ((desc->locally_unused || desc->split_candidate)
3717 /* Make sure we do not clone just to attempt to remove an already
3718 removed unused argument. */
3719 && (!check_surviving
3720 || (i < surviving_params.length ()
3721 && surviving_params[i])))
3723 will_change_function = true;
3724 break;
3727 if (!will_change_function)
3728 return;
3730 if (dump_file)
3732 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3733 node->dump_name ());
3734 if (ifs->m_returns_value && ifs->m_return_ignored)
3735 fprintf (dump_file, " Will remove return value.\n");
3738 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3739 if (ipa_param_adjustments *old_adjustments
3740 = cinfo ? cinfo->param_adjustments : NULL)
3742 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3743 for (unsigned i = 0; i < old_adj_len; i++)
3745 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3746 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3747 old_adj, &new_params);
3750 else
3751 for (unsigned i = 0; i < param_count; i++)
3752 push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3754 ipa_param_adjustments *new_adjustments
3755 = (new (ggc_alloc <ipa_param_adjustments> ())
3756 ipa_param_adjustments (new_params, param_count,
3757 ifs->m_returns_value && ifs->m_return_ignored));
3759 if (dump_file && (dump_flags & TDF_DETAILS))
3761 fprintf (dump_file, "\n Created adjustments:\n");
3762 new_adjustments->dump (dump_file);
3765 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3766 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3767 node->decl)));
3768 auto_vec<cgraph_edge *> callers = node->collect_callers ();
3769 cgraph_node *new_node
3770 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3771 suffix_counter);
3772 suffix_counter++;
3773 if (node->calls_comdat_local && node->same_comdat_group)
3775 new_node->add_to_same_comdat_group (node);
3776 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3777 NULL, true);
3779 new_node->calls_comdat_local = node->calls_comdat_local;
3781 if (dump_file)
3782 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
3783 callers.release ();
3786 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3787 and disable transformations for those which have not or which should not
3788 transformed because the associated debug counter reached its limit. Return
3789 true if none survived or if there were no candidates to begin with. */
3791 static bool
3792 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3794 bool ret = true;
3795 unsigned len = vec_safe_length (ifs->m_parameters);
3796 if (!len)
3797 return true;
3799 auto_vec<bool, 16> surviving_params;
3800 bool check_surviving = false;
3801 clone_info *cinfo = clone_info::get (node);
3802 if (cinfo && cinfo->param_adjustments)
3804 check_surviving = true;
3805 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3807 bool dumped_first = false;
3808 for (unsigned i = 0; i < len; i++)
3810 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3811 if (!dbg_cnt (ipa_sra_params))
3813 desc->locally_unused = false;
3814 desc->split_candidate = false;
3816 else if (check_surviving
3817 && (i >= surviving_params.length ()
3818 || !surviving_params[i]))
3820 /* Even if the parameter was removed by a previous IPA pass, we do
3821 not clear locally_unused because if it really is unused, this
3822 information might be useful in callers. */
3823 desc->split_candidate = false;
3825 if (dump_file && (dump_flags & TDF_DETAILS))
3827 if (!dumped_first)
3829 fprintf (dump_file,
3830 "The following parameters of %s are dead on "
3831 "arrival:", node->dump_name ());
3832 dumped_first = true;
3834 fprintf (dump_file, " %u", i);
3837 else if (desc->locally_unused || desc->split_candidate)
3838 ret = false;
3841 if (dumped_first)
3842 fprintf (dump_file, "\n");
3844 return ret;
3848 /* Run the interprocedural part of IPA-SRA. */
3850 static unsigned int
3851 ipa_sra_analysis (void)
3853 if (dump_file)
3855 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3856 ipa_sra_dump_all_summaries (dump_file);
3859 gcc_checking_assert (func_sums);
3860 gcc_checking_assert (call_sums);
3861 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3862 auto_vec <cgraph_node *, 16> stack;
3863 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3865 /* One sweep from callees to callers for parameter removal and splitting. */
3866 for (int i = 0; i < node_scc_count; i++)
3868 cgraph_node *scc_rep = order[i];
3869 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3870 unsigned j;
3872 /* Preliminary IPA function level checks and first step of parameter
3873 removal. */
3874 cgraph_node *v;
3875 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3877 isra_func_summary *ifs = func_sums->get (v);
3878 if (!ifs || !ifs->m_candidate)
3879 continue;
3880 if (!ipa_sra_ipa_function_checks (v)
3881 || check_all_callers_for_issues (v))
3883 ifs->zap ();
3884 continue;
3886 if (disable_unavailable_parameters (v, ifs))
3887 continue;
3888 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3889 process_edge_to_unknown_caller (cs);
3890 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3891 if (!ipa_edge_within_scc (cs))
3892 param_removal_cross_scc_edge (cs);
3895 /* Look at edges within the current SCC and propagate used-ness across
3896 them, pushing onto the stack all notes which might need to be
3897 revisited. */
3898 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3899 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3900 &stack, true);
3902 /* Keep revisiting and pushing until nothing changes. */
3903 while (!stack.is_empty ())
3905 cgraph_node *v = stack.pop ();
3906 isra_func_summary *ifs = func_sums->get (v);
3907 gcc_checking_assert (ifs && ifs->m_queued);
3908 ifs->m_queued = false;
3910 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3911 &stack, true);
3914 /* Parameter splitting. */
3915 bool repeat_scc_access_propagation;
3918 repeat_scc_access_propagation = false;
3919 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3921 isra_func_summary *ifs = func_sums->get (v);
3922 if (!ifs
3923 || !ifs->m_candidate
3924 || vec_safe_is_empty (ifs->m_parameters))
3925 continue;
3926 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3927 if (param_splitting_across_edge (cs))
3928 repeat_scc_access_propagation = true;
3931 while (repeat_scc_access_propagation);
3933 if (flag_checking)
3934 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3935 verify_splitting_accesses (v, true);
3937 cycle_nodes.release ();
3940 /* One sweep from caller to callees for result removal. */
3941 for (int i = node_scc_count - 1; i >= 0 ; i--)
3943 cgraph_node *scc_rep = order[i];
3944 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3945 unsigned j;
3947 cgraph_node *v;
3948 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3950 isra_func_summary *ifs = func_sums->get (v);
3951 if (!ifs || !ifs->m_candidate)
3952 continue;
3954 bool return_needed
3955 = (ifs->m_returns_value
3956 && (!dbg_cnt (ipa_sra_retvalues)
3957 || v->call_for_symbol_and_aliases (retval_used_p,
3958 NULL, true)));
3959 ifs->m_return_ignored = !return_needed;
3960 if (return_needed)
3961 isra_push_node_to_stack (v, ifs, &stack);
3964 while (!stack.is_empty ())
3966 cgraph_node *node = stack.pop ();
3967 isra_func_summary *ifs = func_sums->get (node);
3968 gcc_checking_assert (ifs && ifs->m_queued);
3969 ifs->m_queued = false;
3971 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3972 if (ipa_edge_within_scc (cs)
3973 && call_sums->get (cs)->m_return_returned)
3975 enum availability av;
3976 cgraph_node *callee = cs->callee->function_symbol (&av);
3977 isra_func_summary *to_ifs = func_sums->get (callee);
3978 if (to_ifs && to_ifs->m_return_ignored)
3980 to_ifs->m_return_ignored = false;
3981 isra_push_node_to_stack (callee, to_ifs, &stack);
3985 cycle_nodes.release ();
3988 ipa_free_postorder_info ();
3989 free (order);
3991 if (dump_file)
3993 if (dump_flags & TDF_DETAILS)
3995 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
3996 " ==========\n");
3997 ipa_sra_dump_all_summaries (dump_file);
3999 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4002 hash_map<const char *, unsigned> *clone_num_suffixes
4003 = new hash_map<const char *, unsigned>;
4005 cgraph_node *node;
4006 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4007 process_isra_node_results (node, clone_num_suffixes);
4009 delete clone_num_suffixes;
4010 ggc_delete (func_sums);
4011 func_sums = NULL;
4012 delete call_sums;
4013 call_sums = NULL;
4015 if (dump_file)
4016 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4017 "==========\n\n");
4018 return 0;
4022 const pass_data pass_data_ipa_sra =
4024 IPA_PASS, /* type */
4025 "sra", /* name */
4026 OPTGROUP_NONE, /* optinfo_flags */
4027 TV_IPA_SRA, /* tv_id */
4028 0, /* properties_required */
4029 0, /* properties_provided */
4030 0, /* properties_destroyed */
4031 0, /* todo_flags_start */
4032 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4035 class pass_ipa_sra : public ipa_opt_pass_d
4037 public:
4038 pass_ipa_sra (gcc::context *ctxt)
4039 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4040 ipa_sra_generate_summary, /* generate_summary */
4041 ipa_sra_write_summary, /* write_summary */
4042 ipa_sra_read_summary, /* read_summary */
4043 NULL , /* write_optimization_summary */
4044 NULL, /* read_optimization_summary */
4045 NULL, /* stmt_fixup */
4046 0, /* function_transform_todo_flags_start */
4047 NULL, /* function_transform */
4048 NULL) /* variable_transform */
4051 /* opt_pass methods: */
4052 bool gate (function *) final override
4054 /* TODO: We should remove the optimize check after we ensure we never run
4055 IPA passes when not optimizing. */
4056 return (flag_ipa_sra && optimize);
4059 unsigned int execute (function *) final override
4061 return ipa_sra_analysis ();
4064 }; // class pass_ipa_sra
4066 } // anon namespace
4068 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4069 create a summary structure describing IPA-SRA opportunities and constraints
4070 in it. */
4072 static void
4073 ipa_sra_summarize_function (cgraph_node *node)
4075 if (dump_file)
4076 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4077 node->order);
4078 if (!ipa_sra_preliminary_function_checks (node))
4080 isra_analyze_all_outgoing_calls (node);
4081 return;
4083 gcc_obstack_init (&gensum_obstack);
4084 isra_func_summary *ifs = func_sums->get_create (node);
4085 ifs->m_candidate = true;
4086 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4087 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4089 decl2desc = new hash_map<tree, gensum_param_desc *>;
4090 unsigned count = 0;
4091 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
4092 count++;
4094 if (count > 0)
4096 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4097 param_descriptions.reserve_exact (count);
4098 param_descriptions.quick_grow_cleared (count);
4100 bool cfun_pushed = false;
4101 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4102 if (create_parameter_descriptors (node, &param_descriptions))
4104 push_cfun (fun);
4105 cfun_pushed = true;
4106 final_bbs = BITMAP_ALLOC (NULL);
4107 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4108 by_ref_count
4109 * last_basic_block_for_fn (fun));
4110 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4111 scan_function (node, fun);
4113 if (dump_file)
4115 dump_gensum_param_descriptors (dump_file, node->decl,
4116 &param_descriptions);
4117 fprintf (dump_file, "----------------------------------------\n");
4120 process_scan_results (node, fun, ifs, &param_descriptions);
4122 if (cfun_pushed)
4123 pop_cfun ();
4124 if (bb_dereferences)
4126 free (bb_dereferences);
4127 bb_dereferences = NULL;
4128 BITMAP_FREE (final_bbs);
4129 final_bbs = NULL;
4132 isra_analyze_all_outgoing_calls (node);
4134 delete decl2desc;
4135 decl2desc = NULL;
4136 obstack_free (&gensum_obstack, NULL);
4137 if (dump_file)
4138 fprintf (dump_file, "\n\n");
4139 if (flag_checking)
4140 verify_splitting_accesses (node, false);
4141 return;
4144 ipa_opt_pass_d *
4145 make_pass_ipa_sra (gcc::context *ctxt)
4147 return new pass_ipa_sra (ctxt);
4151 #include "gt-ipa-sra.h"