gcov: make profile merging smarter
[official-gcc.git] / gcc / ipa-sra.c
blob88036590425d235c4772bdd6d5e630d6a70270d0
1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2008-2021 Free Software Foundation, Inc.
4 Contributed by Martin Jambor <mjambor@suse.cz>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* IPA-SRA is an interprocedural pass that removes unused function return
23 values (turning functions returning a value which is never used into void
24 functions), removes unused function parameters. It can also replace an
25 aggregate parameter by a set of other parameters representing part of the
26 original, turning those passed by reference into new ones which pass the
27 value directly.
29 The pass is a true IPA one, which means that it works in three stages in
30 order to be able to take advantage of LTO. First, summaries about functions
31 and each call are generated. Function summaries (often called call graph
32 node summaries) contain mainly information about which parameters are
33 potential transformation candidates and which bits of candidates are
34 accessed. We differentiate between accesses done as a part of a call
35 statement (which might be not necessary if the callee is also transformed)
36 and others (which are mandatory). Call summaries (often called call graph
37 edge summaries) contain information about which function formal parameters
38 feed into which actual call arguments so that if two parameters are only
39 used in a sum which is then passed to another function which then however
40 does not use this parameter, all three parameters of the two functions can
41 be eliminated. Edge summaries also have flags whether the return value is
42 used or if it is only returned in the caller too. In LTO mode these
43 summaries are then streamed to the object file in the compilation phase and
44 streamed back in in the WPA analysis stage.
46 The interprocedural analysis phase traverses the graph in topological order
47 in two sweeps, one in each direction. First, from callees to callers for
48 parameter removal and splitting. Each strongly-connected component is
49 processed iteratively until the situation in it stabilizes. The pass from
50 callers to callees is then carried out to remove unused return values in a
51 very similar fashion.
53 Because parameter manipulation has big implications for call redirection
54 which is done only after all call graph nodes materialize, the
55 transformation phase is not part of this patch but is carried out by the
56 clone materialization and edge redirection itself, see comments in
57 ipa-param-manipulation.h for more details. */
61 #include "config.h"
62 #include "system.h"
63 #include "coretypes.h"
64 #include "backend.h"
65 #include "tree.h"
66 #include "gimple.h"
67 #include "predict.h"
68 #include "tree-pass.h"
69 #include "ssa.h"
70 #include "cgraph.h"
71 #include "gimple-pretty-print.h"
72 #include "alias.h"
73 #include "tree-eh.h"
74 #include "gimple-iterator.h"
75 #include "gimple-walk.h"
76 #include "tree-dfa.h"
77 #include "tree-sra.h"
78 #include "alloc-pool.h"
79 #include "symbol-summary.h"
80 #include "dbgcnt.h"
81 #include "tree-inline.h"
82 #include "ipa-utils.h"
83 #include "builtins.h"
84 #include "cfganal.h"
85 #include "tree-streamer.h"
86 #include "internal-fn.h"
87 #include "symtab-clones.h"
89 static void ipa_sra_summarize_function (cgraph_node *);
91 /* Bits used to track size of an aggregate in bytes interprocedurally. */
92 #define ISRA_ARG_SIZE_LIMIT_BITS 16
93 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94 /* How many parameters can feed into a call actual argument and still be
95 tracked. */
96 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
98 /* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
103 struct GTY(()) param_access
105 /* Type that a potential replacement should have. This field only has
106 meaning in the summary building and transformation phases, when it is
107 reconstructed from the body. Must not be touched in IPA analysis
108 stage. */
109 tree type;
111 /* Alias reference type to be used in MEM_REFs when adjusting caller
112 arguments. */
113 tree alias_ptr_type;
115 /* Values returned by get_ref_base_and_extent but converted to bytes and
116 stored as unsigned ints. */
/* NOTE: unit_size is a bit-field of ISRA_ARG_SIZE_LIMIT_BITS bits, so any
   tracked access is necessarily smaller than ISRA_ARG_SIZE_LIMIT bytes.  */
117 unsigned unit_offset;
118 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
120 /* Set once we are sure that the access will really end up in a potentially
121 transformed function - initially not set for portions of formal parameters
122 that are only used as actual function arguments passed to callees. */
123 unsigned certain : 1;
124 /* Set if the access has a reversed scalar storage order. */
125 unsigned reverse : 1;
128 /* This structure has the same purpose as the one above and additionally it
129 contains some fields that are only necessary in the summary generation
130 phase. */
132 struct gensum_param_access
/* NOTE(review): unlike param_access, offset and size here look like the raw
   (bit-granular) values produced by get_ref_base_and_extent before the
   conversion to byte units - confirm before relying on units.  */
134 /* Values returned by get_ref_base_and_extent. */
135 HOST_WIDE_INT offset;
136 HOST_WIDE_INT size;
138 /* If this access has any children (in terms of the definition above), this
139 points to the first one. */
140 struct gensum_param_access *first_child;
141 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
142 described above. */
143 struct gensum_param_access *next_sibling;
145 /* Type that a potential replacement should have. This field only has
146 meaning in the summary building and transformation phases, when it is
147 reconstructed from the body. Must not be touched in IPA analysis
148 stage. */
149 tree type;
150 /* Alias reference type to be used in MEM_REFs when adjusting caller
151 arguments. */
152 tree alias_ptr_type;
154 /* Have there been writes to or reads from this exact location except for as
155 arguments to a function call that can be tracked. */
156 bool nonarg;
158 /* Set if the access has a reversed scalar storage order. */
159 bool reverse;
162 /* Summary describing a parameter in the IPA stages. */
164 struct GTY(()) isra_param_desc
166 /* List of access representatives to the parameters, sorted according to
167 their offset. */
168 vec <param_access *, va_gc> *accesses;
/* Both size fields below share the ISRA_ARG_SIZE_LIMIT_BITS width so they can
   represent any value a param_access can have.  */
170 /* Unit size limit of total size of all replacements. */
171 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
172 /* Sum of unit sizes of all certain replacements. */
173 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
175 /* A parameter that is used only in call arguments and can be removed if all
176 concerned actual arguments are removed. */
177 unsigned locally_unused : 1;
178 /* An aggregate that is a candidate for breaking up or complete removal. */
179 unsigned split_candidate : 1;
180 /* Is this a parameter passing stuff by reference? */
181 unsigned by_ref : 1;
184 /* Structure used when generating summaries that describes a parameter. */
186 struct gensum_param_desc
188 /* Roots of param_accesses. */
189 gensum_param_access *accesses;
190 /* Number of accesses in the access tree rooted in field accesses. */
191 unsigned access_count;
193 /* If the below is non-zero, this is the number of uses as actual arguments. */
194 int call_uses;
195 /* Number of times this parameter has been directly passed to a callee as a
   verbatim (pass-through) pointer argument. */
196 unsigned ptr_pt_count;
198 /* Size limit of total size of all replacements. */
199 unsigned param_size_limit;
200 /* Sum of sizes of nonarg accesses. */
201 unsigned nonarg_acc_size;
203 /* A parameter that is used only in call arguments and can be removed if all
204 concerned actual arguments are removed. */
205 bool locally_unused;
206 /* An aggregate that is a candidate for breaking up or a pointer passing data
207 by reference that is a candidate for being converted to a set of
208 parameters passing those data by value. */
209 bool split_candidate;
210 /* Is this a parameter passing stuff by reference? */
211 bool by_ref;
213 /* The number of this parameter as they are ordered in function decl. */
214 int param_number;
215 /* For parameters passing data by reference, this is parameter index to
216 compute indices to bb_dereferences. */
217 int deref_index;
220 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
221 not in GC memory, this is not necessary and we can consider removing the
222 function. */
224 static void
225 free_param_decl_accesses (isra_param_desc *desc)
227 unsigned len = vec_safe_length (desc->accesses);
228 for (unsigned i = 0; i < len; ++i)
229 ggc_free ((*desc->accesses)[i]);
230 vec_free (desc->accesses);
233 /* Class used to convey information about functions from the
234 intra-procedural analysis stage to inter-procedural one. */
236 class GTY((for_user)) isra_func_summary
238 public:
239 /* Initialize the object to an empty, non-candidate state. */
241 isra_func_summary ()
242 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
243 m_return_ignored (false), m_queued (false)
246 /* Destroy m_parameters. */
248 ~isra_func_summary ();
250 /* Mark the function as not a candidate for any IPA-SRA transformation.
251 Return true if it was a candidate until now. */
253 bool zap ();
255 /* Vector of parameter descriptors corresponding to the function being
256 analyzed. */
257 vec<isra_param_desc, va_gc> *m_parameters;
/* The flags below are 1-bit fields to keep the summary compact.  */
259 /* Whether the node is even a candidate for any IPA-SRA transformation at
260 all. */
261 unsigned m_candidate : 1;
263 /* Whether the original function returns any value. */
264 unsigned m_returns_value : 1;
266 /* Set to true if all call statements do not actually use the returned
267 value. */
269 unsigned m_return_ignored : 1;
271 /* Whether the node is already queued in IPA SRA stack during processing of
272 call graphs SCCs. */
274 unsigned m_queued : 1;
277 /* Clean up and deallocate isra_func_summary points to. TODO: Since this data
278 structure is not in GC memory, this is not necessary and we can consider
279 removing the destructor. */
281 isra_func_summary::~isra_func_summary ()
283 unsigned len = vec_safe_length (m_parameters);
284 for (unsigned i = 0; i < len; ++i)
285 free_param_decl_accesses (&(*m_parameters)[i]);
286 vec_free (m_parameters);
290 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
291 true if it was a candidate until now. */
293 bool
294 isra_func_summary::zap ()
296 bool ret = m_candidate;
297 m_candidate = false;
299 unsigned len = vec_safe_length (m_parameters);
300 for (unsigned i = 0; i < len; ++i)
301 free_param_decl_accesses (&(*m_parameters)[i]);
302 vec_free (m_parameters);
304 return ret;
307 /* Structure to describe which formal parameters feed into a particular actual
308 arguments. */
310 struct isra_param_flow
312 /* Number of elements in array inputs that contain valid data. */
313 char length;
314 /* Indices of formal parameters that feed into the described actual argument.
315 If aggregate_pass_through or pointer_pass_through below are true, it must
316 contain exactly one element which is passed through from a formal
317 parameter of the given number. Otherwise, the array contains indices of
318 callee's formal parameters which are used to calculate value of this
319 actual argument. */
320 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
322 /* Offset within the formal parameter. */
323 unsigned unit_offset;
324 /* Size of the portion of the formal parameter that is being passed. */
325 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
327 /* True when the value of this actual copy is a portion of a formal
328 parameter. */
329 unsigned aggregate_pass_through : 1;
330 /* True when the value of this actual copy is a verbatim pass through of an
331 obtained pointer. */
332 unsigned pointer_pass_through : 1;
333 /* True when it is safe to copy access candidates here from the callee, which
334 would mean introducing dereferences into callers of the caller. */
335 unsigned safe_to_import_accesses : 1;
338 /* Structure used to convey information about calls from the intra-procedural
339 analysis stage to inter-procedural one. */
341 class isra_call_summary
343 public:
/* Initialize the summary with all flags cleared and no argument flow.  */
344 isra_call_summary ()
345 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
346 m_bit_aligned_arg (false), m_before_any_store (false)
349 void init_inputs (unsigned arg_count);
350 void dump (FILE *f);
352 /* Information about what formal parameters of the caller are used to compute
353 individual actual arguments of this call. */
354 auto_vec <isra_param_flow> m_arg_flow;
356 /* Set to true if the call statement does not have a LHS. */
357 unsigned m_return_ignored : 1;
359 /* Set to true if the LHS of call statement is only used to construct the
360 return value of the caller. */
361 unsigned m_return_returned : 1;
363 /* Set when any of the call arguments are not byte-aligned. */
364 unsigned m_bit_aligned_arg : 1;
366 /* Set to true if the call happened before any (other) store to memory in the
367 caller. */
368 unsigned m_before_any_store : 1;
371 /* Class to manage function summaries. */
373 class GTY((user)) ipa_sra_function_summaries
374 : public function_summary <isra_func_summary *>
376 public:
377 ipa_sra_function_summaries (symbol_table *table, bool ggc):
378 function_summary<isra_func_summary *> (table, ggc) { }
/* Overridden hooks: duplicate copies a summary when a cgraph node is cloned,
   insert computes one for a newly appearing node.  */
380 virtual void duplicate (cgraph_node *, cgraph_node *,
381 isra_func_summary *old_sum,
382 isra_func_summary *new_sum);
383 virtual void insert (cgraph_node *, isra_func_summary *);
386 /* Hook that is called by summary when a node is duplicated. */
388 void
389 ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
390 isra_func_summary *old_sum,
391 isra_func_summary *new_sum)
393 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
394 useless. */
395 new_sum->m_candidate = old_sum->m_candidate;
396 new_sum->m_returns_value = old_sum->m_returns_value;
397 new_sum->m_return_ignored = old_sum->m_return_ignored;
398 gcc_assert (!old_sum->m_queued);
399 new_sum->m_queued = false;
401 unsigned param_count = vec_safe_length (old_sum->m_parameters);
402 if (!param_count)
403 return;
404 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
405 new_sum->m_parameters->quick_grow_cleared (param_count);
406 for (unsigned i = 0; i < param_count; i++)
408 isra_param_desc *s = &(*old_sum->m_parameters)[i];
409 isra_param_desc *d = &(*new_sum->m_parameters)[i];
411 d->param_size_limit = s->param_size_limit;
412 d->size_reached = s->size_reached;
413 d->locally_unused = s->locally_unused;
414 d->split_candidate = s->split_candidate;
415 d->by_ref = s->by_ref;
417 unsigned acc_count = vec_safe_length (s->accesses);
418 vec_safe_reserve_exact (d->accesses, acc_count);
419 for (unsigned j = 0; j < acc_count; j++)
421 param_access *from = (*s->accesses)[j];
422 param_access *to = ggc_cleared_alloc<param_access> ();
423 to->type = from->type;
424 to->alias_ptr_type = from->alias_ptr_type;
425 to->unit_offset = from->unit_offset;
426 to->unit_size = from->unit_size;
427 to->certain = from->certain;
428 d->accesses->quick_push (to);
433 /* Pointer to the pass function summary holder.  Marked with GTY(()) so the
   garbage collector scans it - the summaries hold tree pointers. */
435 static GTY(()) ipa_sra_function_summaries *func_sums;
437 /* Hook that is called by summary when new node appears. */
439 void
440 ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
442 if (opt_for_fn (node->decl, flag_ipa_sra))
444 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
445 ipa_sra_summarize_function (node);
446 pop_cfun ();
448 else
449 func_sums->remove (node);
452 /* Class to manage call summaries. */
454 class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
456 public:
457 ipa_sra_call_summaries (symbol_table *table):
458 call_summary<isra_call_summary *> (table) { }
460 /* Duplicate info when an edge is cloned. */
461 virtual void duplicate (cgraph_edge *, cgraph_edge *,
462 isra_call_summary *old_sum,
463 isra_call_summary *new_sum);
/* Singleton instance of the call summary manager used by the pass.  */
466 static ipa_sra_call_summaries *call_sums;
469 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
470 ARG_COUNT is the number of actual arguments passed. */
472 void
473 isra_call_summary::init_inputs (unsigned arg_count)
475 if (arg_count == 0)
477 gcc_checking_assert (m_arg_flow.length () == 0);
478 return;
480 if (m_arg_flow.length () == 0)
482 m_arg_flow.reserve_exact (arg_count);
483 m_arg_flow.quick_grow_cleared (arg_count);
485 else
486 gcc_checking_assert (arg_count == m_arg_flow.length ());
489 /* Dump all information in call summary to F. */
491 void
492 isra_call_summary::dump (FILE *f)
494 if (m_return_ignored)
495 fprintf (f, " return value ignored\n")
496 if (m_return_returned)
497 fprintf (f, " return value used only to compute caller return value\n");
498 if (m_before_any_store)
499 fprintf (f, " happens before any store to memory\n");
/* Per-argument flow information.  */
500 for (unsigned i = 0; i < m_arg_flow.length (); i++)
502 fprintf (f, " Parameter %u:\n", i);
503 isra_param_flow *ipf = &m_arg_flow[i];
505 if (ipf->length)
507 bool first = true;
508 fprintf (f, " Scalar param sources: ");
509 for (int j = 0; j < ipf->length; j++)
511 if (!first)
512 fprintf (f, ", ");
513 else
514 first = false;
515 fprintf (f, "%i", (int) ipf->inputs[j]);
517 fprintf (f, "\n");
519 if (ipf->aggregate_pass_through)
520 fprintf (f, " Aggregate pass through from the param given above, "
521 "unit offset: %u , unit size: %u\n",
522 ipf->unit_offset, ipf->unit_size);
523 if (ipf->pointer_pass_through)
524 fprintf (f, " Pointer pass through from the param given above, "
525 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
529 /* Duplicate edge summary when an edge is cloned. */
531 void
532 ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
533 isra_call_summary *old_sum,
534 isra_call_summary *new_sum)
536 unsigned arg_count = old_sum->m_arg_flow.length ();
537 new_sum->init_inputs (arg_count);
538 for (unsigned i = 0; i < arg_count; i++)
539 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
541 new_sum->m_return_ignored = old_sum->m_return_ignored;
542 new_sum->m_return_returned = old_sum->m_return_returned;
543 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
544 new_sum->m_before_any_store = old_sum->m_before_any_store;
548 /* With all GTY stuff done, we can move to anonymous namespace. */
549 namespace {
550 /* Quick mapping from a decl to its param descriptor. */
552 hash_map<tree, gensum_param_desc *> *decl2desc;
554 /* Countdown of allowed Alias analysis steps during summary building. */
556 int aa_walking_limit;
558 /* This is a table in which for each basic block and parameter there is a
559 distance (offset + size) in that parameter which is dereferenced and
560 accessed in that BB. */
/* NOTE(review): entries appear to be indexed per-BB and per by-ref parameter
   (see deref_index in gensum_param_desc) - confirm the exact layout at the
   allocation site, which is outside this excerpt.  */
561 HOST_WIDE_INT *bb_dereferences = NULL;
562 /* How many by-reference parameters there are in the current function. */
563 int by_ref_count;
565 /* Bitmap of BBs that can cause the function to "stop" progressing by
566 returning, throwing externally, looping infinitely or calling a function
567 which might abort etc.. */
568 bitmap final_bbs;
570 /* Obstack to allocate various small structures required only when generating
571 summary for a function. */
572 struct obstack gensum_obstack;
574 /* Return false if the function is apparently unsuitable for IPA-SRA based on
575 its attributes, return true otherwise. NODE is the cgraph node of the current
576 function. */
578 static bool
579 ipa_sra_preliminary_function_checks (cgraph_node *node)
581 if (!node->can_change_signature)
583 if (dump_file)
584 fprintf (dump_file, "Function cannot change signature.\n");
585 return false;
588 if (!tree_versionable_function_p (node->decl))
590 if (dump_file)
591 fprintf (dump_file, "Function is not versionable.\n");
592 return false;
595 if (!opt_for_fn (node->decl, optimize)
596 || !opt_for_fn (node->decl, flag_ipa_sra))
598 if (dump_file)
599 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
600 "function.\n");
601 return false;
604 if (DECL_VIRTUAL_P (node->decl))
606 if (dump_file)
607 fprintf (dump_file, "Function is a virtual method.\n");
608 return false;
611 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
612 if (fun->stdarg)
614 if (dump_file)
615 fprintf (dump_file, "Function uses stdarg. \n");
616 return false;
/* NOTE(review): type attributes may change argument semantics, hence the
   conservative bail-out - confirm rationale against upstream history.  */
619 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
621 if (dump_file)
622 fprintf (dump_file, "Function type has attributes. \n");
623 return false;
626 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
628 if (dump_file)
629 fprintf (dump_file, "Always inline function will be inlined "
630 "anyway. \n");
631 return false;
634 return true;
637 /* Print access tree starting at ACCESS to F, indenting each level by INDENT
   extra spaces; children are printed recursively with a larger indent. */
639 static void
640 dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
642 fprintf (f, " ");
643 for (unsigned i = 0; i < indent; i++)
644 fprintf (f, " ");
645 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
646 access->offset);
647 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
648 fprintf (f, ", type: ");
649 print_generic_expr (f, access->type);
650 fprintf (f, ", alias_ptr_type: ");
651 print_generic_expr (f, access->alias_ptr_type);
652 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
/* Recurse into all children of this access.  NOTE(review): the loop header
   appears garbled in this copy of the source (the "ch;" condition is
   missing) - compare with the upstream file.  */
653 for (gensum_param_access *ch = access->first_child;
655 ch = ch->next_sibling)
656 dump_gensum_access (f, ch, indent + 2);
660 /* Print a single IPA-stage access ACCESS to F (param_access trees have no
   children, so there is no recursion here). */
662 static void
663 dump_isra_access (FILE *f, param_access *access)
665 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
666 fprintf (f, ", unit size: %u", access->unit_size);
667 fprintf (f, ", type: ");
668 print_generic_expr (f, access->type);
669 fprintf (f, ", alias_ptr_type: ");
670 print_generic_expr (f, access->alias_ptr_type);
671 if (access->certain)
672 fprintf (f, ", certain");
673 else
674 fprintf (f, ", not-certain");
675 if (access->reverse)
676 fprintf (f, ", reverse");
677 fprintf (f, "\n");
680 /* Dump a single access ACCESS to stderr; for use from the debugger. */
682 DEBUG_FUNCTION void
683 debug_isra_access (param_access *access)
685 dump_isra_access (stderr, access);
688 /* Dump the summary-generation-phase parameter descriptor DESC to F. */
690 static void
691 dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
693 if (desc->locally_unused)
694 fprintf (f, " unused with %i call_uses\n", desc->call_uses);
695 if (!desc->split_candidate)
697 fprintf (f, " not a candidate\n");
698 return;
700 if (desc->by_ref)
701 fprintf (f, " by_ref with %u pass throughs\n", desc->ptr_pt_count);
/* Dump the whole access forest, one tree per top-level access.  */
703 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
704 dump_gensum_access (f, acc, 2);
707 /* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
708 describe parameters of FNDECL, to F. */
710 static void
711 dump_gensum_param_descriptors (FILE *f, tree fndecl,
712 vec<gensum_param_desc> *param_descriptions)
714 tree parm = DECL_ARGUMENTS (fndecl);
/* Descriptors and PARM_DECLs are walked in lockstep.  */
715 for (unsigned i = 0;
716 i < param_descriptions->length ();
717 ++i, parm = DECL_CHAIN (parm))
719 fprintf (f, " Descriptor for parameter %i ", i);
720 print_generic_expr (f, parm, TDF_UID);
721 fprintf (f, "\n");
722 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
727 /* Dump the IPA-stage parameter descriptor DESC to F. */
729 static void
730 dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
732 if (desc->locally_unused)
734 fprintf (f, " (locally) unused\n");
736 if (!desc->split_candidate)
738 fprintf (f, " not a candidate for splitting\n");
739 return;
741 fprintf (f, " param_size_limit: %u, size_reached: %u%s\n",
742 desc->param_size_limit, desc->size_reached,
743 desc->by_ref ? ", by_ref" : "");
/* Accesses are stored sorted by offset; dump them in that order.  */
745 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
747 param_access *access = (*desc->accesses)[i];
748 dump_isra_access (f, access);
752 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
753 F. */
755 static void
756 dump_isra_param_descriptors (FILE *f, tree fndecl,
757 isra_func_summary *ifs)
759 tree parm = DECL_ARGUMENTS (fndecl);
760 if (!ifs->m_parameters)
762 fprintf (f, " parameter descriptors not available\n");
763 return;
/* Descriptors and PARM_DECLs are walked in lockstep.  */
766 for (unsigned i = 0;
767 i < ifs->m_parameters->length ();
768 ++i, parm = DECL_CHAIN (parm))
770 fprintf (f, " Descriptor for parameter %i ", i);
771 print_generic_expr (f, parm, TDF_UID);
772 fprintf (f, "\n");
773 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
777 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
778 function fails return false, otherwise return true. SRC must fit into an
779 unsigned char. Used for purposes of transitive unused parameter
780 removal. */
782 static bool
783 add_src_to_param_flow (isra_param_flow *param_flow, int src)
785 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
786 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
787 return false;
789 param_flow->inputs[(int) param_flow->length] = src;
790 param_flow->length++;
791 return true;
794 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
795 it is the only input. Used for purposes of transitive parameter
796 splitting. */
798 static void
799 set_single_param_flow_source (isra_param_flow *param_flow, int src)
801 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
802 if (param_flow->length == 0)
804 param_flow->inputs[0] = src;
805 param_flow->length = 1;
807 else if (param_flow->length == 1)
808 gcc_assert (param_flow->inputs[0] == src);
809 else
810 gcc_unreachable ();
813 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
814 it. */
816 static unsigned
817 get_single_param_flow_source (const isra_param_flow *param_flow)
819 gcc_assert (param_flow->length == 1);
820 return param_flow->inputs[0];
823 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
824 in FUN represented with NODE and return a negative number if any of them is
825 used for something else than either an actual call argument, simple
826 arithmetic operation or debug statement. If there are no such uses, return
827 the number of actual arguments that this parameter eventually feeds to (or
828 zero if there is none). For any such parameter, mark PARM_NUM as one of its
829 sources. ANALYZED is a bitmap that tracks which SSA names we have already
830 started investigating. */
832 static int
833 isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
834 int parm_num, bitmap analyzed)
836 int res = 0;
837 imm_use_iterator imm_iter;
838 gimple *stmt;
/* Walk every statement that uses NAME; -1 in RES signals an unhandled use
   and terminates the walk early.  */
840 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
842 if (is_gimple_debug (stmt))
843 continue;
845 /* TODO: We could handle at least const builtin functions like arithmetic
846 operations below. */
847 if (is_gimple_call (stmt))
/* Count how many operands of this call statement use NAME so that we can
   later verify all of them were simple argument uses.  */
849 int all_uses = 0;
850 use_operand_p use_p;
851 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
852 all_uses++;
854 gcall *call = as_a <gcall *> (stmt);
855 unsigned arg_count;
856 if (gimple_call_internal_p (call)
857 || (arg_count = gimple_call_num_args (call)) == 0)
859 res = -1;
860 break;
/* Record in the edge summary which formal parameter feeds each argument so
   that unused-parameter removal can work transitively across calls.  */
863 cgraph_edge *cs = node->get_edge (stmt);
864 gcc_checking_assert (cs);
865 isra_call_summary *csum = call_sums->get_create (cs);
866 csum->init_inputs (arg_count);
868 int simple_uses = 0;
869 for (unsigned i = 0; i < arg_count; i++)
870 if (gimple_call_arg (call, i) == name)
872 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
874 simple_uses = -1;
875 break;
877 simple_uses++;
/* Any use of NAME in the call which is not a plain argument (e.g. the
   called address) makes the counts differ and disqualifies NAME.  */
880 if (simple_uses < 0
881 || all_uses != simple_uses)
883 res = -1;
884 break;
886 res += all_uses;
/* Simple non-volatile assignment or PHI: follow the defined SSA name
   recursively and accumulate its call uses as well.  */
888 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
889 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
890 || gimple_code (stmt) == GIMPLE_PHI)
892 tree lhs;
893 if (gimple_code (stmt) == GIMPLE_PHI)
894 lhs = gimple_phi_result (stmt);
895 else
896 lhs = gimple_assign_lhs (stmt);
898 if (TREE_CODE (lhs) != SSA_NAME)
900 res = -1;
901 break;
903 gcc_assert (!gimple_vdef (stmt));
/* ANALYZED guards against revisiting an SSA name (e.g. through PHI
   cycles); bitmap_set_bit returns true only on first insertion.  */
904 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
906 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
907 analyzed);
908 if (tmp < 0)
910 res = tmp;
911 break;
913 res += tmp;
916 else
918 res = -1;
919 break;
922 return res;
925 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
926 also described by NODE) and simple arithmetic calculations involving PARM
927 and return false if any of them is used for something else than either an
928 actual call argument, simple arithmetic operation or debug statement. If
929 there are no such uses, return true and store the number of actual arguments
930 that this parameter eventually feeds to (or zero if there is none) to
931 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
932 sources.
934 This function is similar to ptr_parm_has_nonarg_uses but its results are
935 meant for unused parameter removal, as opposed to splitting of parameters
936 passed by reference or converting them to passed by value.
939 static bool
940 isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
941 int parm_num, int *call_uses_p)
943 gcc_checking_assert (is_gimple_reg (parm));
945 tree name = ssa_default_def (fun, parm);
946 if (!name || has_zero_uses (name))
948 *call_uses_p = 0;
949 return false;
952 /* Edge summaries can only handle callers with fewer than 256 parameters. */
953 if (parm_num > UCHAR_MAX)
954 return true;
956 bitmap analyzed = BITMAP_ALLOC (NULL);
957 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
958 analyzed);
959 BITMAP_FREE (analyzed);
960 if (call_uses < 0)
961 return true;
962 *call_uses_p = call_uses;
963 return false;
966 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
967 examine whether there are any nonarg uses that are not actual arguments or
968 otherwise infeasible uses. If so, return true, otherwise return false.
969 Create pass-through IPA flow records for any direct uses as argument calls
970 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
971 must represent the function that is currently analyzed, PARM_NUM must be the
972 index of the analyzed parameter.
974 This function is similar to isra_track_scalar_param_local_uses but its
975 results are meant for splitting of parameters passed by reference or turning
976 them into bits passed by value, as opposed to generic unused parameter
977 removal.
980 static bool
981 ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
982 int parm_num, unsigned *pt_count_p)
984 imm_use_iterator ui;
985 gimple *stmt;
986 tree name = ssa_default_def (fun, parm);
987 bool ret = false;
988 unsigned pt_count = 0;
990 if (!name || has_zero_uses (name))
991 return false;
993 /* Edge summaries can only handle callers with fewer than 256 parameters. */
994 if (parm_num > UCHAR_MAX)
995 return true;
/* For every statement using NAME, count in USES_OK the uses we understand
   and compare against the total number of uses in that statement.  */
997 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
999 unsigned uses_ok = 0;
1000 use_operand_p use_p;
1002 if (is_gimple_debug (stmt))
1003 continue;
/* A non-volatile load through the pointer (possibly wrapped in component
   references) at offset zero and of the pointed-to type is acceptable.  */
1005 if (gimple_assign_single_p (stmt))
1007 tree rhs = gimple_assign_rhs1 (stmt);
1008 if (!TREE_THIS_VOLATILE (rhs))
1010 while (handled_component_p (rhs))
1011 rhs = TREE_OPERAND (rhs, 0);
1012 if (TREE_CODE (rhs) == MEM_REF
1013 && TREE_OPERAND (rhs, 0) == name
1014 && integer_zerop (TREE_OPERAND (rhs, 1))
1015 && types_compatible_p (TREE_TYPE (rhs),
1016 TREE_TYPE (TREE_TYPE (name))))
1017 uses_ok++;
/* In calls, the pointer may be passed through verbatim (recorded in the
   edge summary) or dereferenced in an argument, both acceptable.  */
1020 else if (is_gimple_call (stmt))
1022 gcall *call = as_a <gcall *> (stmt);
1023 unsigned arg_count;
1024 if (gimple_call_internal_p (call)
1025 || (arg_count = gimple_call_num_args (call)) == 0)
1027 ret = true;
1028 break;
1031 cgraph_edge *cs = node->get_edge (stmt);
1032 gcc_checking_assert (cs);
1033 isra_call_summary *csum = call_sums->get_create (cs);
1034 csum->init_inputs (arg_count);
1036 for (unsigned i = 0; i < arg_count; ++i)
1038 tree arg = gimple_call_arg (stmt, i);
1040 if (arg == name)
1042 /* TODO: Allow &MEM_REF[name + offset] here,
1043 ipa_param_body_adjustments::modify_call_stmt has to be
1044 adjusted too. */
1045 csum->m_arg_flow[i].pointer_pass_through = true;
1046 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1047 pt_count++;
1048 uses_ok++;
1049 continue;
1052 if (!TREE_THIS_VOLATILE (arg))
1054 while (handled_component_p (arg))
1055 arg = TREE_OPERAND (arg, 0);
1056 if (TREE_CODE (arg) == MEM_REF
1057 && TREE_OPERAND (arg, 0) == name
1058 && integer_zerop (TREE_OPERAND (arg, 1))
1059 && types_compatible_p (TREE_TYPE (arg),
1060 TREE_TYPE (TREE_TYPE (name))))
1061 uses_ok++;
1066 /* If the number of valid uses does not match the number of
1067 uses in this stmt there is an unhandled use. */
1068 unsigned all_uses = 0;
1069 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1070 all_uses++;
1072 gcc_checking_assert (uses_ok <= all_uses);
1073 if (uses_ok != all_uses)
1075 ret = true;
1076 break;
/* PT_COUNT is stored even when returning true; callers relying on it
   should only use it when the function returned false.  */
1080 *pt_count_p = pt_count;
1081 return ret;
1084 /* Initialize vector of parameter descriptors of NODE. Return true if there
1085 are any candidates for splitting or unused aggregate parameter removal (the
1086 function may return false if there are candidates for removal of register
1087 parameters) and function body must be scanned. */
1089 static bool
1090 create_parameter_descriptors (cgraph_node *node,
1091 vec<gensum_param_desc> *param_descriptions)
1093 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1094 bool ret = false;
1096 int num = 0;
1097 for (tree parm = DECL_ARGUMENTS (node->decl);
1098 parm;
1099 parm = DECL_CHAIN (parm), num++)
1101 const char *msg;
1102 gensum_param_desc *desc = &(*param_descriptions)[num];
1103 /* param_descriptions vector is grown cleared in the caller. */
1104 desc->param_number = num;
1105 decl2desc->put (parm, desc);
1107 if (dump_file && (dump_flags & TDF_DETAILS))
1108 print_generic_expr (dump_file, parm, TDF_UID);
1110 int scalar_call_uses;
1111 tree type = TREE_TYPE (parm);
1112 if (TREE_THIS_VOLATILE (parm))
1114 if (dump_file && (dump_flags & TDF_DETAILS))
1115 fprintf (dump_file, " not a candidate, is volatile\n");
1116 continue;
1118 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1120 if (dump_file && (dump_flags & TDF_DETAILS))
1121 fprintf (dump_file, " not a candidate, is a va_list type\n");
1122 continue;
1124 if (TREE_ADDRESSABLE (parm))
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1127 fprintf (dump_file, " not a candidate, is addressable\n");
1128 continue;
1130 if (TREE_ADDRESSABLE (type))
1132 if (dump_file && (dump_flags & TDF_DETAILS))
1133 fprintf (dump_file, " not a candidate, type cannot be split\n");
1134 continue;
1137 if (is_gimple_reg (parm)
1138 && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1139 &scalar_call_uses))
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1142 fprintf (dump_file, " is a scalar with only %i call uses\n",
1143 scalar_call_uses);
1145 desc->locally_unused = true;
1146 desc->call_uses = scalar_call_uses;
1149 if (POINTER_TYPE_P (type))
1151 type = TREE_TYPE (type);
1153 if (TREE_CODE (type) == FUNCTION_TYPE
1154 || TREE_CODE (type) == METHOD_TYPE)
1156 if (dump_file && (dump_flags & TDF_DETAILS))
1157 fprintf (dump_file, " not a candidate, reference to "
1158 "a function\n");
1159 continue;
1161 if (TYPE_VOLATILE (type))
1163 if (dump_file && (dump_flags & TDF_DETAILS))
1164 fprintf (dump_file, " not a candidate, reference to "
1165 "a volatile type\n");
1166 continue;
1168 if (TREE_CODE (type) == ARRAY_TYPE
1169 && TYPE_NONALIASED_COMPONENT (type))
1171 if (dump_file && (dump_flags & TDF_DETAILS))
1172 fprintf (dump_file, " not a candidate, reference to "
1173 "a nonaliased component array\n");
1174 continue;
1176 if (!is_gimple_reg (parm))
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 fprintf (dump_file, " not a candidate, a reference which is "
1180 "not a gimple register (probably addressable)\n");
1181 continue;
1183 if (is_va_list_type (type))
1185 if (dump_file && (dump_flags & TDF_DETAILS))
1186 fprintf (dump_file, " not a candidate, reference to "
1187 "a va list\n");
1188 continue;
1190 if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1191 &desc->ptr_pt_count))
1193 if (dump_file && (dump_flags & TDF_DETAILS))
1194 fprintf (dump_file, " not a candidate, reference has "
1195 "nonarg uses\n");
1196 continue;
1198 desc->by_ref = true;
1200 else if (!AGGREGATE_TYPE_P (type))
1202 /* This is in an else branch because scalars passed by reference are
1203 still candidates to be passed by value. */
1204 if (dump_file && (dump_flags & TDF_DETAILS))
1205 fprintf (dump_file, " not a candidate, not an aggregate\n");
1206 continue;
1209 if (!COMPLETE_TYPE_P (type))
1211 if (dump_file && (dump_flags & TDF_DETAILS))
1212 fprintf (dump_file, " not a candidate, not a complete type\n");
1213 continue;
1215 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1217 if (dump_file && (dump_flags & TDF_DETAILS))
1218 fprintf (dump_file, " not a candidate, size not representable\n");
1219 continue;
1221 unsigned HOST_WIDE_INT type_size
1222 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1223 if (type_size == 0
1224 || type_size >= ISRA_ARG_SIZE_LIMIT)
1226 if (dump_file && (dump_flags & TDF_DETAILS))
1227 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1228 continue;
1230 if (type_internals_preclude_sra_p (type, &msg))
1232 if (dump_file && (dump_flags & TDF_DETAILS))
1233 fprintf (dump_file, " not a candidate, %s\n", msg);
1234 continue;
1237 if (dump_file && (dump_flags & TDF_DETAILS))
1238 fprintf (dump_file, " is a candidate\n");
1240 ret = true;
1241 desc->split_candidate = true;
1242 if (desc->by_ref)
1243 desc->deref_index = by_ref_count++;
1245 return ret;
1248 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1249 found, which happens if DECL is for a static chain. */
1251 static gensum_param_desc *
1252 get_gensum_param_desc (tree decl)
1254 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1255 gensum_param_desc **slot = decl2desc->get (decl);
1256 if (!slot)
1257 /* This can happen for static chains which we cannot handle so far. */
1258 return NULL;
1259 gcc_checking_assert (*slot);
1260 return *slot;
1264 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1265 write REASON to the dump file if there is one. */
1267 static void
1268 disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1270 if (!desc->split_candidate)
1271 return;
1273 if (dump_file && (dump_flags & TDF_DETAILS))
1274 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1275 desc->param_number, reason);
1277 desc->split_candidate = false;
1280 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1281 there is one. */
1283 static void
1284 disqualify_split_candidate (tree decl, const char *reason)
1286 gensum_param_desc *desc = get_gensum_param_desc (decl);
1287 if (desc)
1288 disqualify_split_candidate (desc, reason);
1291 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1292 first, check that there are not too many of them already. If so, do not
1293 allocate anything and return NULL. */
1295 static gensum_param_access *
1296 allocate_access (gensum_param_desc *desc,
1297 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1299 if (desc->access_count
1300 == (unsigned) param_ipa_sra_max_replacements)
1302 disqualify_split_candidate (desc, "Too many replacement candidates");
1303 return NULL;
1306 gensum_param_access *access
1307 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1308 sizeof (gensum_param_access));
1309 memset (access, 0, sizeof (*access));
1310 access->offset = offset;
1311 access->size = size;
1312 return access;
/* The context in which scan_expr_access has been called: a load, a function
   call argument, or a store.  Note that in rare circumstances when it is not
   clear whether an access is a load or a store, ISRA_CTX_STORE is used
   too.  */

enum isra_scan_context
{
  ISRA_CTX_LOAD,
  ISRA_CTX_ARG,
  ISRA_CTX_STORE
};
1322 /* Return an access describing memory access to the variable described by DESC
1323 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1324 at a certain tree level FIRST. Attempt to create it and put into the
1325 appropriate place in the access tree if does not exist, but fail and return
1326 NULL if there are already too many accesses, if it would create a partially
1327 overlapping access or if an access would end up within a pre-existing
1328 non-call access. */
1330 static gensum_param_access *
1331 get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1332 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1334 gensum_param_access *access = *first, **ptr = first;
1336 if (!access)
1338 /* No pre-existing access at this level, just create it. */
1339 gensum_param_access *a = allocate_access (desc, offset, size);
1340 if (!a)
1341 return NULL;
1342 *first = a;
1343 return *first;
1346 if (access->offset >= offset + size)
1348 /* We want to squeeze it in front of the very first access, just do
1349 it. */
1350 gensum_param_access *r = allocate_access (desc, offset, size);
1351 if (!r)
1352 return NULL;
1353 r->next_sibling = access;
1354 *first = r;
1355 return r;
1358 /* Skip all accesses that have to come before us until the next sibling is
1359 already too far. */
1360 while (offset >= access->offset + access->size
1361 && access->next_sibling
1362 && access->next_sibling->offset < offset + size)
1364 ptr = &access->next_sibling;
1365 access = access->next_sibling;
1368 /* At this point we know we do not belong before access. */
1369 gcc_assert (access->offset < offset + size);
1371 if (access->offset == offset && access->size == size)
1372 /* We found what we were looking for. */
1373 return access;
1375 if (access->offset <= offset
1376 && access->offset + access->size >= offset + size)
1378 /* We fit into access which is larger than us. We need to find/create
1379 something below access. But we only allow nesting in call
1380 arguments. */
1381 if (access->nonarg)
1382 return NULL;
1384 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1387 if (offset <= access->offset
1388 && offset + size >= access->offset + access->size)
1389 /* We are actually bigger than access, which fully fits into us, take its
1390 place and make all accesses fitting into it its children. */
1392 /* But first, we only allow nesting in call arguments so check if that is
1393 what we are trying to represent. */
1394 if (ctx != ISRA_CTX_ARG)
1395 return NULL;
1397 gensum_param_access *r = allocate_access (desc, offset, size);
1398 if (!r)
1399 return NULL;
1400 r->first_child = access;
1402 while (access->next_sibling
1403 && access->next_sibling->offset < offset + size)
1404 access = access->next_sibling;
1405 if (access->offset + access->size > offset + size)
1407 /* This must be a different access, which are sorted, so the
1408 following must be true and this signals a partial overlap. */
1409 gcc_assert (access->offset > offset);
1410 return NULL;
1413 r->next_sibling = access->next_sibling;
1414 access->next_sibling = NULL;
1415 *ptr = r;
1416 return r;
1419 if (offset >= access->offset + access->size)
1421 /* We belong after access. */
1422 gensum_param_access *r = allocate_access (desc, offset, size);
1423 if (!r)
1424 return NULL;
1425 r->next_sibling = access->next_sibling;
1426 access->next_sibling = r;
1427 return r;
1430 if (offset < access->offset)
1432 /* We know the following, otherwise we would have created a
1433 super-access. */
1434 gcc_checking_assert (offset + size < access->offset + access->size);
1435 return NULL;
1438 if (offset + size > access->offset + access->size)
1440 /* Likewise. */
1441 gcc_checking_assert (offset > access->offset);
1442 return NULL;
1445 gcc_unreachable ();
1448 /* Return an access describing memory access to the variable described by DESC
1449 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1450 to create if it does not exist, but fail and return NULL if there are
1451 already too many accesses, if it would create a partially overlapping access
1452 or if an access would end up in a non-call access. */
1454 static gensum_param_access *
1455 get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1456 isra_scan_context ctx)
1458 gcc_checking_assert (desc->split_candidate);
1460 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1461 size, ctx);
1462 if (!access)
1464 disqualify_split_candidate (desc,
1465 "Bad access overlap or too many accesses");
1466 return NULL;
1469 switch (ctx)
1471 case ISRA_CTX_STORE:
1472 gcc_assert (!desc->by_ref);
1473 /* Fall-through */
1474 case ISRA_CTX_LOAD:
1475 access->nonarg = true;
1476 break;
1477 case ISRA_CTX_ARG:
1478 break;
1481 return access;
1484 /* Verify that parameter access tree starting with ACCESS is in good shape.
1485 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1486 ACCESS or zero if there is none. */
1488 static bool
1489 verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1490 HOST_WIDE_INT parent_size)
1492 while (access)
1494 gcc_assert (access->offset >= 0 && access->size >= 0);
1496 if (parent_size != 0)
1498 if (access->offset < parent_offset)
1500 error ("Access offset before parent offset");
1501 return true;
1503 if (access->size >= parent_size)
1505 error ("Access size greater or equal to its parent size");
1506 return true;
1508 if (access->offset + access->size > parent_offset + parent_size)
1510 error ("Access terminates outside of its parent");
1511 return true;
1515 if (verify_access_tree_1 (access->first_child, access->offset,
1516 access->size))
1517 return true;
1519 if (access->next_sibling
1520 && (access->next_sibling->offset < access->offset + access->size))
1522 error ("Access overlaps with its sibling");
1523 return true;
1526 access = access->next_sibling;
1528 return false;
1531 /* Verify that parameter access tree starting with ACCESS is in good shape,
1532 halt compilation and dump the tree to stderr if not. */
1534 DEBUG_FUNCTION void
1535 isra_verify_access_tree (gensum_param_access *access)
1537 if (verify_access_tree_1 (access, 0, 0))
1539 for (; access; access = access->next_sibling)
1540 dump_gensum_access (stderr, access, 2);
1541 internal_error ("IPA-SRA access verification failed");
1546 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1547 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1549 static bool
1550 asm_visit_addr (gimple *, tree op, tree, void *)
1552 op = get_base_address (op);
1553 if (op
1554 && TREE_CODE (op) == PARM_DECL)
1555 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1557 return false;
1560 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1561 basic block BB, unless the BB has already been marked as a potentially
1562 final. */
1564 static void
1565 mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1566 basic_block bb)
1568 gcc_assert (desc->by_ref);
1569 gcc_checking_assert (desc->split_candidate);
1571 if (bitmap_bit_p (final_bbs, bb->index))
1572 return;
1574 int idx = bb->index * by_ref_count + desc->deref_index;
1575 if (bb_dereferences[idx] < dist)
1576 bb_dereferences[idx] = dist;
1579 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1580 previously recorded OLD_TYPE. */
1582 static bool
1583 type_prevails_p (tree old_type, tree new_type)
1585 if (old_type == new_type)
1586 return false;
1588 /* Non-aggregates are always better. */
1589 if (!is_gimple_reg_type (old_type)
1590 && is_gimple_reg_type (new_type))
1591 return true;
1592 if (is_gimple_reg_type (old_type)
1593 && !is_gimple_reg_type (new_type))
1594 return false;
1596 /* Prefer any complex or vector type over any other scalar type. */
1597 if (TREE_CODE (old_type) != COMPLEX_TYPE
1598 && TREE_CODE (old_type) != VECTOR_TYPE
1599 && (TREE_CODE (new_type) == COMPLEX_TYPE
1600 || TREE_CODE (new_type) == VECTOR_TYPE))
1601 return true;
1602 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1603 || TREE_CODE (old_type) == VECTOR_TYPE)
1604 && TREE_CODE (new_type) != COMPLEX_TYPE
1605 && TREE_CODE (new_type) != VECTOR_TYPE)
1606 return false;
1608 /* Use the integral type with the bigger precision. */
1609 if (INTEGRAL_TYPE_P (old_type)
1610 && INTEGRAL_TYPE_P (new_type))
1611 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1613 /* Attempt to disregard any integral type with non-full precision. */
1614 if (INTEGRAL_TYPE_P (old_type)
1615 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1616 != TYPE_PRECISION (old_type)))
1617 return true;
1618 if (INTEGRAL_TYPE_P (new_type)
1619 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1620 != TYPE_PRECISION (new_type)))
1621 return false;
1622 /* Stabilize the selection. */
1623 return TYPE_UID (old_type) < TYPE_UID (new_type);
1626 /* When scanning an expression which is a call argument, this structure
1627 specifies the call and the position of the argument. */
1629 struct scan_call_info
1631 /* Call graph edge representing the call. */
1632 cgraph_edge *cs;
1633 /* Total number of arguments in the call. */
1634 unsigned argument_count;
1635 /* Number of the actual argument being scanned. */
1636 unsigned arg_idx;
1639 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1640 call argument described by CALL_INFO. */
1642 static void
1643 record_nonregister_call_use (gensum_param_desc *desc,
1644 scan_call_info *call_info,
1645 unsigned unit_offset, unsigned unit_size)
1647 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1648 csum->init_inputs (call_info->argument_count);
1650 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1651 param_flow->aggregate_pass_through = true;
1652 set_single_param_flow_source (param_flow, desc->param_number);
1653 param_flow->unit_offset = unit_offset;
1654 param_flow->unit_size = unit_size;
1655 desc->call_uses++;
1658 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1659 modification. */
1661 static bool
1662 mark_maybe_modified (ao_ref *, tree, void *data)
1664 bool *maybe_modified = (bool *) data;
1665 *maybe_modified = true;
1666 return true;
1669 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1670 specifies whether EXPR is used in a load, store or as an argument call. BB
1671 must be the basic block in which expr resides. If CTX specifies call
1672 argument context, CALL_INFO must describe that call and argument position,
1673 otherwise it is ignored. */
1675 static void
1676 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1677 basic_block bb, scan_call_info *call_info = NULL)
1679 poly_int64 poffset, psize, pmax_size;
1680 HOST_WIDE_INT offset, size, max_size;
1681 tree base;
1682 bool deref = false;
1683 bool reverse;
1685 if (TREE_CODE (expr) == BIT_FIELD_REF
1686 || TREE_CODE (expr) == IMAGPART_EXPR
1687 || TREE_CODE (expr) == REALPART_EXPR)
1688 expr = TREE_OPERAND (expr, 0);
1690 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1692 if (TREE_CODE (base) == MEM_REF)
1694 tree op = TREE_OPERAND (base, 0);
1695 if (TREE_CODE (op) != SSA_NAME
1696 || !SSA_NAME_IS_DEFAULT_DEF (op))
1697 return;
1698 base = SSA_NAME_VAR (op);
1699 if (!base)
1700 return;
1701 deref = true;
1703 if (TREE_CODE (base) != PARM_DECL)
1704 return;
1706 gensum_param_desc *desc = get_gensum_param_desc (base);
1707 if (!desc || !desc->split_candidate)
1708 return;
1710 if (!poffset.is_constant (&offset)
1711 || !psize.is_constant (&size)
1712 || !pmax_size.is_constant (&max_size))
1714 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1715 "access.");
1716 return;
1718 if (size < 0 || size != max_size)
1720 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1721 return;
1723 if (TREE_CODE (expr) == COMPONENT_REF
1724 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1726 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1727 return;
1729 if (offset < 0)
1731 disqualify_split_candidate (desc, "Encountered an access at a "
1732 "negative offset.");
1733 return;
1735 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1736 gcc_assert ((size % BITS_PER_UNIT) == 0);
1737 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1738 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1740 disqualify_split_candidate (desc, "Encountered an access with too big "
1741 "offset or size");
1742 return;
1745 tree type = TREE_TYPE (expr);
1746 unsigned int exp_align = get_object_alignment (expr);
1748 if (exp_align < TYPE_ALIGN (type))
1750 disqualify_split_candidate (desc, "Underaligned access.");
1751 return;
1754 if (deref)
1756 if (!desc->by_ref)
1758 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1759 return;
1761 else if (ctx == ISRA_CTX_STORE)
1763 disqualify_split_candidate (desc, "Storing to data passed by "
1764 "reference.");
1765 return;
1768 if (!aa_walking_limit)
1770 disqualify_split_candidate (desc, "Out of alias analysis step "
1771 "limit.");
1772 return;
1775 gcc_checking_assert (gimple_vuse (stmt));
1776 bool maybe_modified = false;
1777 ao_ref ar;
1779 ao_ref_init (&ar, expr);
1780 bitmap visited = BITMAP_ALLOC (NULL);
1781 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1782 mark_maybe_modified, &maybe_modified,
1783 &visited, NULL, aa_walking_limit);
1784 BITMAP_FREE (visited);
1785 if (walked > 0)
1787 gcc_assert (aa_walking_limit > walked);
1788 aa_walking_limit = aa_walking_limit - walked;
1790 if (walked < 0)
1791 aa_walking_limit = 0;
1792 if (maybe_modified || walked < 0)
1794 disqualify_split_candidate (desc, "Data passed by reference possibly "
1795 "modified through an alias.");
1796 return;
1798 else
1799 mark_param_dereference (desc, offset + size, bb);
1801 else
1802 /* Pointer parameters with direct uses should have been ruled out by
1803 analyzing SSA default def when looking at the parameters. */
1804 gcc_assert (!desc->by_ref);
1806 gensum_param_access *access = get_access (desc, offset, size, ctx);
1807 if (!access)
1808 return;
1810 if (ctx == ISRA_CTX_ARG)
1812 gcc_checking_assert (call_info);
1814 if (!deref)
1815 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1816 size / BITS_PER_UNIT);
1817 else
1818 /* This is not a pass-through of a pointer, this is a use like any
1819 other. */
1820 access->nonarg = true;
1823 if (!access->type)
1825 access->type = type;
1826 access->alias_ptr_type = reference_alias_ptr_type (expr);
1827 access->reverse = reverse;
1829 else
1831 if (exp_align < TYPE_ALIGN (access->type))
1833 disqualify_split_candidate (desc, "Reference has lower alignment "
1834 "than a previous one.");
1835 return;
1837 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1839 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1840 return;
1842 if (access->reverse != reverse)
1844 disqualify_split_candidate (desc, "Both normal and reverse "
1845 "scalar storage order.");
1846 return;
1848 if (!deref
1849 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1850 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1852 /* We need the same aggregate type on all accesses to be able to
1853 distinguish transformation spots from pass-through arguments in
1854 the transformation phase. */
1855 disqualify_split_candidate (desc, "We do not support aggregate "
1856 "type punning.");
1857 return;
1860 if (type_prevails_p (access->type, type))
1861 access->type = type;
1865 /* Scan body function described by NODE and FUN and create access trees for
1866 parameters. */
1868 static void
1869 scan_function (cgraph_node *node, struct function *fun)
1871 basic_block bb;
1873 FOR_EACH_BB_FN (bb, fun)
1875 gimple_stmt_iterator gsi;
1876 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1878 gimple *stmt = gsi_stmt (gsi);
1880 if (stmt_can_throw_external (fun, stmt))
1881 bitmap_set_bit (final_bbs, bb->index);
1882 switch (gimple_code (stmt))
1884 case GIMPLE_RETURN:
1886 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1887 if (t != NULL_TREE)
1888 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1889 bitmap_set_bit (final_bbs, bb->index);
1891 break;
1893 case GIMPLE_ASSIGN:
1894 if (gimple_assign_single_p (stmt)
1895 && !gimple_clobber_p (stmt))
1897 tree rhs = gimple_assign_rhs1 (stmt);
1898 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1899 tree lhs = gimple_assign_lhs (stmt);
1900 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1902 break;
1904 case GIMPLE_CALL:
1906 unsigned argument_count = gimple_call_num_args (stmt);
1907 isra_scan_context ctx = ISRA_CTX_ARG;
1908 scan_call_info call_info, *call_info_p = &call_info;
1909 if (gimple_call_internal_p (stmt))
1911 call_info_p = NULL;
1912 ctx = ISRA_CTX_LOAD;
1913 internal_fn ifn = gimple_call_internal_fn (stmt);
1914 if (internal_store_fn_p (ifn))
1915 ctx = ISRA_CTX_STORE;
1917 else
1919 call_info.cs = node->get_edge (stmt);
1920 call_info.argument_count = argument_count;
1923 for (unsigned i = 0; i < argument_count; i++)
1925 call_info.arg_idx = i;
1926 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1927 ctx, bb, call_info_p);
1930 tree lhs = gimple_call_lhs (stmt);
1931 if (lhs)
1932 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1933 int flags = gimple_call_flags (stmt);
1934 if ((flags & (ECF_CONST | ECF_PURE)) == 0)
1935 bitmap_set_bit (final_bbs, bb->index);
1937 break;
1939 case GIMPLE_ASM:
1941 gasm *asm_stmt = as_a <gasm *> (stmt);
1942 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1943 asm_visit_addr);
1944 bitmap_set_bit (final_bbs, bb->index);
1946 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1948 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1949 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1951 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1953 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1954 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1957 break;
1959 default:
1960 break;
1966 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1967 return statements, or if results of any operations it is involved in are
1968 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1969 names we have already started investigating. */
1971 static bool
1972 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1974 bool res = true;
1975 imm_use_iterator imm_iter;
1976 gimple *stmt;
1978 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1980 if (is_gimple_debug (stmt))
1981 continue;
1983 if (gimple_code (stmt) == GIMPLE_RETURN)
1985 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1986 if (t != name)
1988 res = false;
1989 break;
1992 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1993 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1994 || gimple_code (stmt) == GIMPLE_PHI))
1996 /* TODO: And perhaps for const function calls too? */
1997 tree lhs;
1998 if (gimple_code (stmt) == GIMPLE_PHI)
1999 lhs = gimple_phi_result (stmt);
2000 else
2001 lhs = gimple_assign_lhs (stmt);
2003 if (TREE_CODE (lhs) != SSA_NAME)
2005 res = false;
2006 break;
2008 gcc_assert (!gimple_vdef (stmt));
2009 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2010 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2012 res = false;
2013 break;
2016 else
2018 res = false;
2019 break;
2022 return res;
2025 /* Inspect the uses of the return value of the call associated with CS, and if
2026 it is not used or if it is only used to construct the return value of the
2027 caller, mark it as such in call or caller summary. Also check for
2028 misaligned arguments. */
2030 static void
2031 isra_analyze_call (cgraph_edge *cs)
2033 gcall *call_stmt = cs->call_stmt;
2034 unsigned count = gimple_call_num_args (call_stmt);
2035 isra_call_summary *csum = call_sums->get_create (cs);
2037 for (unsigned i = 0; i < count; i++)
2039 tree arg = gimple_call_arg (call_stmt, i);
2040 if (is_gimple_reg (arg))
2041 continue;
2043 tree offset;
2044 poly_int64 bitsize, bitpos;
2045 machine_mode mode;
2046 int unsignedp, reversep, volatilep = 0;
2047 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2048 &unsignedp, &reversep, &volatilep);
2049 if (!multiple_p (bitpos, BITS_PER_UNIT))
2051 csum->m_bit_aligned_arg = true;
2052 break;
2056 tree lhs = gimple_call_lhs (call_stmt);
2057 if (lhs)
2059 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2060 from this function (without being read anywhere). */
2061 if (TREE_CODE (lhs) == SSA_NAME)
2063 bitmap analyzed = BITMAP_ALLOC (NULL);
2064 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2065 lhs, analyzed))
2066 csum->m_return_returned = true;
2067 BITMAP_FREE (analyzed);
2070 else
2071 csum->m_return_ignored = true;
2074 /* Look at all calls going out of NODE, described also by IFS and perform all
2075 analyses necessary for IPA-SRA that are not done at body scan time or done
2076 even when body is not scanned because the function is not a candidate. */
2078 static void
2079 isra_analyze_all_outgoing_calls (cgraph_node *node)
2081 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2082 isra_analyze_call (cs);
2083 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2084 isra_analyze_call (cs);
2087 /* Dump a dereferences table with heading STR to file F. */
2089 static void
2090 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2092 basic_block bb;
2094 fprintf (dump_file, "%s", str);
2095 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2096 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2098 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2099 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2101 int i;
2102 for (i = 0; i < by_ref_count; i++)
2104 int idx = bb->index * by_ref_count + i;
2105 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2108 fprintf (f, "\n");
2110 fprintf (dump_file, "\n");
2113 /* Propagate distances in bb_dereferences in the opposite direction than the
2114 control flow edges, in each step storing the maximum of the current value
2115 and the minimum of all successors. These steps are repeated until the table
2116 stabilizes. Note that BBs which might terminate the functions (according to
2117 final_bbs bitmap) never updated in this way. */
2119 static void
2120 propagate_dereference_distances (struct function *fun)
2122 basic_block bb;
2124 if (dump_file && (dump_flags & TDF_DETAILS))
2125 dump_dereferences_table (dump_file, fun,
2126 "Dereference table before propagation:\n");
2128 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2129 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2130 FOR_EACH_BB_FN (bb, fun)
2132 queue.quick_push (bb);
2133 bb->aux = bb;
2136 while (!queue.is_empty ())
2138 edge_iterator ei;
2139 edge e;
2140 bool change = false;
2141 int i;
2143 bb = queue.pop ();
2144 bb->aux = NULL;
2146 if (bitmap_bit_p (final_bbs, bb->index))
2147 continue;
2149 for (i = 0; i < by_ref_count; i++)
2151 int idx = bb->index * by_ref_count + i;
2152 bool first = true;
2153 HOST_WIDE_INT inh = 0;
2155 FOR_EACH_EDGE (e, ei, bb->succs)
2157 int succ_idx = e->dest->index * by_ref_count + i;
2159 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2160 continue;
2162 if (first)
2164 first = false;
2165 inh = bb_dereferences [succ_idx];
2167 else if (bb_dereferences [succ_idx] < inh)
2168 inh = bb_dereferences [succ_idx];
2171 if (!first && bb_dereferences[idx] < inh)
2173 bb_dereferences[idx] = inh;
2174 change = true;
2178 if (change)
2179 FOR_EACH_EDGE (e, ei, bb->preds)
2181 if (e->src->aux)
2182 continue;
2184 e->src->aux = e->src;
2185 queue.quick_push (e->src);
2189 if (dump_file && (dump_flags & TDF_DETAILS))
2190 dump_dereferences_table (dump_file, fun,
2191 "Dereference table after propagation:\n");
2194 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2195 children, return true if the parameter cannot be split, otherwise return
2196 true and update *TOTAL_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be the
2197 index of the entry BB in the function of PARM. */
2199 static bool
2200 check_gensum_access (tree parm, gensum_param_desc *desc,
2201 gensum_param_access *access,
2202 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2203 int entry_bb_index)
2205 if (access->nonarg)
2207 *only_calls = false;
2208 *nonarg_acc_size += access->size;
2210 if (access->first_child)
2212 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2213 return true;
2216 /* Do not decompose a non-BLKmode param in a way that would create
2217 BLKmode params. Especially for by-reference passing (thus,
2218 pointer-type param) this is hardly worthwhile. */
2219 if (DECL_MODE (parm) != BLKmode
2220 && TYPE_MODE (access->type) == BLKmode)
2222 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2223 return true;
2226 if (desc->by_ref)
2228 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2229 if ((access->offset + access->size) > bb_dereferences[idx])
2231 disqualify_split_candidate (desc, "Would create a possibly "
2232 "illegal dereference in a caller.");
2233 return true;
2237 for (gensum_param_access *ch = access->first_child;
2239 ch = ch->next_sibling)
2240 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2241 entry_bb_index))
2242 return true;
2244 return false;
2247 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2248 descriptor DESC. */
2250 static void
2251 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2253 param_access *to = ggc_cleared_alloc<param_access> ();
2254 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2255 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2256 to->unit_offset = from->offset / BITS_PER_UNIT;
2257 to->unit_size = from->size / BITS_PER_UNIT;
2258 to->type = from->type;
2259 to->alias_ptr_type = from->alias_ptr_type;
2260 to->certain = from->nonarg;
2261 to->reverse = from->reverse;
2262 vec_safe_push (desc->accesses, to);
2264 for (gensum_param_access *ch = from->first_child;
2266 ch = ch->next_sibling)
2267 copy_accesses_to_ipa_desc (ch, desc);
2270 /* Analyze function body scan results stored in param_accesses and
2271 param_accesses, detect possible transformations and store information of
2272 those in function summary. NODE, FUN and IFS are all various structures
2273 describing the currently analyzed function. */
2275 static void
2276 process_scan_results (cgraph_node *node, struct function *fun,
2277 isra_func_summary *ifs,
2278 vec<gensum_param_desc> *param_descriptions)
2280 bool check_pass_throughs = false;
2281 bool dereferences_propagated = false;
2282 tree parm = DECL_ARGUMENTS (node->decl);
2283 unsigned param_count = param_descriptions->length();
2285 for (unsigned desc_index = 0;
2286 desc_index < param_count;
2287 desc_index++, parm = DECL_CHAIN (parm))
2289 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2290 if (!desc->split_candidate)
2291 continue;
2293 if (flag_checking)
2294 isra_verify_access_tree (desc->accesses);
2296 if (!dereferences_propagated
2297 && desc->by_ref
2298 && desc->accesses)
2300 propagate_dereference_distances (fun);
2301 dereferences_propagated = true;
2304 HOST_WIDE_INT nonarg_acc_size = 0;
2305 bool only_calls = true;
2306 bool check_failed = false;
2308 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2309 for (gensum_param_access *acc = desc->accesses;
2310 acc;
2311 acc = acc->next_sibling)
2312 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2313 entry_bb_index))
2315 check_failed = true;
2316 break;
2318 if (check_failed)
2319 continue;
2321 if (only_calls)
2322 desc->locally_unused = true;
2324 HOST_WIDE_INT cur_param_size
2325 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2326 HOST_WIDE_INT param_size_limit;
2327 if (!desc->by_ref || optimize_function_for_size_p (fun))
2328 param_size_limit = cur_param_size;
2329 else
2330 param_size_limit
2331 = opt_for_fn (node->decl,
2332 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2333 if (nonarg_acc_size > param_size_limit
2334 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2336 disqualify_split_candidate (desc, "Would result into a too big set "
2337 "of replacements.");
2339 else
2341 /* create_parameter_descriptors makes sure unit sizes of all
2342 candidate parameters fit unsigned integers restricted to
2343 ISRA_ARG_SIZE_LIMIT. */
2344 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2345 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2346 if (desc->split_candidate && desc->ptr_pt_count)
2348 gcc_assert (desc->by_ref);
2349 check_pass_throughs = true;
2354 /* When a pointer parameter is passed-through to a callee, in which it is
2355 only used to read only one or a few items, we can attempt to transform it
2356 to obtaining and passing through the items instead of the pointer. But we
2357 must take extra care that 1) we do not introduce any segfault by moving
2358 dereferences above control flow and that 2) the data is not modified
2359 through an alias in this function. The IPA analysis must not introduce
2360 any accesses candidates unless it can prove both.
2362 The current solution is very crude as it consists of ensuring that the
2363 call postdominates entry BB and that the definition of VUSE of the call is
2364 default definition. TODO: For non-recursive callees in the same
2365 compilation unit we could do better by doing analysis in topological order
2366 an looking into access candidates of callees, using their alias_ptr_types
2367 to attempt real AA. We could also use the maximum known dereferenced
2368 offset in this function at IPA level.
2370 TODO: Measure the overhead and the effect of just being pessimistic.
2371 Maybe this is only -O3 material?
2373 bool pdoms_calculated = false;
2374 if (check_pass_throughs)
2375 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2377 gcall *call_stmt = cs->call_stmt;
2378 tree vuse = gimple_vuse (call_stmt);
2380 /* If the callee is a const function, we don't get a VUSE. In such
2381 case there will be no memory accesses in the called function (or the
2382 const attribute is wrong) and then we just don't care. */
2383 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2385 unsigned count = gimple_call_num_args (call_stmt);
2386 isra_call_summary *csum = call_sums->get_create (cs);
2387 csum->init_inputs (count);
2388 csum->m_before_any_store = uses_memory_as_obtained;
2389 for (unsigned argidx = 0; argidx < count; argidx++)
2391 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2392 continue;
2393 unsigned pidx
2394 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2395 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2396 if (!desc->split_candidate)
2398 csum->m_arg_flow[argidx].pointer_pass_through = false;
2399 continue;
2401 if (!uses_memory_as_obtained)
2402 continue;
2404 /* Post-dominator check placed last, hoping that it usually won't
2405 be needed. */
2406 if (!pdoms_calculated)
2408 gcc_checking_assert (cfun);
2409 connect_infinite_loops_to_exit ();
2410 calculate_dominance_info (CDI_POST_DOMINATORS);
2411 pdoms_calculated = true;
2413 if (dominated_by_p (CDI_POST_DOMINATORS,
2414 gimple_bb (call_stmt),
2415 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2416 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2420 if (pdoms_calculated)
2422 free_dominance_info (CDI_POST_DOMINATORS);
2423 remove_fake_exit_edges ();
2426 /* TODO: Add early exit if we disqualified everything. This also requires
2427 that we either relax the restriction that
2428 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2429 or store the number of parameters to IPA-SRA function summary and use that
2430 when just removing params. */
2432 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2433 ifs->m_parameters->quick_grow_cleared (param_count);
2434 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2436 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2437 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2439 d->param_size_limit = s->param_size_limit;
2440 d->size_reached = s->nonarg_acc_size;
2441 d->locally_unused = s->locally_unused;
2442 d->split_candidate = s->split_candidate;
2443 d->by_ref = s->by_ref;
2445 for (gensum_param_access *acc = s->accesses;
2446 acc;
2447 acc = acc->next_sibling)
2448 copy_accesses_to_ipa_desc (acc, d);
2451 if (dump_file)
2452 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2455 /* Return true if there are any overlaps among certain accesses of DESC. If
2456 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2457 too. DESC is assumed to be a split candidate that is not locally
2458 unused. */
2460 static bool
2461 overlapping_certain_accesses_p (isra_param_desc *desc,
2462 bool *certain_access_present_p)
2464 unsigned pclen = vec_safe_length (desc->accesses);
2465 for (unsigned i = 0; i < pclen; i++)
2467 param_access *a1 = (*desc->accesses)[i];
2469 if (!a1->certain)
2470 continue;
2471 if (certain_access_present_p)
2472 *certain_access_present_p = true;
2473 for (unsigned j = i + 1; j < pclen; j++)
2475 param_access *a2 = (*desc->accesses)[j];
2476 if (a2->certain
2477 && a1->unit_offset < a2->unit_offset + a2->unit_size
2478 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2479 return true;
2482 return false;
2485 /* Check for any overlaps of certain param accesses among splitting candidates
2486 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2487 check that used splitting candidates have at least one certain access. */
2489 static void
2490 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2492 isra_func_summary *ifs = func_sums->get (node);
2493 if (!ifs || !ifs->m_candidate)
2494 return;
2495 unsigned param_count = vec_safe_length (ifs->m_parameters);
2496 for (unsigned pidx = 0; pidx < param_count; pidx++)
2498 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2499 if (!desc->split_candidate || desc->locally_unused)
2500 continue;
2502 bool certain_access_present = !certain_must_exist;
2503 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2504 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2505 "which overlap", node->dump_name (), pidx);
2506 if (!certain_access_present)
2507 internal_error ("Function %s, parameter %u, is used but does not "
2508 "have any certain IPA-SRA access",
2509 node->dump_name (), pidx);
2513 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2514 this compilation unit and create summary structures describing IPA-SRA
2515 opportunities and constraints in them. */
2517 static void
2518 ipa_sra_generate_summary (void)
2520 struct cgraph_node *node;
2522 gcc_checking_assert (!func_sums);
2523 gcc_checking_assert (!call_sums);
2524 func_sums
2525 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2526 ipa_sra_function_summaries (symtab, true));
2527 call_sums = new ipa_sra_call_summaries (symtab);
2529 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2530 ipa_sra_summarize_function (node);
2531 return;
2534 /* Write intraprocedural analysis information about E and all of its outgoing
2535 edges into a stream for LTO WPA. */
2537 static void
2538 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2540 isra_call_summary *csum = call_sums->get (e);
2541 unsigned input_count = csum->m_arg_flow.length ();
2542 streamer_write_uhwi (ob, input_count);
2543 for (unsigned i = 0; i < input_count; i++)
2545 isra_param_flow *ipf = &csum->m_arg_flow[i];
2546 streamer_write_hwi (ob, ipf->length);
2547 bitpack_d bp = bitpack_create (ob->main_stream);
2548 for (int j = 0; j < ipf->length; j++)
2549 bp_pack_value (&bp, ipf->inputs[j], 8);
2550 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2551 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2552 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2553 streamer_write_bitpack (&bp);
2554 streamer_write_uhwi (ob, ipf->unit_offset);
2555 streamer_write_uhwi (ob, ipf->unit_size);
2557 bitpack_d bp = bitpack_create (ob->main_stream);
2558 bp_pack_value (&bp, csum->m_return_ignored, 1);
2559 bp_pack_value (&bp, csum->m_return_returned, 1);
2560 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2561 bp_pack_value (&bp, csum->m_before_any_store, 1);
2562 streamer_write_bitpack (&bp);
2565 /* Write intraprocedural analysis information about NODE and all of its outgoing
2566 edges into a stream for LTO WPA. */
2568 static void
2569 isra_write_node_summary (output_block *ob, cgraph_node *node)
2571 isra_func_summary *ifs = func_sums->get (node);
2572 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2573 int node_ref = lto_symtab_encoder_encode (encoder, node);
2574 streamer_write_uhwi (ob, node_ref);
2576 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2577 streamer_write_uhwi (ob, param_desc_count);
2578 for (unsigned i = 0; i < param_desc_count; i++)
2580 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2581 unsigned access_count = vec_safe_length (desc->accesses);
2582 streamer_write_uhwi (ob, access_count);
2583 for (unsigned j = 0; j < access_count; j++)
2585 param_access *acc = (*desc->accesses)[j];
2586 stream_write_tree (ob, acc->type, true);
2587 stream_write_tree (ob, acc->alias_ptr_type, true);
2588 streamer_write_uhwi (ob, acc->unit_offset);
2589 streamer_write_uhwi (ob, acc->unit_size);
2590 bitpack_d bp = bitpack_create (ob->main_stream);
2591 bp_pack_value (&bp, acc->certain, 1);
2592 streamer_write_bitpack (&bp);
2594 streamer_write_uhwi (ob, desc->param_size_limit);
2595 streamer_write_uhwi (ob, desc->size_reached);
2596 bitpack_d bp = bitpack_create (ob->main_stream);
2597 bp_pack_value (&bp, desc->locally_unused, 1);
2598 bp_pack_value (&bp, desc->split_candidate, 1);
2599 bp_pack_value (&bp, desc->by_ref, 1);
2600 streamer_write_bitpack (&bp);
2602 bitpack_d bp = bitpack_create (ob->main_stream);
2603 bp_pack_value (&bp, ifs->m_candidate, 1);
2604 bp_pack_value (&bp, ifs->m_returns_value, 1);
2605 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2606 gcc_assert (!ifs->m_queued);
2607 streamer_write_bitpack (&bp);
2609 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2610 isra_write_edge_summary (ob, e);
2611 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2612 isra_write_edge_summary (ob, e);
2615 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2617 static void
2618 ipa_sra_write_summary (void)
2620 if (!func_sums || !call_sums)
2621 return;
2623 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2624 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2625 ob->symbol = NULL;
2627 unsigned int count = 0;
2628 lto_symtab_encoder_iterator lsei;
2629 for (lsei = lsei_start_function_in_partition (encoder);
2630 !lsei_end_p (lsei);
2631 lsei_next_function_in_partition (&lsei))
2633 cgraph_node *node = lsei_cgraph_node (lsei);
2634 if (node->has_gimple_body_p ()
2635 && func_sums->get (node) != NULL)
2636 count++;
2638 streamer_write_uhwi (ob, count);
2640 /* Process all of the functions. */
2641 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2642 lsei_next_function_in_partition (&lsei))
2644 cgraph_node *node = lsei_cgraph_node (lsei);
2645 if (node->has_gimple_body_p ()
2646 && func_sums->get (node) != NULL)
2647 isra_write_node_summary (ob, node);
2649 streamer_write_char_stream (ob->main_stream, 0);
2650 produce_asm (ob, NULL);
2651 destroy_output_block (ob);
2654 /* Read intraprocedural analysis information about E and all of its outgoing
2655 edges into a stream for LTO WPA. */
2657 static void
2658 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2660 isra_call_summary *csum = call_sums->get_create (cs);
2661 unsigned input_count = streamer_read_uhwi (ib);
2662 csum->init_inputs (input_count);
2663 for (unsigned i = 0; i < input_count; i++)
2665 isra_param_flow *ipf = &csum->m_arg_flow[i];
2666 ipf->length = streamer_read_hwi (ib);
2667 bitpack_d bp = streamer_read_bitpack (ib);
2668 for (int j = 0; j < ipf->length; j++)
2669 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2670 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2671 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2672 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2673 ipf->unit_offset = streamer_read_uhwi (ib);
2674 ipf->unit_size = streamer_read_uhwi (ib);
2676 bitpack_d bp = streamer_read_bitpack (ib);
2677 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2678 csum->m_return_returned = bp_unpack_value (&bp, 1);
2679 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2680 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2683 /* Read intraprocedural analysis information about NODE and all of its outgoing
2684 edges into a stream for LTO WPA. */
2686 static void
2687 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2688 struct data_in *data_in)
2690 isra_func_summary *ifs = func_sums->get_create (node);
2691 unsigned param_desc_count = streamer_read_uhwi (ib);
2692 if (param_desc_count > 0)
2694 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2695 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2697 for (unsigned i = 0; i < param_desc_count; i++)
2699 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2700 unsigned access_count = streamer_read_uhwi (ib);
2701 for (unsigned j = 0; j < access_count; j++)
2703 param_access *acc = ggc_cleared_alloc<param_access> ();
2704 acc->type = stream_read_tree (ib, data_in);
2705 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2706 acc->unit_offset = streamer_read_uhwi (ib);
2707 acc->unit_size = streamer_read_uhwi (ib);
2708 bitpack_d bp = streamer_read_bitpack (ib);
2709 acc->certain = bp_unpack_value (&bp, 1);
2710 vec_safe_push (desc->accesses, acc);
2712 desc->param_size_limit = streamer_read_uhwi (ib);
2713 desc->size_reached = streamer_read_uhwi (ib);
2714 bitpack_d bp = streamer_read_bitpack (ib);
2715 desc->locally_unused = bp_unpack_value (&bp, 1);
2716 desc->split_candidate = bp_unpack_value (&bp, 1);
2717 desc->by_ref = bp_unpack_value (&bp, 1);
2719 bitpack_d bp = streamer_read_bitpack (ib);
2720 ifs->m_candidate = bp_unpack_value (&bp, 1);
2721 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2722 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2723 ifs->m_queued = 0;
2725 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2726 isra_read_edge_summary (ib, e);
2727 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2728 isra_read_edge_summary (ib, e);
2731 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2732 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c,
2733 it should be possible to unify them somehow. */
2735 static void
2736 isra_read_summary_section (struct lto_file_decl_data *file_data,
2737 const char *data, size_t len)
2739 const struct lto_function_header *header =
2740 (const struct lto_function_header *) data;
2741 const int cfg_offset = sizeof (struct lto_function_header);
2742 const int main_offset = cfg_offset + header->cfg_size;
2743 const int string_offset = main_offset + header->main_size;
2744 struct data_in *data_in;
2745 unsigned int i;
2746 unsigned int count;
2748 lto_input_block ib_main ((const char *) data + main_offset,
2749 header->main_size, file_data->mode_table);
2751 data_in =
2752 lto_data_in_create (file_data, (const char *) data + string_offset,
2753 header->string_size, vNULL);
2754 count = streamer_read_uhwi (&ib_main);
2756 for (i = 0; i < count; i++)
2758 unsigned int index;
2759 struct cgraph_node *node;
2760 lto_symtab_encoder_t encoder;
2762 index = streamer_read_uhwi (&ib_main);
2763 encoder = file_data->symtab_node_encoder;
2764 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2765 index));
2766 gcc_assert (node->definition);
2767 isra_read_node_info (&ib_main, node, data_in);
2769 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2770 len);
2771 lto_data_in_delete (data_in);
2774 /* Read intraprocedural analysis information into a stream for LTO WPA. */
2776 static void
2777 ipa_sra_read_summary (void)
2779 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2780 struct lto_file_decl_data *file_data;
2781 unsigned int j = 0;
2783 gcc_checking_assert (!func_sums);
2784 gcc_checking_assert (!call_sums);
2785 func_sums
2786 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2787 ipa_sra_function_summaries (symtab, true));
2788 call_sums = new ipa_sra_call_summaries (symtab);
2790 while ((file_data = file_data_vec[j++]))
2792 size_t len;
2793 const char *data
2794 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2795 if (data)
2796 isra_read_summary_section (file_data, data, len);
2800 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2802 static void
2803 ipa_sra_dump_all_summaries (FILE *f)
2805 cgraph_node *node;
2806 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2808 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2810 isra_func_summary *ifs = func_sums->get (node);
2811 if (!ifs)
2812 fprintf (f, " Function does not have any associated IPA-SRA "
2813 "summary\n");
2814 else
2816 if (!ifs->m_candidate)
2818 fprintf (f, " Not a candidate function\n");
2819 continue;
2821 if (ifs->m_returns_value)
2822 fprintf (f, " Returns value\n");
2823 if (vec_safe_is_empty (ifs->m_parameters))
2824 fprintf (f, " No parameter information. \n");
2825 else
2826 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2828 fprintf (f, " Descriptor for parameter %i:\n", i);
2829 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2831 fprintf (f, "\n");
2834 struct cgraph_edge *cs;
2835 for (cs = node->callees; cs; cs = cs->next_callee)
2837 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2838 cs->callee->dump_name ());
2839 isra_call_summary *csum = call_sums->get (cs);
2840 if (csum)
2841 csum->dump (f);
2842 else
2843 fprintf (f, " Call summary is MISSING!\n");
2847 fprintf (f, "\n\n");
2850 /* Perform function-scope viability tests that can be only made at IPA level
2851 and return false if the function is deemed unsuitable for IPA-SRA. */
2853 static bool
2854 ipa_sra_ipa_function_checks (cgraph_node *node)
2856 if (!node->can_be_local_p ())
2858 if (dump_file)
2859 fprintf (dump_file, "Function %s disqualified because it cannot be "
2860 "made local.\n", node->dump_name ());
2861 return false;
2863 if (!node->can_change_signature)
2865 if (dump_file)
2866 fprintf (dump_file, "Function can not change signature.\n");
2867 return false;
2870 return true;
2873 /* Issues found out by check_callers_for_issues. */
2875 struct caller_issues
2877 /* The candidate being considered. */
2878 cgraph_node *candidate;
2879 /* There is a thunk among callers. */
2880 bool thunk;
2881 /* Call site with no available information. */
2882 bool unknown_callsite;
2883 /* Call from outside the the candidate's comdat group. */
2884 bool call_from_outside_comdat;
2885 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2886 bool bit_aligned_aggregate_argument;
2889 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2890 that apply. */
2892 static bool
2893 check_for_caller_issues (struct cgraph_node *node, void *data)
2895 struct caller_issues *issues = (struct caller_issues *) data;
2897 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2899 if (cs->caller->thunk)
2901 issues->thunk = true;
2902 /* TODO: We should be able to process at least some types of
2903 thunks. */
2904 return true;
2906 if (issues->candidate->calls_comdat_local
2907 && issues->candidate->same_comdat_group
2908 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2910 issues->call_from_outside_comdat = true;
2911 return true;
2914 isra_call_summary *csum = call_sums->get (cs);
2915 if (!csum)
2917 issues->unknown_callsite = true;
2918 return true;
2921 if (csum->m_bit_aligned_arg)
2922 issues->bit_aligned_aggregate_argument = true;
2924 return false;
2927 /* Look at all incoming edges to NODE, including aliases and thunks and look
2928 for problems. Return true if NODE type should not be modified at all. */
2930 static bool
2931 check_all_callers_for_issues (cgraph_node *node)
2933 struct caller_issues issues;
2934 memset (&issues, 0, sizeof (issues));
2935 issues.candidate = node;
2937 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2938 if (issues.unknown_callsite)
2940 if (dump_file && (dump_flags & TDF_DETAILS))
2941 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2942 "all modifications.\n", node->dump_name ());
2943 return true;
2945 /* TODO: We should be able to process at least some types of thunks. */
2946 if (issues.thunk)
2948 if (dump_file && (dump_flags & TDF_DETAILS))
2949 fprintf (dump_file, "A call of %s is through thunk, which are not"
2950 " handled yet. Disabling all modifications.\n",
2951 node->dump_name ());
2952 return true;
2954 if (issues.call_from_outside_comdat)
2956 if (dump_file)
2957 fprintf (dump_file, "Function would become private comdat called "
2958 "outside of its comdat group.\n");
2959 return true;
2962 if (issues.bit_aligned_aggregate_argument)
2964 /* Let's only remove parameters/return values from such functions.
2965 TODO: We could only prevent splitting the problematic parameters if
2966 anybody thinks it is worth it. */
2967 if (dump_file && (dump_flags & TDF_DETAILS))
2968 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2969 " disabling parameter splitting.\n", node->dump_name ());
2971 isra_func_summary *ifs = func_sums->get (node);
2972 gcc_checking_assert (ifs);
2973 unsigned param_count = vec_safe_length (ifs->m_parameters);
2974 for (unsigned i = 0; i < param_count; i++)
2975 (*ifs->m_parameters)[i].split_candidate = false;
2977 return false;
2980 /* Find the access with corresponding OFFSET and SIZE among accesses in
2981 PARAM_DESC and return it or NULL if such an access is not there. */
2983 static param_access *
2984 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2986 unsigned pclen = vec_safe_length (param_desc->accesses);
2988 /* The search is linear but the number of stored accesses is bound by
2989 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2991 for (unsigned i = 0; i < pclen; i++)
2992 if ((*param_desc->accesses)[i]->unit_offset == offset
2993 && (*param_desc->accesses)[i]->unit_size == size)
2994 return (*param_desc->accesses)[i];
2996 return NULL;
2999 /* Return iff the total size of definite replacement SIZE would violate the
3000 limit set for it in PARAM. */
3002 static bool
3003 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3005 unsigned limit = desc->param_size_limit;
3006 if (size > limit
3007 || (!desc->by_ref && size == limit))
3008 return true;
3009 return false;
3012 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3013 the set limit. IDX is the parameter number which is dumped when
3014 disqualifying. */
3016 static void
3017 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3019 unsigned after = desc->size_reached + size;
3020 if (size_would_violate_limit_p (desc, after))
3022 if (dump_file && (dump_flags & TDF_DETAILS))
3023 fprintf (dump_file, " ...size limit reached, disqualifying "
3024 "candidate parameter %u\n", idx);
3025 desc->split_candidate = false;
3026 return;
3028 desc->size_reached = after;
3031 /* Take all actions required to deal with an edge CS that represents a call to
3032 an unknown or un-analyzed function, for both parameter removal and
3033 splitting. */
3035 static void
3036 process_edge_to_unknown_caller (cgraph_edge *cs)
3038 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3039 gcc_checking_assert (from_ifs);
3040 isra_call_summary *csum = call_sums->get (cs);
3042 if (dump_file && (dump_flags & TDF_DETAILS))
3043 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3044 cs->caller->dump_name ());
3046 unsigned args_count = csum->m_arg_flow.length ();
3047 for (unsigned i = 0; i < args_count; i++)
3049 isra_param_flow *ipf = &csum->m_arg_flow[i];
3051 if (ipf->pointer_pass_through)
3053 isra_param_desc *param_desc
3054 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3055 param_desc->locally_unused = false;
3056 param_desc->split_candidate = false;
3057 continue;
3059 if (ipf->aggregate_pass_through)
3061 unsigned idx = get_single_param_flow_source (ipf);
3062 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3064 param_desc->locally_unused = false;
3065 if (!param_desc->split_candidate)
3066 continue;
3067 gcc_assert (!param_desc->by_ref);
3068 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3069 ipf->unit_size);
3070 gcc_checking_assert (pacc);
3071 pacc->certain = true;
3072 if (overlapping_certain_accesses_p (param_desc, NULL))
3074 if (dump_file && (dump_flags & TDF_DETAILS))
3075 fprintf (dump_file, " ...leading to overlap, "
3076 " disqualifying candidate parameter %u\n",
3077 idx);
3078 param_desc->split_candidate = false;
3080 else
3081 bump_reached_size (param_desc, pacc->unit_size, idx);
3082 ipf->aggregate_pass_through = false;
3083 continue;
3086 for (int j = 0; j < ipf->length; j++)
3088 int input_idx = ipf->inputs[j];
3089 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3094 /* Propagate parameter removal information through cross-SCC edge CS,
3095 i.e. decrease the use count in the caller parameter descriptor for each use
3096 in this call. */
3098 static void
3099 param_removal_cross_scc_edge (cgraph_edge *cs)
3101 enum availability availability;
3102 cgraph_node *callee = cs->callee->function_symbol (&availability);
3103 isra_func_summary *to_ifs = func_sums->get (callee);
3104 if (!to_ifs || !to_ifs->m_candidate
3105 || (availability < AVAIL_AVAILABLE)
3106 || vec_safe_is_empty (to_ifs->m_parameters))
3108 process_edge_to_unknown_caller (cs);
3109 return;
3111 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3112 gcc_checking_assert (from_ifs);
3114 isra_call_summary *csum = call_sums->get (cs);
3115 unsigned args_count = csum->m_arg_flow.length ();
3116 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3118 for (unsigned i = 0; i < args_count; i++)
3120 bool unused_in_callee;
3121 if (i < param_count)
3122 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3123 else
3124 unused_in_callee = false;
3126 if (!unused_in_callee)
3128 isra_param_flow *ipf = &csum->m_arg_flow[i];
3129 for (int j = 0; j < ipf->length; j++)
3131 int input_idx = ipf->inputs[j];
3132 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3138 /* Unless it is already there, push NODE which is also described by IFS to
3139 STACK. */
3141 static void
3142 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3143 vec<cgraph_node *> *stack)
3145 if (!ifs->m_queued)
3147 ifs->m_queued = true;
3148 stack->safe_push (node);
3152 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3153 used and push CALLER on STACK. */
3155 static void
3156 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3157 cgraph_node *caller, vec<cgraph_node *> *stack)
3159 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3161 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3162 isra_push_node_to_stack (caller, from_ifs, stack);
3167 /* Propagate information that any parameter is not used only locally within a
3168 SCC across CS to the caller, which must be in the same SCC as the
3169 callee. Push any callers that need to be re-processed to STACK. */
3171 static void
3172 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3174 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3175 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3176 return;
3178 isra_call_summary *csum = call_sums->get (cs);
3179 gcc_checking_assert (csum);
3180 unsigned args_count = csum->m_arg_flow.length ();
3181 enum availability availability;
3182 cgraph_node *callee = cs->callee->function_symbol (&availability);
3183 isra_func_summary *to_ifs = func_sums->get (callee);
3185 unsigned param_count
3186 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3187 ? vec_safe_length (to_ifs->m_parameters) : 0;
3188 for (unsigned i = 0; i < args_count; i++)
3190 if (i < param_count
3191 && (*to_ifs->m_parameters)[i].locally_unused)
3192 continue;
3194 /* The argument is needed in the callee it, we must mark the parameter as
3195 used also in the caller and its callers within this SCC. */
3196 isra_param_flow *ipf = &csum->m_arg_flow[i];
3197 for (int j = 0; j < ipf->length; j++)
3199 int input_idx = ipf->inputs[j];
3200 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3205 /* Propagate information that any parameter is not used only locally within a
3206 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3207 same SCC. Push any callers that need to be re-processed to STACK. */
3209 static bool
3210 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3212 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3213 cgraph_edge *cs;
3214 for (cs = node->callers; cs; cs = cs->next_caller)
3215 if (ipa_edge_within_scc (cs))
3216 propagate_used_across_scc_edge (cs, stack);
3217 return false;
3220 /* Return true iff all certain accesses in ARG_DESC are also present as
3221 certain accesses in PARAM_DESC. */
3223 static bool
3224 all_callee_accesses_present_p (isra_param_desc *param_desc,
3225 isra_param_desc *arg_desc)
3227 unsigned aclen = vec_safe_length (arg_desc->accesses);
3228 for (unsigned j = 0; j < aclen; j++)
3230 param_access *argacc = (*arg_desc->accesses)[j];
3231 if (!argacc->certain)
3232 continue;
3233 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3234 argacc->unit_size);
3235 if (!pacc
3236 || !pacc->certain
3237 || !types_compatible_p (argacc->type, pacc->type))
3238 return false;
3240 return true;
/* Per-access propagation decision used by pull_accesses_from_callee.  It
   lives at file scope only because gcc 4.8 cannot instantiate an auto_vec
   with a type defined inside a function.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3249 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3250 (which belongs to CALLER) if they would not violate some constraint there.
3251 If successful, return NULL, otherwise return the string reason for failure
3252 (which can be written to the dump file). DELTA_OFFSET is the known offset
3253 of the actual argument withing the formal parameter (so of ARG_DESCS within
3254 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3255 known. In case of success, set *CHANGE_P to true if propagation actually
3256 changed anything. */
3258 static const char *
3259 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3260 isra_param_desc *arg_desc,
3261 unsigned delta_offset, unsigned arg_size,
3262 bool *change_p)
3264 unsigned pclen = vec_safe_length (param_desc->accesses);
3265 unsigned aclen = vec_safe_length (arg_desc->accesses);
3266 unsigned prop_count = 0;
3267 unsigned prop_size = 0;
3268 bool change = false;
3270 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3271 for (unsigned j = 0; j < aclen; j++)
3273 param_access *argacc = (*arg_desc->accesses)[j];
3274 prop_kinds.safe_push (ACC_PROP_DONT);
3276 if (arg_size > 0
3277 && argacc->unit_offset + argacc->unit_size > arg_size)
3278 return "callee access outsize size boundary";
3280 if (!argacc->certain)
3281 continue;
3283 unsigned offset = argacc->unit_offset + delta_offset;
3284 /* Given that accesses are initially stored according to increasing
3285 offset and decreasing size in case of equal offsets, the following
3286 searches could be written more efficiently if we kept the ordering
3287 when copying. But the number of accesses is capped at
3288 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3289 messy quickly, so let's improve on that only if necessary. */
3291 bool exact_match = false;
3292 for (unsigned i = 0; i < pclen; i++)
3294 /* Check for overlaps. */
3295 param_access *pacc = (*param_desc->accesses)[i];
3296 if (pacc->unit_offset == offset
3297 && pacc->unit_size == argacc->unit_size)
3299 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3300 || !types_compatible_p (argacc->type, pacc->type))
3301 return "propagated access types would not match existing ones";
3303 exact_match = true;
3304 if (!pacc->certain)
3306 prop_kinds[j] = ACC_PROP_CERTAIN;
3307 prop_size += argacc->unit_size;
3308 change = true;
3310 continue;
3313 if (offset < pacc->unit_offset + pacc->unit_size
3314 && offset + argacc->unit_size > pacc->unit_offset)
3316 /* None permissible with load accesses, possible to fit into
3317 argument ones. */
3318 if (pacc->certain
3319 || offset < pacc->unit_offset
3320 || (offset + argacc->unit_size
3321 > pacc->unit_offset + pacc->unit_size))
3322 return "a propagated access would conflict in caller";
3326 if (!exact_match)
3328 prop_kinds[j] = ACC_PROP_COPY;
3329 prop_count++;
3330 prop_size += argacc->unit_size;
3331 change = true;
3335 if (!change)
3336 return NULL;
3338 if ((prop_count + pclen
3339 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3340 || size_would_violate_limit_p (param_desc,
3341 param_desc->size_reached + prop_size))
3342 return "propagating accesses would violate the count or size limit";
3344 *change_p = true;
3345 for (unsigned j = 0; j < aclen; j++)
3347 if (prop_kinds[j] == ACC_PROP_COPY)
3349 param_access *argacc = (*arg_desc->accesses)[j];
3351 param_access *copy = ggc_cleared_alloc<param_access> ();
3352 copy->unit_offset = argacc->unit_offset + delta_offset;
3353 copy->unit_size = argacc->unit_size;
3354 copy->type = argacc->type;
3355 copy->alias_ptr_type = argacc->alias_ptr_type;
3356 copy->certain = true;
3357 vec_safe_push (param_desc->accesses, copy);
3359 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3361 param_access *argacc = (*arg_desc->accesses)[j];
3362 param_access *csp
3363 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3364 argacc->unit_size);
3365 csp->certain = true;
3369 param_desc->size_reached += prop_size;
3371 return NULL;
3374 /* Propagate parameter splitting information through call graph edge CS.
3375 Return true if any changes that might need to be propagated within SCCs have
3376 been made. The function also clears the aggregate_pass_through and
3377 pointer_pass_through in call summaries which do not need to be processed
3378 again if this CS is revisited when iterating while changes are propagated
3379 within an SCC. */
3381 static bool
3382 param_splitting_across_edge (cgraph_edge *cs)
3384 bool res = false;
3385 bool cross_scc = !ipa_edge_within_scc (cs);
3386 enum availability availability;
3387 cgraph_node *callee = cs->callee->function_symbol (&availability);
3388 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3389 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3391 isra_call_summary *csum = call_sums->get (cs);
3392 gcc_checking_assert (csum);
3393 unsigned args_count = csum->m_arg_flow.length ();
3394 isra_func_summary *to_ifs = func_sums->get (callee);
3395 unsigned param_count
3396 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3397 ? vec_safe_length (to_ifs->m_parameters)
3398 : 0);
3400 if (dump_file && (dump_flags & TDF_DETAILS))
3401 fprintf (dump_file, "Splitting across %s->%s:\n",
3402 cs->caller->dump_name (), callee->dump_name ());
3404 unsigned i;
3405 for (i = 0; (i < args_count) && (i < param_count); i++)
3407 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3408 isra_param_flow *ipf = &csum->m_arg_flow[i];
3410 if (arg_desc->locally_unused)
3412 if (dump_file && (dump_flags & TDF_DETAILS))
3413 fprintf (dump_file, " ->%u: unused in callee\n", i);
3414 ipf->pointer_pass_through = false;
3415 continue;
3418 if (ipf->pointer_pass_through)
3420 int idx = get_single_param_flow_source (ipf);
3421 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3422 if (!param_desc->split_candidate)
3423 continue;
3424 gcc_assert (param_desc->by_ref);
3426 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3428 if (dump_file && (dump_flags & TDF_DETAILS))
3429 fprintf (dump_file, " %u->%u: not candidate or not by "
3430 "reference in callee\n", idx, i);
3431 param_desc->split_candidate = false;
3432 ipf->pointer_pass_through = false;
3433 res = true;
3435 else if (!ipf->safe_to_import_accesses)
3437 if (!csum->m_before_any_store
3438 || !all_callee_accesses_present_p (param_desc, arg_desc))
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3441 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3442 idx, i);
3443 param_desc->split_candidate = false;
3444 ipf->pointer_pass_through = false;
3445 res = true;
3448 else
3450 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (dump_file, " %u->%u: verified callee accesses "
3452 "present.\n", idx, i);
3453 if (cross_scc)
3454 ipf->pointer_pass_through = false;
3457 else
3459 const char *pull_failure
3460 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3461 0, 0, &res);
3462 if (pull_failure)
3464 if (dump_file && (dump_flags & TDF_DETAILS))
3465 fprintf (dump_file, " %u->%u: by_ref access pull "
3466 "failed: %s.\n", idx, i, pull_failure);
3467 param_desc->split_candidate = false;
3468 ipf->pointer_pass_through = false;
3469 res = true;
3471 else
3473 if (dump_file && (dump_flags & TDF_DETAILS))
3474 fprintf (dump_file, " %u->%u: by_ref access pull "
3475 "succeeded.\n", idx, i);
3476 if (cross_scc)
3477 ipf->pointer_pass_through = false;
3481 else if (ipf->aggregate_pass_through)
3483 int idx = get_single_param_flow_source (ipf);
3484 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3485 if (!param_desc->split_candidate)
3486 continue;
3487 gcc_assert (!param_desc->by_ref);
3488 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3489 ipf->unit_size);
3490 gcc_checking_assert (pacc);
3492 if (pacc->certain)
3494 if (dump_file && (dump_flags & TDF_DETAILS))
3495 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3496 ipf->aggregate_pass_through = false;
3498 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3500 if (dump_file && (dump_flags & TDF_DETAILS))
3501 fprintf (dump_file, " %u->%u: not candidate or by "
3502 "reference in callee\n", idx, i);
3504 pacc->certain = true;
3505 if (overlapping_certain_accesses_p (param_desc, NULL))
3507 if (dump_file && (dump_flags & TDF_DETAILS))
3508 fprintf (dump_file, " ...leading to overlap, "
3509 " disqualifying candidate parameter %u\n",
3510 idx);
3511 param_desc->split_candidate = false;
3513 else
3514 bump_reached_size (param_desc, pacc->unit_size, idx);
3516 ipf->aggregate_pass_through = false;
3517 res = true;
3519 else
3521 const char *pull_failure
3522 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3523 ipf->unit_offset,
3524 ipf->unit_size, &res);
3525 if (pull_failure)
3527 if (dump_file && (dump_flags & TDF_DETAILS))
3528 fprintf (dump_file, " %u->%u: arg access pull "
3529 "failed: %s.\n", idx, i, pull_failure);
3531 ipf->aggregate_pass_through = false;
3532 pacc->certain = true;
3534 if (overlapping_certain_accesses_p (param_desc, NULL))
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3537 fprintf (dump_file, " ...leading to overlap, "
3538 " disqualifying candidate parameter %u\n",
3539 idx);
3540 param_desc->split_candidate = false;
3542 else
3543 bump_reached_size (param_desc, pacc->unit_size, idx);
3545 res = true;
3547 else
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3550 fprintf (dump_file, " %u->%u: arg access pull "
3551 "succeeded.\n", idx, i);
3552 if (cross_scc)
3553 ipf->aggregate_pass_through = false;
3559 /* Handle argument-parameter count mismatches. */
3560 for (; (i < args_count); i++)
3562 isra_param_flow *ipf = &csum->m_arg_flow[i];
3564 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3566 int idx = get_single_param_flow_source (ipf);
3567 ipf->pointer_pass_through = false;
3568 ipf->aggregate_pass_through = false;
3569 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3570 if (!param_desc->split_candidate)
3571 continue;
3573 if (dump_file && (dump_flags & TDF_DETAILS))
3574 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3575 idx, i);
3576 param_desc->split_candidate = false;
3577 res = true;
3580 return res;
3583 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3584 callers ignore the return value, or come from the same SCC and use the
3585 return value only to compute their return value, return false, otherwise
3586 return true. */
3588 static bool
3589 retval_used_p (cgraph_node *node, void *)
3591 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3593 isra_call_summary *csum = call_sums->get (cs);
3594 gcc_checking_assert (csum);
3595 if (csum->m_return_ignored)
3596 continue;
3597 if (!csum->m_return_returned)
3598 return true;
3600 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3601 if (!from_ifs || !from_ifs->m_candidate)
3602 return true;
3604 if (!ipa_edge_within_scc (cs)
3605 && !from_ifs->m_return_ignored)
3606 return true;
3609 return false;
3612 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3613 modify parameter which originally had index BASE_INDEX, in the adjustment
3614 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3615 PREV_ADJUSTMENT. If the parent clone is the original function,
3616 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3619 static void
3620 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3621 unsigned prev_clone_index,
3622 ipa_adjusted_param *prev_adjustment,
3623 vec<ipa_adjusted_param, va_gc> **new_params)
3625 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3626 if (desc->locally_unused)
3628 if (dump_file)
3629 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3630 return;
3633 if (!desc->split_candidate)
3635 ipa_adjusted_param adj;
3636 if (prev_adjustment)
3638 adj = *prev_adjustment;
3639 adj.prev_clone_adjustment = true;
3640 adj.prev_clone_index = prev_clone_index;
3642 else
3644 memset (&adj, 0, sizeof (adj));
3645 adj.op = IPA_PARAM_OP_COPY;
3646 adj.base_index = base_index;
3647 adj.prev_clone_index = prev_clone_index;
3649 vec_safe_push ((*new_params), adj);
3650 return;
3653 if (dump_file)
3654 fprintf (dump_file, " Will split parameter %u\n", base_index);
3656 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3657 unsigned aclen = vec_safe_length (desc->accesses);
3658 for (unsigned j = 0; j < aclen; j++)
3660 param_access *pa = (*desc->accesses)[j];
3661 if (!pa->certain)
3662 continue;
3663 if (dump_file)
3664 fprintf (dump_file, " - component at byte offset %u, "
3665 "size %u\n", pa->unit_offset, pa->unit_size);
3667 ipa_adjusted_param adj;
3668 memset (&adj, 0, sizeof (adj));
3669 adj.op = IPA_PARAM_OP_SPLIT;
3670 adj.base_index = base_index;
3671 adj.prev_clone_index = prev_clone_index;
3672 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3673 adj.reverse = pa->reverse;
3674 adj.type = pa->type;
3675 adj.alias_ptr_type = pa->alias_ptr_type;
3676 adj.unit_offset = pa->unit_offset;
3677 vec_safe_push ((*new_params), adj);
3681 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
3682 flag of all callers of NODE. */
3684 static bool
3685 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3687 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3688 cs->caller->calls_comdat_local = true;
3689 return false;
3693 /* Do final processing of results of IPA propagation regarding NODE, clone it
3694 if appropriate. */
3696 static void
3697 process_isra_node_results (cgraph_node *node,
3698 hash_map<const char *, unsigned> *clone_num_suffixes)
3700 isra_func_summary *ifs = func_sums->get (node);
3701 if (!ifs || !ifs->m_candidate)
3702 return;
3704 auto_vec<bool, 16> surviving_params;
3705 bool check_surviving = false;
3706 clone_info *cinfo = clone_info::get (node);
3707 if (cinfo && cinfo->param_adjustments)
3709 check_surviving = true;
3710 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3713 unsigned param_count = vec_safe_length (ifs->m_parameters);
3714 bool will_change_function = false;
3715 if (ifs->m_returns_value && ifs->m_return_ignored)
3716 will_change_function = true;
3717 else
3718 for (unsigned i = 0; i < param_count; i++)
3720 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3721 if ((desc->locally_unused || desc->split_candidate)
3722 /* Make sure we do not clone just to attempt to remove an already
3723 removed unused argument. */
3724 && (!check_surviving
3725 || (i < surviving_params.length ()
3726 && surviving_params[i])))
3728 will_change_function = true;
3729 break;
3732 if (!will_change_function)
3733 return;
3735 if (dump_file)
3737 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3738 node->dump_name ());
3739 if (ifs->m_returns_value && ifs->m_return_ignored)
3740 fprintf (dump_file, " Will remove return value.\n");
3743 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3744 if (ipa_param_adjustments *old_adjustments
3745 = cinfo ? cinfo->param_adjustments : NULL)
3747 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3748 for (unsigned i = 0; i < old_adj_len; i++)
3750 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3751 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3752 old_adj, &new_params);
3755 else
3756 for (unsigned i = 0; i < param_count; i++)
3757 push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3759 ipa_param_adjustments *new_adjustments
3760 = (new (ggc_alloc <ipa_param_adjustments> ())
3761 ipa_param_adjustments (new_params, param_count,
3762 ifs->m_returns_value && ifs->m_return_ignored));
3764 if (dump_file && (dump_flags & TDF_DETAILS))
3766 fprintf (dump_file, "\n Created adjustments:\n");
3767 new_adjustments->dump (dump_file);
3770 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3771 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3772 node->decl)));
3773 auto_vec<cgraph_edge *> callers = node->collect_callers ();
3774 cgraph_node *new_node
3775 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3776 suffix_counter);
3777 suffix_counter++;
3778 if (node->calls_comdat_local && node->same_comdat_group)
3780 new_node->add_to_same_comdat_group (node);
3781 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3782 NULL, true);
3784 new_node->calls_comdat_local = node->calls_comdat_local;
3786 if (dump_file)
3787 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
3788 callers.release ();
3791 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3792 and disable transformations for those which have not or which should not
3793 transformed because the associated debug counter reached its limit. Return
3794 true if none survived or if there were no candidates to begin with. */
3796 static bool
3797 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3799 bool ret = true;
3800 unsigned len = vec_safe_length (ifs->m_parameters);
3801 if (!len)
3802 return true;
3804 auto_vec<bool, 16> surviving_params;
3805 bool check_surviving = false;
3806 clone_info *cinfo = clone_info::get (node);
3807 if (cinfo && cinfo->param_adjustments)
3809 check_surviving = true;
3810 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3812 bool dumped_first = false;
3813 for (unsigned i = 0; i < len; i++)
3815 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3816 if (!dbg_cnt (ipa_sra_params))
3818 desc->locally_unused = false;
3819 desc->split_candidate = false;
3821 else if (check_surviving
3822 && (i >= surviving_params.length ()
3823 || !surviving_params[i]))
3825 /* Even if the parameter was removed by a previous IPA pass, we do
3826 not clear locally_unused because if it really is unused, this
3827 information might be useful in callers. */
3828 desc->split_candidate = false;
3830 if (dump_file && (dump_flags & TDF_DETAILS))
3832 if (!dumped_first)
3834 fprintf (dump_file,
3835 "The following parameters of %s are dead on "
3836 "arrival:", node->dump_name ());
3837 dumped_first = true;
3839 fprintf (dump_file, " %u", i);
3842 else if (desc->locally_unused || desc->split_candidate)
3843 ret = false;
3846 if (dumped_first)
3847 fprintf (dump_file, "\n");
3849 return ret;
3853 /* Run the interprocedural part of IPA-SRA. */
3855 static unsigned int
3856 ipa_sra_analysis (void)
3858 if (dump_file)
3860 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3861 ipa_sra_dump_all_summaries (dump_file);
3864 gcc_checking_assert (func_sums);
3865 gcc_checking_assert (call_sums);
3866 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3867 auto_vec <cgraph_node *, 16> stack;
3868 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3870 /* One sweep from callees to callers for parameter removal and splitting. */
3871 for (int i = 0; i < node_scc_count; i++)
3873 cgraph_node *scc_rep = order[i];
3874 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3875 unsigned j;
3877 /* Preliminary IPA function level checks and first step of parameter
3878 removal. */
3879 cgraph_node *v;
3880 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3882 isra_func_summary *ifs = func_sums->get (v);
3883 if (!ifs || !ifs->m_candidate)
3884 continue;
3885 if (!ipa_sra_ipa_function_checks (v)
3886 || check_all_callers_for_issues (v))
3888 ifs->zap ();
3889 continue;
3891 if (disable_unavailable_parameters (v, ifs))
3892 continue;
3893 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3894 process_edge_to_unknown_caller (cs);
3895 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3896 if (!ipa_edge_within_scc (cs))
3897 param_removal_cross_scc_edge (cs);
3900 /* Look at edges within the current SCC and propagate used-ness across
3901 them, pushing onto the stack all notes which might need to be
3902 revisited. */
3903 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3904 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3905 &stack, true);
3907 /* Keep revisiting and pushing until nothing changes. */
3908 while (!stack.is_empty ())
3910 cgraph_node *v = stack.pop ();
3911 isra_func_summary *ifs = func_sums->get (v);
3912 gcc_checking_assert (ifs && ifs->m_queued);
3913 ifs->m_queued = false;
3915 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3916 &stack, true);
3919 /* Parameter splitting. */
3920 bool repeat_scc_access_propagation;
3923 repeat_scc_access_propagation = false;
3924 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3926 isra_func_summary *ifs = func_sums->get (v);
3927 if (!ifs
3928 || !ifs->m_candidate
3929 || vec_safe_is_empty (ifs->m_parameters))
3930 continue;
3931 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3932 if (param_splitting_across_edge (cs))
3933 repeat_scc_access_propagation = true;
3936 while (repeat_scc_access_propagation);
3938 if (flag_checking)
3939 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3940 verify_splitting_accesses (v, true);
3942 cycle_nodes.release ();
3945 /* One sweep from caller to callees for result removal. */
3946 for (int i = node_scc_count - 1; i >= 0 ; i--)
3948 cgraph_node *scc_rep = order[i];
3949 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3950 unsigned j;
3952 cgraph_node *v;
3953 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3955 isra_func_summary *ifs = func_sums->get (v);
3956 if (!ifs || !ifs->m_candidate)
3957 continue;
3959 bool return_needed
3960 = (ifs->m_returns_value
3961 && (!dbg_cnt (ipa_sra_retvalues)
3962 || v->call_for_symbol_and_aliases (retval_used_p,
3963 NULL, true)));
3964 ifs->m_return_ignored = !return_needed;
3965 if (return_needed)
3966 isra_push_node_to_stack (v, ifs, &stack);
3969 while (!stack.is_empty ())
3971 cgraph_node *node = stack.pop ();
3972 isra_func_summary *ifs = func_sums->get (node);
3973 gcc_checking_assert (ifs && ifs->m_queued);
3974 ifs->m_queued = false;
3976 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3977 if (ipa_edge_within_scc (cs)
3978 && call_sums->get (cs)->m_return_returned)
3980 enum availability av;
3981 cgraph_node *callee = cs->callee->function_symbol (&av);
3982 isra_func_summary *to_ifs = func_sums->get (callee);
3983 if (to_ifs && to_ifs->m_return_ignored)
3985 to_ifs->m_return_ignored = false;
3986 isra_push_node_to_stack (callee, to_ifs, &stack);
3990 cycle_nodes.release ();
3993 ipa_free_postorder_info ();
3994 free (order);
3996 if (dump_file)
3998 if (dump_flags & TDF_DETAILS)
4000 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
4001 " ==========\n");
4002 ipa_sra_dump_all_summaries (dump_file);
4004 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4007 hash_map<const char *, unsigned> *clone_num_suffixes
4008 = new hash_map<const char *, unsigned>;
4010 cgraph_node *node;
4011 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4012 process_isra_node_results (node, clone_num_suffixes);
4014 delete clone_num_suffixes;
4015 ggc_delete (func_sums);
4016 func_sums = NULL;
4017 delete call_sums;
4018 call_sums = NULL;
4020 if (dump_file)
4021 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4022 "==========\n\n");
4023 return 0;
4027 const pass_data pass_data_ipa_sra =
4029 IPA_PASS, /* type */
4030 "sra", /* name */
4031 OPTGROUP_NONE, /* optinfo_flags */
4032 TV_IPA_SRA, /* tv_id */
4033 0, /* properties_required */
4034 0, /* properties_provided */
4035 0, /* properties_destroyed */
4036 0, /* todo_flags_start */
4037 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4040 class pass_ipa_sra : public ipa_opt_pass_d
4042 public:
4043 pass_ipa_sra (gcc::context *ctxt)
4044 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4045 ipa_sra_generate_summary, /* generate_summary */
4046 ipa_sra_write_summary, /* write_summary */
4047 ipa_sra_read_summary, /* read_summary */
4048 NULL , /* write_optimization_summary */
4049 NULL, /* read_optimization_summary */
4050 NULL, /* stmt_fixup */
4051 0, /* function_transform_todo_flags_start */
4052 NULL, /* function_transform */
4053 NULL) /* variable_transform */
4056 /* opt_pass methods: */
4057 virtual bool gate (function *)
4059 /* TODO: We should remove the optimize check after we ensure we never run
4060 IPA passes when not optimizing. */
4061 return (flag_ipa_sra && optimize);
4064 virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4066 }; // class pass_ipa_sra
4068 } // anon namespace
4070 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4071 create a summary structure describing IPA-SRA opportunities and constraints
4072 in it. */
4074 static void
4075 ipa_sra_summarize_function (cgraph_node *node)
4077 if (dump_file)
4078 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4079 node->order);
4080 if (!ipa_sra_preliminary_function_checks (node))
4082 isra_analyze_all_outgoing_calls (node);
4083 return;
4085 gcc_obstack_init (&gensum_obstack);
4086 isra_func_summary *ifs = func_sums->get_create (node);
4087 ifs->m_candidate = true;
4088 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4089 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4091 decl2desc = new hash_map<tree, gensum_param_desc *>;
4092 unsigned count = 0;
4093 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
4094 count++;
4096 if (count > 0)
4098 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4099 param_descriptions.reserve_exact (count);
4100 param_descriptions.quick_grow_cleared (count);
4102 bool cfun_pushed = false;
4103 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4104 if (create_parameter_descriptors (node, &param_descriptions))
4106 push_cfun (fun);
4107 cfun_pushed = true;
4108 final_bbs = BITMAP_ALLOC (NULL);
4109 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4110 by_ref_count
4111 * last_basic_block_for_fn (fun));
4112 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4113 scan_function (node, fun);
4115 if (dump_file)
4117 dump_gensum_param_descriptors (dump_file, node->decl,
4118 &param_descriptions);
4119 fprintf (dump_file, "----------------------------------------\n");
4122 process_scan_results (node, fun, ifs, &param_descriptions);
4124 if (cfun_pushed)
4125 pop_cfun ();
4126 if (bb_dereferences)
4128 free (bb_dereferences);
4129 bb_dereferences = NULL;
4130 BITMAP_FREE (final_bbs);
4131 final_bbs = NULL;
4134 isra_analyze_all_outgoing_calls (node);
4136 delete decl2desc;
4137 decl2desc = NULL;
4138 obstack_free (&gensum_obstack, NULL);
4139 if (dump_file)
4140 fprintf (dump_file, "\n\n");
4141 if (flag_checking)
4142 verify_splitting_accesses (node, false);
4143 return;
4146 ipa_opt_pass_d *
4147 make_pass_ipa_sra (gcc::context *ctxt)
4149 return new pass_ipa_sra (ctxt);
4153 #include "gt-ipa-sra.h"