Skip gcc.dg/guality/example.c on hppa-linux.
[official-gcc.git] / gcc / ipa-sra.c
blob12ccd04955200a3de8d84de13bc1d5890df9fc59
1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2008-2021 Free Software Foundation, Inc.
4 Contributed by Martin Jambor <mjambor@suse.cz>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* IPA-SRA is an interprocedural pass that removes unused function return
23 values (turning functions returning a value which is never used into void
24 functions), removes unused function parameters. It can also replace an
25 aggregate parameter by a set of other parameters representing part of the
26 original, turning those passed by reference into new ones which pass the
27 value directly.
29 The pass is a true IPA one, which means that it works in three stages in
30 order to be able to take advantage of LTO. First, summaries about functions
31 and each calls are generated. Function summaries (often called call graph
32 node summaries) contain mainly information about which parameters are
33 potential transformation candidates and which bits of candidates are
34 accessed. We differentiate between accesses done as a part of a call
35 statement (which might be not necessary if the callee is also transformed)
36 and others (which are mandatory). Call summaries (often called call graph
37 edge summaries) contain information about which function formal parameters
38 feed into which actual call arguments so that if two parameters are only
39 used in a sum which is then passed to another function which then however
40 does not use this parameter, all three parameters of the two functions can
41 be eliminated. Edge summaries also have flags whether the return value is
42 used or if it is only returned in the caller too. In LTO mode these
43 summaries are then streamed to the object file in the compilation phase and
44 streamed back in in the WPA analysis stage.
46 The interprocedural analysis phase traverses the graph in topological order
47 in two sweeps, one in each direction. First, from callees to callers for
48 parameter removal and splitting. Each strongly-connected component is
49 processed iteratively until the situation in it stabilizes. The pass from
50 callers to callees is then carried out to remove unused return values in a
51 very similar fashion.
53 Because parameter manipulation has big implications for call redirection
54 which is done only after all call graph nodes materialize, the
55 transformation phase is not part of this patch but is carried out by the
56 clone materialization and edge redirection itself, see comments in
57 ipa-param-manipulation.h for more details. */
61 #include "config.h"
62 #include "system.h"
63 #include "coretypes.h"
64 #include "backend.h"
65 #include "tree.h"
66 #include "gimple.h"
67 #include "predict.h"
68 #include "tree-pass.h"
69 #include "ssa.h"
70 #include "cgraph.h"
71 #include "gimple-pretty-print.h"
72 #include "alias.h"
73 #include "tree-eh.h"
74 #include "gimple-iterator.h"
75 #include "gimple-walk.h"
76 #include "tree-dfa.h"
77 #include "tree-sra.h"
78 #include "alloc-pool.h"
79 #include "symbol-summary.h"
80 #include "dbgcnt.h"
81 #include "tree-inline.h"
82 #include "ipa-utils.h"
83 #include "builtins.h"
84 #include "cfganal.h"
85 #include "tree-streamer.h"
86 #include "internal-fn.h"
87 #include "symtab-clones.h"
88 #include "attribs.h"
90 static void ipa_sra_summarize_function (cgraph_node *);
92 /* Bits used to track size of an aggregate in bytes interprocedurally. */
93 #define ISRA_ARG_SIZE_LIMIT_BITS 16
94 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
95 /* How many parameters can feed into a call actual argument and still be
96 tracked. */
97 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
99 /* Structure describing accesses to a specific portion of an aggregate
100 parameter, as given by the offset and size. Any smaller accesses that occur
101 within a function that fall within another access form a tree. The pass
102 cannot analyze parameters with only partially overlapping accesses. */
104 struct GTY(()) param_access
106 /* Type that a potential replacement should have. This field only has
107 meaning in the summary building and transformation phases, when it is
108 reconstructed from the body. Must not be touched in IPA analysis
109 stage. */
110 tree type;
112 /* Alias reference type to be used in MEM_REFs when adjusting caller
113 arguments. */
114 tree alias_ptr_type;
116 /* Values returned by get_ref_base_and_extent but converted to bytes and
117 stored as unsigned ints. */
118 unsigned unit_offset;
119 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
121 /* Set once we are sure that the access will really end up in a potentially
122 transformed function - initially not set for portions of formal parameters
123 that are only used as actual function arguments passed to callees. */
124 unsigned certain : 1;
125 /* Set if the access has a reversed scalar storage order. */
126 unsigned reverse : 1;
129 /* This structure has the same purpose as the one above and additionally it
130 contains some fields that are only necessary in the summary generation
131 phase. */
133 struct gensum_param_access
135 /* Values returned by get_ref_base_and_extent. */
136 HOST_WIDE_INT offset;
137 HOST_WIDE_INT size;
139 /* if this access has any children (in terms of the definition above), this
140 points to the first one. */
141 struct gensum_param_access *first_child;
142 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
143 described above. */
144 struct gensum_param_access *next_sibling;
146 /* Type that a potential replacement should have. This field only has
147 meaning in the summary building and transformation phases, when it is
148 reconstructed from the body. Must not be touched in IPA analysis
149 stage. */
150 tree type;
151 /* Alias reference type to be used in MEM_REFs when adjusting caller
152 arguments. */
153 tree alias_ptr_type;
155 /* Have there been writes to or reads from this exact location except for as
156 arguments to a function call that can be tracked. */
157 bool nonarg;
159 /* Set if the access has a reversed scalar storage order. */
160 bool reverse;
163 /* Summary describing a parameter in the IPA stages. */
165 struct GTY(()) isra_param_desc
167 /* List of access representatives to the parameters, sorted according to
168 their offset. */
169 vec <param_access *, va_gc> *accesses;
171 /* Unit size limit of total size of all replacements. */
172 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
173 /* Sum of unit sizes of all certain replacements. */
174 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
176 /* A parameter that is used only in call arguments and can be removed if all
177 concerned actual arguments are removed. */
178 unsigned locally_unused : 1;
179 /* An aggregate that is a candidate for breaking up or complete removal. */
180 unsigned split_candidate : 1;
181 /* Is this a parameter passing stuff by reference? */
182 unsigned by_ref : 1;
185 /* Structure used when generating summaries that describes a parameter. */
187 struct gensum_param_desc
189 /* Roots of param_accesses. */
190 gensum_param_access *accesses;
191 /* Number of accesses in the access tree rooted in field accesses. */
192 unsigned access_count;
194 /* If the below is non-zero, this is the number of uses as actual arguments. */
195 int call_uses;
196 /* Number of times this parameter has been directly passed to. */
197 unsigned ptr_pt_count;
199 /* Size limit of total size of all replacements. */
200 unsigned param_size_limit;
201 /* Sum of sizes of nonarg accesses. */
202 unsigned nonarg_acc_size;
204 /* A parameter that is used only in call arguments and can be removed if all
205 concerned actual arguments are removed. */
206 bool locally_unused;
207 /* An aggregate that is a candidate for breaking up or a pointer passing data
208 by reference that is a candidate for being converted to a set of
209 parameters passing those data by value. */
210 bool split_candidate;
211 /* Is this a parameter passing stuff by reference? */
212 bool by_ref;
214 /* The number of this parameter as they are ordered in function decl. */
215 int param_number;
216 /* For parameters passing data by reference, this is parameter index to
217 compute indices to bb_dereferences. */
218 int deref_index;
221 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
222 not in GC memory, this is not necessary and we can consider removing the
223 function. */
225 static void
226 free_param_decl_accesses (isra_param_desc *desc)
228 unsigned len = vec_safe_length (desc->accesses);
229 for (unsigned i = 0; i < len; ++i)
230 ggc_free ((*desc->accesses)[i]);
231 vec_free (desc->accesses);
234 /* Class used to convey information about functions from the
235 intra-procedural analysis stage to inter-procedural one. */
237 class GTY((for_user)) isra_func_summary
239 public:
240 /* initialize the object. */
242 isra_func_summary ()
243 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
244 m_return_ignored (false), m_queued (false)
247 /* Destroy m_parameters. */
249 ~isra_func_summary ();
251 /* Mark the function as not a candidate for any IPA-SRA transformation.
252 Return true if it was a candidate until now. */
254 bool zap ();
256 /* Vector of parameter descriptors corresponding to the function being
257 analyzed. */
258 vec<isra_param_desc, va_gc> *m_parameters;
260 /* Whether the node is even a candidate for any IPA-SRA transformation at
261 all. */
262 unsigned m_candidate : 1;
264 /* Whether the original function returns any value. */
265 unsigned m_returns_value : 1;
267 /* Set to true if all call statements do not actually use the returned
268 value. */
270 unsigned m_return_ignored : 1;
272 /* Whether the node is already queued in IPA SRA stack during processing of
273 call graphs SCCs. */
275 unsigned m_queued : 1;
278 /* Clean up and deallocate isra_func_summary points to. TODO: Since this data
279 structure is not in GC memory, this is not necessary and we can consider
280 removing the destructor. */
282 isra_func_summary::~isra_func_summary ()
284 unsigned len = vec_safe_length (m_parameters);
285 for (unsigned i = 0; i < len; ++i)
286 free_param_decl_accesses (&(*m_parameters)[i]);
287 vec_free (m_parameters);
291 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
292 true if it was a candidate until now. */
294 bool
295 isra_func_summary::zap ()
297 bool ret = m_candidate;
298 m_candidate = false;
300 unsigned len = vec_safe_length (m_parameters);
301 for (unsigned i = 0; i < len; ++i)
302 free_param_decl_accesses (&(*m_parameters)[i]);
303 vec_free (m_parameters);
305 return ret;
308 /* Structure to describe which formal parameters feed into a particular actual
309 arguments. */
311 struct isra_param_flow
313 /* Number of elements in array inputs that contain valid data. */
314 char length;
315 /* Indices of formal parameters that feed into the described actual argument.
316 If aggregate_pass_through or pointer_pass_through below are true, it must
317 contain exactly one element which is passed through from a formal
318 parameter if the given number. Otherwise, the array contains indices of
319 callee's formal parameters which are used to calculate value of this
320 actual argument. */
321 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
323 /* Offset within the formal parameter. */
324 unsigned unit_offset;
325 /* Size of the portion of the formal parameter that is being passed. */
326 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
328 /* True when the value of this actual copy is a portion of a formal
329 parameter. */
330 unsigned aggregate_pass_through : 1;
331 /* True when the value of this actual copy is a verbatim pass through of an
332 obtained pointer. */
333 unsigned pointer_pass_through : 1;
334 /* True when it is safe to copy access candidates here from the callee, which
335 would mean introducing dereferences into callers of the caller. */
336 unsigned safe_to_import_accesses : 1;
339 /* Structure used to convey information about calls from the intra-procedural
340 analysis stage to inter-procedural one. */
342 class isra_call_summary
344 public:
345 isra_call_summary ()
346 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
347 m_bit_aligned_arg (false), m_before_any_store (false)
350 void init_inputs (unsigned arg_count);
351 void dump (FILE *f);
353 /* Information about what formal parameters of the caller are used to compute
354 individual actual arguments of this call. */
355 auto_vec <isra_param_flow> m_arg_flow;
357 /* Set to true if the call statement does not have a LHS. */
358 unsigned m_return_ignored : 1;
360 /* Set to true if the LHS of call statement is only used to construct the
361 return value of the caller. */
362 unsigned m_return_returned : 1;
364 /* Set when any of the call arguments are not byte-aligned. */
365 unsigned m_bit_aligned_arg : 1;
367 /* Set to true if the call happend before any (other) store to memory in the
368 caller. */
369 unsigned m_before_any_store : 1;
372 /* Class to manage function summaries. */
374 class GTY((user)) ipa_sra_function_summaries
375 : public function_summary <isra_func_summary *>
377 public:
378 ipa_sra_function_summaries (symbol_table *table, bool ggc):
379 function_summary<isra_func_summary *> (table, ggc) { }
381 virtual void duplicate (cgraph_node *, cgraph_node *,
382 isra_func_summary *old_sum,
383 isra_func_summary *new_sum);
384 virtual void insert (cgraph_node *, isra_func_summary *);
387 /* Hook that is called by summary when a node is duplicated. */
389 void
390 ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
391 isra_func_summary *old_sum,
392 isra_func_summary *new_sum)
394 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
395 useless. */
396 new_sum->m_candidate = old_sum->m_candidate;
397 new_sum->m_returns_value = old_sum->m_returns_value;
398 new_sum->m_return_ignored = old_sum->m_return_ignored;
399 gcc_assert (!old_sum->m_queued);
400 new_sum->m_queued = false;
402 unsigned param_count = vec_safe_length (old_sum->m_parameters);
403 if (!param_count)
404 return;
405 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
406 new_sum->m_parameters->quick_grow_cleared (param_count);
407 for (unsigned i = 0; i < param_count; i++)
409 isra_param_desc *s = &(*old_sum->m_parameters)[i];
410 isra_param_desc *d = &(*new_sum->m_parameters)[i];
412 d->param_size_limit = s->param_size_limit;
413 d->size_reached = s->size_reached;
414 d->locally_unused = s->locally_unused;
415 d->split_candidate = s->split_candidate;
416 d->by_ref = s->by_ref;
418 unsigned acc_count = vec_safe_length (s->accesses);
419 vec_safe_reserve_exact (d->accesses, acc_count);
420 for (unsigned j = 0; j < acc_count; j++)
422 param_access *from = (*s->accesses)[j];
423 param_access *to = ggc_cleared_alloc<param_access> ();
424 to->type = from->type;
425 to->alias_ptr_type = from->alias_ptr_type;
426 to->unit_offset = from->unit_offset;
427 to->unit_size = from->unit_size;
428 to->certain = from->certain;
429 d->accesses->quick_push (to);
434 /* Pointer to the pass function summary holder. */
436 static GTY(()) ipa_sra_function_summaries *func_sums;
438 /* Hook that is called by summary when new node appears. */
440 void
441 ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
443 if (opt_for_fn (node->decl, flag_ipa_sra))
445 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
446 ipa_sra_summarize_function (node);
447 pop_cfun ();
449 else
450 func_sums->remove (node);
453 /* Class to manage call summaries. */
455 class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
457 public:
458 ipa_sra_call_summaries (symbol_table *table):
459 call_summary<isra_call_summary *> (table) { }
461 /* Duplicate info when an edge is cloned. */
462 virtual void duplicate (cgraph_edge *, cgraph_edge *,
463 isra_call_summary *old_sum,
464 isra_call_summary *new_sum);
467 static ipa_sra_call_summaries *call_sums;
470 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
471 ARG_COUNT is the number of actual arguments passed. */
473 void
474 isra_call_summary::init_inputs (unsigned arg_count)
476 if (arg_count == 0)
478 gcc_checking_assert (m_arg_flow.length () == 0);
479 return;
481 if (m_arg_flow.length () == 0)
483 m_arg_flow.reserve_exact (arg_count);
484 m_arg_flow.quick_grow_cleared (arg_count);
486 else
487 gcc_checking_assert (arg_count == m_arg_flow.length ());
490 /* Dump all information in call summary to F. */
492 void
493 isra_call_summary::dump (FILE *f)
495 if (m_return_ignored)
496 fprintf (f, " return value ignored\n");
497 if (m_return_returned)
498 fprintf (f, " return value used only to compute caller return value\n");
499 if (m_before_any_store)
500 fprintf (f, " happens before any store to memory\n");
501 for (unsigned i = 0; i < m_arg_flow.length (); i++)
503 fprintf (f, " Parameter %u:\n", i);
504 isra_param_flow *ipf = &m_arg_flow[i];
506 if (ipf->length)
508 bool first = true;
509 fprintf (f, " Scalar param sources: ");
510 for (int j = 0; j < ipf->length; j++)
512 if (!first)
513 fprintf (f, ", ");
514 else
515 first = false;
516 fprintf (f, "%i", (int) ipf->inputs[j]);
518 fprintf (f, "\n");
520 if (ipf->aggregate_pass_through)
521 fprintf (f, " Aggregate pass through from the param given above, "
522 "unit offset: %u , unit size: %u\n",
523 ipf->unit_offset, ipf->unit_size);
524 if (ipf->pointer_pass_through)
525 fprintf (f, " Pointer pass through from the param given above, "
526 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
530 /* Duplicate edge summary when an edge is cloned. */
532 void
533 ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
534 isra_call_summary *old_sum,
535 isra_call_summary *new_sum)
537 unsigned arg_count = old_sum->m_arg_flow.length ();
538 new_sum->init_inputs (arg_count);
539 for (unsigned i = 0; i < arg_count; i++)
540 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
542 new_sum->m_return_ignored = old_sum->m_return_ignored;
543 new_sum->m_return_returned = old_sum->m_return_returned;
544 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
545 new_sum->m_before_any_store = old_sum->m_before_any_store;
549 /* With all GTY stuff done, we can move to anonymous namespace. */
550 namespace {
551 /* Quick mapping from a decl to its param descriptor. */
553 hash_map<tree, gensum_param_desc *> *decl2desc;
555 /* Countdown of allowed Alias analysis steps during summary building. */
557 int aa_walking_limit;
559 /* This is a table in which for each basic block and parameter there is a
560 distance (offset + size) in that parameter which is dereferenced and
561 accessed in that BB. */
562 HOST_WIDE_INT *bb_dereferences = NULL;
563 /* How many by-reference parameters there are in the current function. */
564 int by_ref_count;
566 /* Bitmap of BBs that can cause the function to "stop" progressing by
567 returning, throwing externally, looping infinitely or calling a function
568 which might abort etc.. */
569 bitmap final_bbs;
571 /* Obstack to allocate various small structures required only when generating
572 summary for a function. */
573 struct obstack gensum_obstack;
575 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
576 attributes, return true otherwise. NODE is the cgraph node of the current
577 function. */
579 static bool
580 ipa_sra_preliminary_function_checks (cgraph_node *node)
582 if (!node->can_change_signature)
584 if (dump_file)
585 fprintf (dump_file, "Function cannot change signature.\n");
586 return false;
589 if (!tree_versionable_function_p (node->decl))
591 if (dump_file)
592 fprintf (dump_file, "Function is not versionable.\n");
593 return false;
596 if (!opt_for_fn (node->decl, optimize)
597 || !opt_for_fn (node->decl, flag_ipa_sra))
599 if (dump_file)
600 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
601 "function.\n");
602 return false;
605 if (DECL_VIRTUAL_P (node->decl))
607 if (dump_file)
608 fprintf (dump_file, "Function is a virtual method.\n");
609 return false;
612 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
613 if (fun->stdarg)
615 if (dump_file)
616 fprintf (dump_file, "Function uses stdarg. \n");
617 return false;
620 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
622 if (dump_file)
623 fprintf (dump_file, "Always inline function will be inlined "
624 "anyway. \n");
625 return false;
628 return true;
631 /* Print access tree starting at ACCESS to F. */
633 static void
634 dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
636 fprintf (f, " ");
637 for (unsigned i = 0; i < indent; i++)
638 fprintf (f, " ");
639 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
640 access->offset);
641 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
642 fprintf (f, ", type: ");
643 print_generic_expr (f, access->type);
644 fprintf (f, ", alias_ptr_type: ");
645 print_generic_expr (f, access->alias_ptr_type);
646 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
647 for (gensum_param_access *ch = access->first_child;
649 ch = ch->next_sibling)
650 dump_gensum_access (f, ch, indent + 2);
654 /* Print access tree starting at ACCESS to F. */
656 static void
657 dump_isra_access (FILE *f, param_access *access)
659 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
660 fprintf (f, ", unit size: %u", access->unit_size);
661 fprintf (f, ", type: ");
662 print_generic_expr (f, access->type);
663 fprintf (f, ", alias_ptr_type: ");
664 print_generic_expr (f, access->alias_ptr_type);
665 if (access->certain)
666 fprintf (f, ", certain");
667 else
668 fprintf (f, ", not-certain");
669 if (access->reverse)
670 fprintf (f, ", reverse");
671 fprintf (f, "\n");
674 /* Dump access tree starting at ACCESS to stderr. */
676 DEBUG_FUNCTION void
677 debug_isra_access (param_access *access)
679 dump_isra_access (stderr, access);
682 /* Dump DESC to F. */
684 static void
685 dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
687 if (desc->locally_unused)
688 fprintf (f, " unused with %i call_uses\n", desc->call_uses);
689 if (!desc->split_candidate)
691 fprintf (f, " not a candidate\n");
692 return;
694 if (desc->by_ref)
695 fprintf (f, " by_ref with %u pass throughs\n", desc->ptr_pt_count);
697 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
698 dump_gensum_access (f, acc, 2);
701 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
702 F. */
704 static void
705 dump_gensum_param_descriptors (FILE *f, tree fndecl,
706 vec<gensum_param_desc> *param_descriptions)
708 tree parm = DECL_ARGUMENTS (fndecl);
709 for (unsigned i = 0;
710 i < param_descriptions->length ();
711 ++i, parm = DECL_CHAIN (parm))
713 fprintf (f, " Descriptor for parameter %i ", i);
714 print_generic_expr (f, parm, TDF_UID);
715 fprintf (f, "\n");
716 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
721 /* Dump DESC to F. */
723 static void
724 dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
726 if (desc->locally_unused)
728 fprintf (f, " (locally) unused\n");
730 if (!desc->split_candidate)
732 fprintf (f, " not a candidate for splitting\n");
733 return;
735 fprintf (f, " param_size_limit: %u, size_reached: %u%s\n",
736 desc->param_size_limit, desc->size_reached,
737 desc->by_ref ? ", by_ref" : "");
739 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
741 param_access *access = (*desc->accesses)[i];
742 dump_isra_access (f, access);
746 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
747 F. */
749 static void
750 dump_isra_param_descriptors (FILE *f, tree fndecl,
751 isra_func_summary *ifs)
753 tree parm = DECL_ARGUMENTS (fndecl);
754 if (!ifs->m_parameters)
756 fprintf (f, " parameter descriptors not available\n");
757 return;
760 for (unsigned i = 0;
761 i < ifs->m_parameters->length ();
762 ++i, parm = DECL_CHAIN (parm))
764 fprintf (f, " Descriptor for parameter %i ", i);
765 print_generic_expr (f, parm, TDF_UID);
766 fprintf (f, "\n");
767 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
771 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
772 function fails return false, otherwise return true. SRC must fit into an
773 unsigned char. Used for purposes of transitive unused parameter
774 removal. */
776 static bool
777 add_src_to_param_flow (isra_param_flow *param_flow, int src)
779 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
780 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
781 return false;
783 param_flow->inputs[(int) param_flow->length] = src;
784 param_flow->length++;
785 return true;
788 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
789 it is the only input. Used for purposes of transitive parameter
790 splitting. */
792 static void
793 set_single_param_flow_source (isra_param_flow *param_flow, int src)
795 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
796 if (param_flow->length == 0)
798 param_flow->inputs[0] = src;
799 param_flow->length = 1;
801 else if (param_flow->length == 1)
802 gcc_assert (param_flow->inputs[0] == src);
803 else
804 gcc_unreachable ();
807 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
808 it. */
810 static unsigned
811 get_single_param_flow_source (const isra_param_flow *param_flow)
813 gcc_assert (param_flow->length == 1);
814 return param_flow->inputs[0];
817 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
818 in FUN represented with NODE and return a negative number if any of them is
819 used for something else than either an actual call argument, simple
820 arithmetic operation or debug statement. If there are no such uses, return
821 the number of actual arguments that this parameter eventually feeds to (or
822 zero if there is none). For any such parameter, mark PARM_NUM as one of its
823 sources. ANALYZED is a bitmap that tracks which SSA names we have already
824 started investigating. */
826 static int
827 isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
828 int parm_num, bitmap analyzed)
830 int res = 0;
831 imm_use_iterator imm_iter;
832 gimple *stmt;
834 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
836 if (is_gimple_debug (stmt))
837 continue;
839 /* TODO: We could handle at least const builtin functions like arithmetic
840 operations below. */
841 if (is_gimple_call (stmt))
843 int all_uses = 0;
844 use_operand_p use_p;
845 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
846 all_uses++;
848 gcall *call = as_a <gcall *> (stmt);
849 unsigned arg_count;
850 if (gimple_call_internal_p (call)
851 || (arg_count = gimple_call_num_args (call)) == 0)
853 res = -1;
854 break;
857 cgraph_edge *cs = node->get_edge (stmt);
858 gcc_checking_assert (cs);
859 isra_call_summary *csum = call_sums->get_create (cs);
860 csum->init_inputs (arg_count);
862 int simple_uses = 0;
863 for (unsigned i = 0; i < arg_count; i++)
864 if (gimple_call_arg (call, i) == name)
866 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
868 simple_uses = -1;
869 break;
871 simple_uses++;
874 if (simple_uses < 0
875 || all_uses != simple_uses)
877 res = -1;
878 break;
880 res += all_uses;
882 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
883 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
884 || gimple_code (stmt) == GIMPLE_PHI))
886 tree lhs;
887 if (gimple_code (stmt) == GIMPLE_PHI)
888 lhs = gimple_phi_result (stmt);
889 else
890 lhs = gimple_assign_lhs (stmt);
892 if (TREE_CODE (lhs) != SSA_NAME)
894 res = -1;
895 break;
897 gcc_assert (!gimple_vdef (stmt));
898 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
900 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
901 analyzed);
902 if (tmp < 0)
904 res = tmp;
905 break;
907 res += tmp;
910 else
912 res = -1;
913 break;
916 return res;
919 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
920 also described by NODE) and simple arithmetic calculations involving PARM
921 and return false if any of them is used for something else than either an
922 actual call argument, simple arithmetic operation or debug statement. If
923 there are no such uses, return true and store the number of actual arguments
924 that this parameter eventually feeds to (or zero if there is none) to
925 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
926 sources.
928 This function is similar to ptr_parm_has_nonarg_uses but its results are
929 meant for unused parameter removal, as opposed to splitting of parameters
930 passed by reference or converting them to passed by value.
933 static bool
934 isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
935 int parm_num, int *call_uses_p)
937 gcc_checking_assert (is_gimple_reg (parm));
939 tree name = ssa_default_def (fun, parm);
940 if (!name || has_zero_uses (name))
942 *call_uses_p = 0;
943 return false;
946 /* Edge summaries can only handle callers with fewer than 256 parameters. */
947 if (parm_num > UCHAR_MAX)
948 return true;
950 bitmap analyzed = BITMAP_ALLOC (NULL);
951 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
952 analyzed);
953 BITMAP_FREE (analyzed);
954 if (call_uses < 0)
955 return true;
956 *call_uses_p = call_uses;
957 return false;
960 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
961 examine whether there are any nonarg uses that are not actual arguments or
962 otherwise infeasible uses. If so, return true, otherwise return false.
963 Create pass-through IPA flow records for any direct uses as argument calls
964 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
965 must represent the function that is currently analyzed, PARM_NUM must be the
966 index of the analyzed parameter.
968 This function is similar to isra_track_scalar_param_local_uses but its
969 results are meant for splitting of parameters passed by reference or turning
970 them into bits passed by value, as opposed to generic unused parameter
971 removal.
974 static bool
975 ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
976 int parm_num, unsigned *pt_count_p)
978 imm_use_iterator ui;
979 gimple *stmt;
980 tree name = ssa_default_def (fun, parm);
981 bool ret = false;
982 unsigned pt_count = 0;
984 if (!name || has_zero_uses (name))
985 return false;
987 /* Edge summaries can only handle callers with fewer than 256 parameters. */
988 if (parm_num > UCHAR_MAX)
989 return true;
991 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
993 unsigned uses_ok = 0;
994 use_operand_p use_p;
996 if (is_gimple_debug (stmt))
997 continue;
999 if (gimple_assign_single_p (stmt))
1001 tree rhs = gimple_assign_rhs1 (stmt);
1002 if (!TREE_THIS_VOLATILE (rhs))
1004 while (handled_component_p (rhs))
1005 rhs = TREE_OPERAND (rhs, 0);
1006 if (TREE_CODE (rhs) == MEM_REF
1007 && TREE_OPERAND (rhs, 0) == name
1008 && integer_zerop (TREE_OPERAND (rhs, 1))
1009 && types_compatible_p (TREE_TYPE (rhs),
1010 TREE_TYPE (TREE_TYPE (name))))
1011 uses_ok++;
1014 else if (is_gimple_call (stmt))
1016 gcall *call = as_a <gcall *> (stmt);
1017 unsigned arg_count;
1018 if (gimple_call_internal_p (call)
1019 || (arg_count = gimple_call_num_args (call)) == 0)
1021 ret = true;
1022 break;
1025 cgraph_edge *cs = node->get_edge (stmt);
1026 gcc_checking_assert (cs);
1027 isra_call_summary *csum = call_sums->get_create (cs);
1028 csum->init_inputs (arg_count);
1030 for (unsigned i = 0; i < arg_count; ++i)
1032 tree arg = gimple_call_arg (stmt, i);
1034 if (arg == name)
1036 /* TODO: Allow &MEM_REF[name + offset] here,
1037 ipa_param_body_adjustments::modify_call_stmt has to be
1038 adjusted too. */
1039 csum->m_arg_flow[i].pointer_pass_through = true;
1040 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1041 pt_count++;
1042 uses_ok++;
1043 continue;
1046 if (!TREE_THIS_VOLATILE (arg))
1048 while (handled_component_p (arg))
1049 arg = TREE_OPERAND (arg, 0);
1050 if (TREE_CODE (arg) == MEM_REF
1051 && TREE_OPERAND (arg, 0) == name
1052 && integer_zerop (TREE_OPERAND (arg, 1))
1053 && types_compatible_p (TREE_TYPE (arg),
1054 TREE_TYPE (TREE_TYPE (name))))
1055 uses_ok++;
1060 /* If the number of valid uses does not match the number of
1061 uses in this stmt there is an unhandled use. */
1062 unsigned all_uses = 0;
1063 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1064 all_uses++;
1066 gcc_checking_assert (uses_ok <= all_uses);
1067 if (uses_ok != all_uses)
1069 ret = true;
1070 break;
1074 *pt_count_p = pt_count;
1075 return ret;
1078 /* Initialize vector of parameter descriptors of NODE. Return true if there
1079 are any candidates for splitting or unused aggregate parameter removal (the
1080 function may return false if there are candidates for removal of register
1081 parameters) and function body must be scanned. */
1083 static bool
1084 create_parameter_descriptors (cgraph_node *node,
1085 vec<gensum_param_desc> *param_descriptions)
1087 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1088 bool ret = false;
1090 int num = 0;
1091 for (tree parm = DECL_ARGUMENTS (node->decl);
1092 parm;
1093 parm = DECL_CHAIN (parm), num++)
1095 const char *msg;
1096 gensum_param_desc *desc = &(*param_descriptions)[num];
1097 /* param_descriptions vector is grown cleared in the caller. */
1098 desc->param_number = num;
1099 decl2desc->put (parm, desc);
1101 if (dump_file && (dump_flags & TDF_DETAILS))
1102 print_generic_expr (dump_file, parm, TDF_UID);
1104 int scalar_call_uses;
1105 tree type = TREE_TYPE (parm);
1106 if (TREE_THIS_VOLATILE (parm))
1108 if (dump_file && (dump_flags & TDF_DETAILS))
1109 fprintf (dump_file, " not a candidate, is volatile\n");
1110 continue;
1112 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1114 if (dump_file && (dump_flags & TDF_DETAILS))
1115 fprintf (dump_file, " not a candidate, is a va_list type\n");
1116 continue;
1118 if (TREE_ADDRESSABLE (parm))
1120 if (dump_file && (dump_flags & TDF_DETAILS))
1121 fprintf (dump_file, " not a candidate, is addressable\n");
1122 continue;
1124 if (TREE_ADDRESSABLE (type))
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1127 fprintf (dump_file, " not a candidate, type cannot be split\n");
1128 continue;
1131 if (is_gimple_reg (parm)
1132 && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1133 &scalar_call_uses))
1135 if (dump_file && (dump_flags & TDF_DETAILS))
1136 fprintf (dump_file, " is a scalar with only %i call uses\n",
1137 scalar_call_uses);
1139 desc->locally_unused = true;
1140 desc->call_uses = scalar_call_uses;
1143 if (POINTER_TYPE_P (type))
1145 type = TREE_TYPE (type);
1147 if (TREE_CODE (type) == FUNCTION_TYPE
1148 || TREE_CODE (type) == METHOD_TYPE)
1150 if (dump_file && (dump_flags & TDF_DETAILS))
1151 fprintf (dump_file, " not a candidate, reference to "
1152 "a function\n");
1153 continue;
1155 if (TYPE_VOLATILE (type))
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1158 fprintf (dump_file, " not a candidate, reference to "
1159 "a volatile type\n");
1160 continue;
1162 if (TREE_CODE (type) == ARRAY_TYPE
1163 && TYPE_NONALIASED_COMPONENT (type))
1165 if (dump_file && (dump_flags & TDF_DETAILS))
1166 fprintf (dump_file, " not a candidate, reference to "
1167 "a nonaliased component array\n");
1168 continue;
1170 if (!is_gimple_reg (parm))
1172 if (dump_file && (dump_flags & TDF_DETAILS))
1173 fprintf (dump_file, " not a candidate, a reference which is "
1174 "not a gimple register (probably addressable)\n");
1175 continue;
1177 if (is_va_list_type (type))
1179 if (dump_file && (dump_flags & TDF_DETAILS))
1180 fprintf (dump_file, " not a candidate, reference to "
1181 "a va list\n");
1182 continue;
1184 if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1185 &desc->ptr_pt_count))
1187 if (dump_file && (dump_flags & TDF_DETAILS))
1188 fprintf (dump_file, " not a candidate, reference has "
1189 "nonarg uses\n");
1190 continue;
1192 desc->by_ref = true;
1194 else if (!AGGREGATE_TYPE_P (type))
1196 /* This is in an else branch because scalars passed by reference are
1197 still candidates to be passed by value. */
1198 if (dump_file && (dump_flags & TDF_DETAILS))
1199 fprintf (dump_file, " not a candidate, not an aggregate\n");
1200 continue;
1203 if (!COMPLETE_TYPE_P (type))
1205 if (dump_file && (dump_flags & TDF_DETAILS))
1206 fprintf (dump_file, " not a candidate, not a complete type\n");
1207 continue;
1209 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1211 if (dump_file && (dump_flags & TDF_DETAILS))
1212 fprintf (dump_file, " not a candidate, size not representable\n");
1213 continue;
1215 unsigned HOST_WIDE_INT type_size
1216 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1217 if (type_size == 0
1218 || type_size >= ISRA_ARG_SIZE_LIMIT)
1220 if (dump_file && (dump_flags & TDF_DETAILS))
1221 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1222 continue;
1224 if (type_internals_preclude_sra_p (type, &msg))
1226 if (dump_file && (dump_flags & TDF_DETAILS))
1227 fprintf (dump_file, " not a candidate, %s\n", msg);
1228 continue;
1231 if (dump_file && (dump_flags & TDF_DETAILS))
1232 fprintf (dump_file, " is a candidate\n");
1234 ret = true;
1235 desc->split_candidate = true;
1236 if (desc->by_ref)
1237 desc->deref_index = by_ref_count++;
1239 return ret;
1242 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1243 found, which happens if DECL is for a static chain. */
1245 static gensum_param_desc *
1246 get_gensum_param_desc (tree decl)
1248 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1249 gensum_param_desc **slot = decl2desc->get (decl);
1250 if (!slot)
1251 /* This can happen for static chains which we cannot handle so far. */
1252 return NULL;
1253 gcc_checking_assert (*slot);
1254 return *slot;
1258 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1259 write REASON to the dump file if there is one. */
1261 static void
1262 disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1264 if (!desc->split_candidate)
1265 return;
1267 if (dump_file && (dump_flags & TDF_DETAILS))
1268 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1269 desc->param_number, reason);
1271 desc->split_candidate = false;
1274 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1275 there is one. */
1277 static void
1278 disqualify_split_candidate (tree decl, const char *reason)
1280 gensum_param_desc *desc = get_gensum_param_desc (decl);
1281 if (desc)
1282 disqualify_split_candidate (desc, reason);
1285 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1286 first, check that there are not too many of them already. If so, do not
1287 allocate anything and return NULL. */
1289 static gensum_param_access *
1290 allocate_access (gensum_param_desc *desc,
1291 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1293 if (desc->access_count
1294 == (unsigned) param_ipa_sra_max_replacements)
1296 disqualify_split_candidate (desc, "Too many replacement candidates");
1297 return NULL;
1300 gensum_param_access *access
1301 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1302 sizeof (gensum_param_access));
1303 memset (access, 0, sizeof (*access));
1304 access->offset = offset;
1305 access->size = size;
1306 return access;
1309 /* In what context scan_expr_access has been called, whether it deals with a
1310 load, a function argument, or a store. Please note that in rare
1311 circumstances when it is not clear if the access is a load or store,
1312 ISRA_CTX_STORE is used too. */
1314 enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1316 /* Return an access describing memory access to the variable described by DESC
1317 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1318 at a certain tree level FIRST. Attempt to create it and put into the
1319 appropriate place in the access tree if does not exist, but fail and return
1320 NULL if there are already too many accesses, if it would create a partially
1321 overlapping access or if an access would end up within a pre-existing
1322 non-call access. */
1324 static gensum_param_access *
1325 get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1326 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1328 gensum_param_access *access = *first, **ptr = first;
1330 if (!access)
1332 /* No pre-existing access at this level, just create it. */
1333 gensum_param_access *a = allocate_access (desc, offset, size);
1334 if (!a)
1335 return NULL;
1336 *first = a;
1337 return *first;
1340 if (access->offset >= offset + size)
1342 /* We want to squeeze it in front of the very first access, just do
1343 it. */
1344 gensum_param_access *r = allocate_access (desc, offset, size);
1345 if (!r)
1346 return NULL;
1347 r->next_sibling = access;
1348 *first = r;
1349 return r;
1352 /* Skip all accesses that have to come before us until the next sibling is
1353 already too far. */
1354 while (offset >= access->offset + access->size
1355 && access->next_sibling
1356 && access->next_sibling->offset < offset + size)
1358 ptr = &access->next_sibling;
1359 access = access->next_sibling;
1362 /* At this point we know we do not belong before access. */
1363 gcc_assert (access->offset < offset + size);
1365 if (access->offset == offset && access->size == size)
1366 /* We found what we were looking for. */
1367 return access;
1369 if (access->offset <= offset
1370 && access->offset + access->size >= offset + size)
1372 /* We fit into access which is larger than us. We need to find/create
1373 something below access. But we only allow nesting in call
1374 arguments. */
1375 if (access->nonarg)
1376 return NULL;
1378 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1381 if (offset <= access->offset
1382 && offset + size >= access->offset + access->size)
1383 /* We are actually bigger than access, which fully fits into us, take its
1384 place and make all accesses fitting into it its children. */
1386 /* But first, we only allow nesting in call arguments so check if that is
1387 what we are trying to represent. */
1388 if (ctx != ISRA_CTX_ARG)
1389 return NULL;
1391 gensum_param_access *r = allocate_access (desc, offset, size);
1392 if (!r)
1393 return NULL;
1394 r->first_child = access;
1396 while (access->next_sibling
1397 && access->next_sibling->offset < offset + size)
1398 access = access->next_sibling;
1399 if (access->offset + access->size > offset + size)
1401 /* This must be a different access, which are sorted, so the
1402 following must be true and this signals a partial overlap. */
1403 gcc_assert (access->offset > offset);
1404 return NULL;
1407 r->next_sibling = access->next_sibling;
1408 access->next_sibling = NULL;
1409 *ptr = r;
1410 return r;
1413 if (offset >= access->offset + access->size)
1415 /* We belong after access. */
1416 gensum_param_access *r = allocate_access (desc, offset, size);
1417 if (!r)
1418 return NULL;
1419 r->next_sibling = access->next_sibling;
1420 access->next_sibling = r;
1421 return r;
1424 if (offset < access->offset)
1426 /* We know the following, otherwise we would have created a
1427 super-access. */
1428 gcc_checking_assert (offset + size < access->offset + access->size);
1429 return NULL;
1432 if (offset + size > access->offset + access->size)
1434 /* Likewise. */
1435 gcc_checking_assert (offset > access->offset);
1436 return NULL;
1439 gcc_unreachable ();
1442 /* Return an access describing memory access to the variable described by DESC
1443 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1444 to create if it does not exist, but fail and return NULL if there are
1445 already too many accesses, if it would create a partially overlapping access
1446 or if an access would end up in a non-call access. */
1448 static gensum_param_access *
1449 get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1450 isra_scan_context ctx)
1452 gcc_checking_assert (desc->split_candidate);
1454 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1455 size, ctx);
1456 if (!access)
1458 disqualify_split_candidate (desc,
1459 "Bad access overlap or too many accesses");
1460 return NULL;
1463 switch (ctx)
1465 case ISRA_CTX_STORE:
1466 gcc_assert (!desc->by_ref);
1467 /* Fall-through */
1468 case ISRA_CTX_LOAD:
1469 access->nonarg = true;
1470 break;
1471 case ISRA_CTX_ARG:
1472 break;
1475 return access;
1478 /* Verify that parameter access tree starting with ACCESS is in good shape.
1479 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1480 ACCESS or zero if there is none. */
1482 static bool
1483 verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1484 HOST_WIDE_INT parent_size)
1486 while (access)
1488 gcc_assert (access->offset >= 0 && access->size >= 0);
1490 if (parent_size != 0)
1492 if (access->offset < parent_offset)
1494 error ("Access offset before parent offset");
1495 return true;
1497 if (access->size >= parent_size)
1499 error ("Access size greater or equal to its parent size");
1500 return true;
1502 if (access->offset + access->size > parent_offset + parent_size)
1504 error ("Access terminates outside of its parent");
1505 return true;
1509 if (verify_access_tree_1 (access->first_child, access->offset,
1510 access->size))
1511 return true;
1513 if (access->next_sibling
1514 && (access->next_sibling->offset < access->offset + access->size))
1516 error ("Access overlaps with its sibling");
1517 return true;
1520 access = access->next_sibling;
1522 return false;
1525 /* Verify that parameter access tree starting with ACCESS is in good shape,
1526 halt compilation and dump the tree to stderr if not. */
1528 DEBUG_FUNCTION void
1529 isra_verify_access_tree (gensum_param_access *access)
1531 if (verify_access_tree_1 (access, 0, 0))
1533 for (; access; access = access->next_sibling)
1534 dump_gensum_access (stderr, access, 2);
1535 internal_error ("IPA-SRA access verification failed");
1540 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1541 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1543 static bool
1544 asm_visit_addr (gimple *, tree op, tree, void *)
1546 op = get_base_address (op);
1547 if (op
1548 && TREE_CODE (op) == PARM_DECL)
1549 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1551 return false;
1554 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1555 basic block BB, unless the BB has already been marked as a potentially
1556 final. */
1558 static void
1559 mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1560 basic_block bb)
1562 gcc_assert (desc->by_ref);
1563 gcc_checking_assert (desc->split_candidate);
1565 if (bitmap_bit_p (final_bbs, bb->index))
1566 return;
1568 int idx = bb->index * by_ref_count + desc->deref_index;
1569 if (bb_dereferences[idx] < dist)
1570 bb_dereferences[idx] = dist;
1573 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1574 previously recorded OLD_TYPE. */
1576 static bool
1577 type_prevails_p (tree old_type, tree new_type)
1579 if (old_type == new_type)
1580 return false;
1582 /* Non-aggregates are always better. */
1583 if (!is_gimple_reg_type (old_type)
1584 && is_gimple_reg_type (new_type))
1585 return true;
1586 if (is_gimple_reg_type (old_type)
1587 && !is_gimple_reg_type (new_type))
1588 return false;
1590 /* Prefer any complex or vector type over any other scalar type. */
1591 if (TREE_CODE (old_type) != COMPLEX_TYPE
1592 && TREE_CODE (old_type) != VECTOR_TYPE
1593 && (TREE_CODE (new_type) == COMPLEX_TYPE
1594 || TREE_CODE (new_type) == VECTOR_TYPE))
1595 return true;
1596 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1597 || TREE_CODE (old_type) == VECTOR_TYPE)
1598 && TREE_CODE (new_type) != COMPLEX_TYPE
1599 && TREE_CODE (new_type) != VECTOR_TYPE)
1600 return false;
1602 /* Use the integral type with the bigger precision. */
1603 if (INTEGRAL_TYPE_P (old_type)
1604 && INTEGRAL_TYPE_P (new_type))
1605 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1607 /* Attempt to disregard any integral type with non-full precision. */
1608 if (INTEGRAL_TYPE_P (old_type)
1609 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1610 != TYPE_PRECISION (old_type)))
1611 return true;
1612 if (INTEGRAL_TYPE_P (new_type)
1613 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1614 != TYPE_PRECISION (new_type)))
1615 return false;
1616 /* Stabilize the selection. */
1617 return TYPE_UID (old_type) < TYPE_UID (new_type);
1620 /* When scanning an expression which is a call argument, this structure
1621 specifies the call and the position of the argument. */
1623 struct scan_call_info
1625 /* Call graph edge representing the call. */
1626 cgraph_edge *cs;
1627 /* Total number of arguments in the call. */
1628 unsigned argument_count;
1629 /* Number of the actual argument being scanned. */
1630 unsigned arg_idx;
1633 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1634 call argument described by CALL_INFO. */
1636 static void
1637 record_nonregister_call_use (gensum_param_desc *desc,
1638 scan_call_info *call_info,
1639 unsigned unit_offset, unsigned unit_size)
1641 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1642 csum->init_inputs (call_info->argument_count);
1644 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1645 param_flow->aggregate_pass_through = true;
1646 set_single_param_flow_source (param_flow, desc->param_number);
1647 param_flow->unit_offset = unit_offset;
1648 param_flow->unit_size = unit_size;
1649 desc->call_uses++;
1652 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1653 modification. */
1655 static bool
1656 mark_maybe_modified (ao_ref *, tree, void *data)
1658 bool *maybe_modified = (bool *) data;
1659 *maybe_modified = true;
1660 return true;
1663 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1664 specifies whether EXPR is used in a load, store or as an argument call. BB
1665 must be the basic block in which expr resides. If CTX specifies call
1666 argument context, CALL_INFO must describe that call and argument position,
1667 otherwise it is ignored. */
1669 static void
1670 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1671 basic_block bb, scan_call_info *call_info = NULL)
1673 poly_int64 poffset, psize, pmax_size;
1674 HOST_WIDE_INT offset, size, max_size;
1675 tree base;
1676 bool deref = false;
1677 bool reverse;
1679 if (TREE_CODE (expr) == BIT_FIELD_REF
1680 || TREE_CODE (expr) == IMAGPART_EXPR
1681 || TREE_CODE (expr) == REALPART_EXPR)
1682 expr = TREE_OPERAND (expr, 0);
1684 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1686 if (TREE_CODE (base) == MEM_REF)
1688 tree op = TREE_OPERAND (base, 0);
1689 if (TREE_CODE (op) != SSA_NAME
1690 || !SSA_NAME_IS_DEFAULT_DEF (op))
1691 return;
1692 base = SSA_NAME_VAR (op);
1693 if (!base)
1694 return;
1695 deref = true;
1697 if (TREE_CODE (base) != PARM_DECL)
1698 return;
1700 gensum_param_desc *desc = get_gensum_param_desc (base);
1701 if (!desc || !desc->split_candidate)
1702 return;
1704 if (!poffset.is_constant (&offset)
1705 || !psize.is_constant (&size)
1706 || !pmax_size.is_constant (&max_size))
1708 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1709 "access.");
1710 return;
1712 if (size < 0 || size != max_size)
1714 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1715 return;
1717 if (TREE_CODE (expr) == COMPONENT_REF
1718 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1720 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1721 return;
1723 if (offset < 0)
1725 disqualify_split_candidate (desc, "Encountered an access at a "
1726 "negative offset.");
1727 return;
1729 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1730 gcc_assert ((size % BITS_PER_UNIT) == 0);
1731 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1732 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1734 disqualify_split_candidate (desc, "Encountered an access with too big "
1735 "offset or size");
1736 return;
1739 tree type = TREE_TYPE (expr);
1740 unsigned int exp_align = get_object_alignment (expr);
1742 if (exp_align < TYPE_ALIGN (type))
1744 disqualify_split_candidate (desc, "Underaligned access.");
1745 return;
1748 if (deref)
1750 if (!desc->by_ref)
1752 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1753 return;
1755 else if (ctx == ISRA_CTX_STORE)
1757 disqualify_split_candidate (desc, "Storing to data passed by "
1758 "reference.");
1759 return;
1762 if (!aa_walking_limit)
1764 disqualify_split_candidate (desc, "Out of alias analysis step "
1765 "limit.");
1766 return;
1769 gcc_checking_assert (gimple_vuse (stmt));
1770 bool maybe_modified = false;
1771 ao_ref ar;
1773 ao_ref_init (&ar, expr);
1774 bitmap visited = BITMAP_ALLOC (NULL);
1775 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1776 mark_maybe_modified, &maybe_modified,
1777 &visited, NULL, aa_walking_limit);
1778 BITMAP_FREE (visited);
1779 if (walked > 0)
1781 gcc_assert (aa_walking_limit > walked);
1782 aa_walking_limit = aa_walking_limit - walked;
1784 if (walked < 0)
1785 aa_walking_limit = 0;
1786 if (maybe_modified || walked < 0)
1788 disqualify_split_candidate (desc, "Data passed by reference possibly "
1789 "modified through an alias.");
1790 return;
1792 else
1793 mark_param_dereference (desc, offset + size, bb);
1795 else
1796 /* Pointer parameters with direct uses should have been ruled out by
1797 analyzing SSA default def when looking at the parameters. */
1798 gcc_assert (!desc->by_ref);
1800 gensum_param_access *access = get_access (desc, offset, size, ctx);
1801 if (!access)
1802 return;
1804 if (ctx == ISRA_CTX_ARG)
1806 gcc_checking_assert (call_info);
1808 if (!deref)
1809 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1810 size / BITS_PER_UNIT);
1811 else
1812 /* This is not a pass-through of a pointer, this is a use like any
1813 other. */
1814 access->nonarg = true;
1817 if (!access->type)
1819 access->type = type;
1820 access->alias_ptr_type = reference_alias_ptr_type (expr);
1821 access->reverse = reverse;
1823 else
1825 if (exp_align < TYPE_ALIGN (access->type))
1827 disqualify_split_candidate (desc, "Reference has lower alignment "
1828 "than a previous one.");
1829 return;
1831 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1833 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1834 return;
1836 if (access->reverse != reverse)
1838 disqualify_split_candidate (desc, "Both normal and reverse "
1839 "scalar storage order.");
1840 return;
1842 if (!deref
1843 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1844 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1846 /* We need the same aggregate type on all accesses to be able to
1847 distinguish transformation spots from pass-through arguments in
1848 the transformation phase. */
1849 disqualify_split_candidate (desc, "We do not support aggregate "
1850 "type punning.");
1851 return;
1854 if (type_prevails_p (access->type, type))
1855 access->type = type;
1859 /* Scan body function described by NODE and FUN and create access trees for
1860 parameters. */
1862 static void
1863 scan_function (cgraph_node *node, struct function *fun)
1865 basic_block bb;
1867 FOR_EACH_BB_FN (bb, fun)
1869 gimple_stmt_iterator gsi;
1870 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1872 gimple *stmt = gsi_stmt (gsi);
1874 if (stmt_can_throw_external (fun, stmt))
1875 bitmap_set_bit (final_bbs, bb->index);
1876 switch (gimple_code (stmt))
1878 case GIMPLE_RETURN:
1880 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1881 if (t != NULL_TREE)
1882 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1883 bitmap_set_bit (final_bbs, bb->index);
1885 break;
1887 case GIMPLE_ASSIGN:
1888 if (gimple_assign_single_p (stmt)
1889 && !gimple_clobber_p (stmt))
1891 tree rhs = gimple_assign_rhs1 (stmt);
1892 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1893 tree lhs = gimple_assign_lhs (stmt);
1894 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1896 break;
1898 case GIMPLE_CALL:
1900 unsigned argument_count = gimple_call_num_args (stmt);
1901 isra_scan_context ctx = ISRA_CTX_ARG;
1902 scan_call_info call_info, *call_info_p = &call_info;
1903 if (gimple_call_internal_p (stmt))
1905 call_info_p = NULL;
1906 ctx = ISRA_CTX_LOAD;
1907 internal_fn ifn = gimple_call_internal_fn (stmt);
1908 if (internal_store_fn_p (ifn))
1909 ctx = ISRA_CTX_STORE;
1911 else
1913 call_info.cs = node->get_edge (stmt);
1914 call_info.argument_count = argument_count;
1917 for (unsigned i = 0; i < argument_count; i++)
1919 call_info.arg_idx = i;
1920 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1921 ctx, bb, call_info_p);
1924 tree lhs = gimple_call_lhs (stmt);
1925 if (lhs)
1926 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1927 int flags = gimple_call_flags (stmt);
1928 if (((flags & (ECF_CONST | ECF_PURE)) == 0)
1929 || (flags & ECF_LOOPING_CONST_OR_PURE))
1930 bitmap_set_bit (final_bbs, bb->index);
1932 break;
1934 case GIMPLE_ASM:
1936 gasm *asm_stmt = as_a <gasm *> (stmt);
1937 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1938 asm_visit_addr);
1939 bitmap_set_bit (final_bbs, bb->index);
1941 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1943 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1944 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1946 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1948 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1949 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1952 break;
1954 default:
1955 break;
1961 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1962 return statements, or if results of any operations it is involved in are
1963 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1964 names we have already started investigating. */
1966 static bool
1967 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1969 bool res = true;
1970 imm_use_iterator imm_iter;
1971 gimple *stmt;
1973 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1975 if (is_gimple_debug (stmt))
1976 continue;
1978 if (gimple_code (stmt) == GIMPLE_RETURN)
1980 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1981 if (t != name)
1983 res = false;
1984 break;
1987 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1988 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1989 || gimple_code (stmt) == GIMPLE_PHI))
1991 /* TODO: And perhaps for const function calls too? */
1992 tree lhs;
1993 if (gimple_code (stmt) == GIMPLE_PHI)
1994 lhs = gimple_phi_result (stmt);
1995 else
1996 lhs = gimple_assign_lhs (stmt);
1998 if (TREE_CODE (lhs) != SSA_NAME)
2000 res = false;
2001 break;
2003 gcc_assert (!gimple_vdef (stmt));
2004 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2005 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2007 res = false;
2008 break;
2011 else
2013 res = false;
2014 break;
2017 return res;
2020 /* Inspect the uses of the return value of the call associated with CS, and if
2021 it is not used or if it is only used to construct the return value of the
2022 caller, mark it as such in call or caller summary. Also check for
2023 misaligned arguments. */
2025 static void
2026 isra_analyze_call (cgraph_edge *cs)
2028 gcall *call_stmt = cs->call_stmt;
2029 unsigned count = gimple_call_num_args (call_stmt);
2030 isra_call_summary *csum = call_sums->get_create (cs);
2032 for (unsigned i = 0; i < count; i++)
2034 tree arg = gimple_call_arg (call_stmt, i);
2035 if (is_gimple_reg (arg))
2036 continue;
2038 tree offset;
2039 poly_int64 bitsize, bitpos;
2040 machine_mode mode;
2041 int unsignedp, reversep, volatilep = 0;
2042 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2043 &unsignedp, &reversep, &volatilep);
2044 if (!multiple_p (bitpos, BITS_PER_UNIT))
2046 csum->m_bit_aligned_arg = true;
2047 break;
2051 tree lhs = gimple_call_lhs (call_stmt);
2052 if (lhs)
2054 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2055 from this function (without being read anywhere). */
2056 if (TREE_CODE (lhs) == SSA_NAME)
2058 bitmap analyzed = BITMAP_ALLOC (NULL);
2059 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2060 lhs, analyzed))
2061 csum->m_return_returned = true;
2062 BITMAP_FREE (analyzed);
2065 else
2066 csum->m_return_ignored = true;
2069 /* Look at all calls going out of NODE, described also by IFS and perform all
2070 analyses necessary for IPA-SRA that are not done at body scan time or done
2071 even when body is not scanned because the function is not a candidate. */
2073 static void
2074 isra_analyze_all_outgoing_calls (cgraph_node *node)
2076 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2077 isra_analyze_call (cs);
2078 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2079 isra_analyze_call (cs);
2082 /* Dump a dereferences table with heading STR to file F. */
2084 static void
2085 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2087 basic_block bb;
2089 fprintf (dump_file, "%s", str);
2090 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2091 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2093 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2094 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2096 int i;
2097 for (i = 0; i < by_ref_count; i++)
2099 int idx = bb->index * by_ref_count + i;
2100 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2103 fprintf (f, "\n");
2105 fprintf (dump_file, "\n");
2108 /* Propagate distances in bb_dereferences in the opposite direction than the
2109 control flow edges, in each step storing the maximum of the current value
2110 and the minimum of all successors. These steps are repeated until the table
2111 stabilizes. Note that BBs which might terminate the functions (according to
2112 final_bbs bitmap) never updated in this way. */
2114 static void
2115 propagate_dereference_distances (struct function *fun)
2117 basic_block bb;
2119 if (dump_file && (dump_flags & TDF_DETAILS))
2120 dump_dereferences_table (dump_file, fun,
2121 "Dereference table before propagation:\n");
2123 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2124 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2125 FOR_EACH_BB_FN (bb, fun)
2127 queue.quick_push (bb);
2128 bb->aux = bb;
2131 while (!queue.is_empty ())
2133 edge_iterator ei;
2134 edge e;
2135 bool change = false;
2136 int i;
2138 bb = queue.pop ();
2139 bb->aux = NULL;
2141 if (bitmap_bit_p (final_bbs, bb->index))
2142 continue;
2144 for (i = 0; i < by_ref_count; i++)
2146 int idx = bb->index * by_ref_count + i;
2147 bool first = true;
2148 HOST_WIDE_INT inh = 0;
2150 FOR_EACH_EDGE (e, ei, bb->succs)
2152 int succ_idx = e->dest->index * by_ref_count + i;
2154 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2155 continue;
2157 if (first)
2159 first = false;
2160 inh = bb_dereferences [succ_idx];
2162 else if (bb_dereferences [succ_idx] < inh)
2163 inh = bb_dereferences [succ_idx];
2166 if (!first && bb_dereferences[idx] < inh)
2168 bb_dereferences[idx] = inh;
2169 change = true;
2173 if (change)
2174 FOR_EACH_EDGE (e, ei, bb->preds)
2176 if (e->src->aux)
2177 continue;
2179 e->src->aux = e->src;
2180 queue.quick_push (e->src);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 dump_dereferences_table (dump_file, fun,
2186 "Dereference table after propagation:\n");
2189 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2190 children, return true if the parameter cannot be split, otherwise return
2191 true and update *TOTAL_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be the
2192 index of the entry BB in the function of PARM. */
2194 static bool
2195 check_gensum_access (tree parm, gensum_param_desc *desc,
2196 gensum_param_access *access,
2197 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2198 int entry_bb_index)
2200 if (access->nonarg)
2202 *only_calls = false;
2203 *nonarg_acc_size += access->size;
2205 if (access->first_child)
2207 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2208 return true;
2211 /* Do not decompose a non-BLKmode param in a way that would create
2212 BLKmode params. Especially for by-reference passing (thus,
2213 pointer-type param) this is hardly worthwhile. */
2214 if (DECL_MODE (parm) != BLKmode
2215 && TYPE_MODE (access->type) == BLKmode)
2217 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2218 return true;
2221 if (desc->by_ref)
2223 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2224 if ((access->offset + access->size) > bb_dereferences[idx])
2226 disqualify_split_candidate (desc, "Would create a possibly "
2227 "illegal dereference in a caller.");
2228 return true;
2232 for (gensum_param_access *ch = access->first_child;
2234 ch = ch->next_sibling)
2235 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2236 entry_bb_index))
2237 return true;
2239 return false;
2242 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2243 descriptor DESC. */
2245 static void
2246 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2248 param_access *to = ggc_cleared_alloc<param_access> ();
2249 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2250 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2251 to->unit_offset = from->offset / BITS_PER_UNIT;
2252 to->unit_size = from->size / BITS_PER_UNIT;
2253 to->type = from->type;
2254 to->alias_ptr_type = from->alias_ptr_type;
2255 to->certain = from->nonarg;
2256 to->reverse = from->reverse;
2257 vec_safe_push (desc->accesses, to);
2259 for (gensum_param_access *ch = from->first_child;
2261 ch = ch->next_sibling)
2262 copy_accesses_to_ipa_desc (ch, desc);
2265 /* Analyze function body scan results stored in param_accesses and
2266 param_accesses, detect possible transformations and store information of
2267 those in function summary. NODE, FUN and IFS are all various structures
2268 describing the currently analyzed function. */
2270 static void
2271 process_scan_results (cgraph_node *node, struct function *fun,
2272 isra_func_summary *ifs,
2273 vec<gensum_param_desc> *param_descriptions)
2275 bool check_pass_throughs = false;
2276 bool dereferences_propagated = false;
2277 tree parm = DECL_ARGUMENTS (node->decl);
2278 unsigned param_count = param_descriptions->length();
2280 for (unsigned desc_index = 0;
2281 desc_index < param_count;
2282 desc_index++, parm = DECL_CHAIN (parm))
2284 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2285 if (!desc->split_candidate)
2286 continue;
2288 if (flag_checking)
2289 isra_verify_access_tree (desc->accesses);
2291 if (!dereferences_propagated
2292 && desc->by_ref
2293 && desc->accesses)
2295 propagate_dereference_distances (fun);
2296 dereferences_propagated = true;
2299 HOST_WIDE_INT nonarg_acc_size = 0;
2300 bool only_calls = true;
2301 bool check_failed = false;
2303 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2304 for (gensum_param_access *acc = desc->accesses;
2305 acc;
2306 acc = acc->next_sibling)
2307 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2308 entry_bb_index))
2310 check_failed = true;
2311 break;
2313 if (check_failed)
2314 continue;
2316 if (only_calls)
2317 desc->locally_unused = true;
2319 HOST_WIDE_INT cur_param_size
2320 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2321 HOST_WIDE_INT param_size_limit;
2322 if (!desc->by_ref || optimize_function_for_size_p (fun))
2323 param_size_limit = cur_param_size;
2324 else
2325 param_size_limit
2326 = opt_for_fn (node->decl,
2327 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2328 if (nonarg_acc_size > param_size_limit
2329 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2331 disqualify_split_candidate (desc, "Would result into a too big set "
2332 "of replacements.");
2334 else
2336 /* create_parameter_descriptors makes sure unit sizes of all
2337 candidate parameters fit unsigned integers restricted to
2338 ISRA_ARG_SIZE_LIMIT. */
2339 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2340 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2341 if (desc->split_candidate && desc->ptr_pt_count)
2343 gcc_assert (desc->by_ref);
2344 check_pass_throughs = true;
2349 /* When a pointer parameter is passed-through to a callee, in which it is
2350 only used to read only one or a few items, we can attempt to transform it
2351 to obtaining and passing through the items instead of the pointer. But we
2352 must take extra care that 1) we do not introduce any segfault by moving
2353 dereferences above control flow and that 2) the data is not modified
2354 through an alias in this function. The IPA analysis must not introduce
2355 any accesses candidates unless it can prove both.
2357 The current solution is very crude as it consists of ensuring that the
2358 call postdominates entry BB and that the definition of VUSE of the call is
2359 default definition. TODO: For non-recursive callees in the same
2360 compilation unit we could do better by doing analysis in topological order
2361 an looking into access candidates of callees, using their alias_ptr_types
2362 to attempt real AA. We could also use the maximum known dereferenced
2363 offset in this function at IPA level.
2365 TODO: Measure the overhead and the effect of just being pessimistic.
2366 Maybe this is only -O3 material?
2368 bool pdoms_calculated = false;
2369 if (check_pass_throughs)
2370 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2372 gcall *call_stmt = cs->call_stmt;
2373 tree vuse = gimple_vuse (call_stmt);
2375 /* If the callee is a const function, we don't get a VUSE. In such
2376 case there will be no memory accesses in the called function (or the
2377 const attribute is wrong) and then we just don't care. */
2378 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2380 unsigned count = gimple_call_num_args (call_stmt);
2381 isra_call_summary *csum = call_sums->get_create (cs);
2382 csum->init_inputs (count);
2383 csum->m_before_any_store = uses_memory_as_obtained;
2384 for (unsigned argidx = 0; argidx < count; argidx++)
2386 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2387 continue;
2388 unsigned pidx
2389 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2390 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2391 if (!desc->split_candidate)
2393 csum->m_arg_flow[argidx].pointer_pass_through = false;
2394 continue;
2396 if (!uses_memory_as_obtained)
2397 continue;
2399 /* Post-dominator check placed last, hoping that it usually won't
2400 be needed. */
2401 if (!pdoms_calculated)
2403 gcc_checking_assert (cfun);
2404 connect_infinite_loops_to_exit ();
2405 calculate_dominance_info (CDI_POST_DOMINATORS);
2406 pdoms_calculated = true;
2408 if (dominated_by_p (CDI_POST_DOMINATORS,
2409 gimple_bb (call_stmt),
2410 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2411 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2415 if (pdoms_calculated)
2417 free_dominance_info (CDI_POST_DOMINATORS);
2418 remove_fake_exit_edges ();
2421 /* TODO: Add early exit if we disqualified everything. This also requires
2422 that we either relax the restriction that
2423 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2424 or store the number of parameters to IPA-SRA function summary and use that
2425 when just removing params. */
2427 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2428 ifs->m_parameters->quick_grow_cleared (param_count);
2429 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2431 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2432 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2434 d->param_size_limit = s->param_size_limit;
2435 d->size_reached = s->nonarg_acc_size;
2436 d->locally_unused = s->locally_unused;
2437 d->split_candidate = s->split_candidate;
2438 d->by_ref = s->by_ref;
2440 for (gensum_param_access *acc = s->accesses;
2441 acc;
2442 acc = acc->next_sibling)
2443 copy_accesses_to_ipa_desc (acc, d);
2446 if (dump_file)
2447 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2450 /* Return true if there are any overlaps among certain accesses of DESC. If
2451 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2452 too. DESC is assumed to be a split candidate that is not locally
2453 unused. */
2455 static bool
2456 overlapping_certain_accesses_p (isra_param_desc *desc,
2457 bool *certain_access_present_p)
2459 unsigned pclen = vec_safe_length (desc->accesses);
2460 for (unsigned i = 0; i < pclen; i++)
2462 param_access *a1 = (*desc->accesses)[i];
2464 if (!a1->certain)
2465 continue;
2466 if (certain_access_present_p)
2467 *certain_access_present_p = true;
2468 for (unsigned j = i + 1; j < pclen; j++)
2470 param_access *a2 = (*desc->accesses)[j];
2471 if (a2->certain
2472 && a1->unit_offset < a2->unit_offset + a2->unit_size
2473 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2474 return true;
2477 return false;
2480 /* Check for any overlaps of certain param accesses among splitting candidates
2481 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2482 check that used splitting candidates have at least one certain access. */
2484 static void
2485 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2487 isra_func_summary *ifs = func_sums->get (node);
2488 if (!ifs || !ifs->m_candidate)
2489 return;
2490 unsigned param_count = vec_safe_length (ifs->m_parameters);
2491 for (unsigned pidx = 0; pidx < param_count; pidx++)
2493 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2494 if (!desc->split_candidate || desc->locally_unused)
2495 continue;
2497 bool certain_access_present = !certain_must_exist;
2498 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2499 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2500 "which overlap", node->dump_name (), pidx);
2501 if (!certain_access_present)
2502 internal_error ("Function %s, parameter %u, is used but does not "
2503 "have any certain IPA-SRA access",
2504 node->dump_name (), pidx);
2508 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2509 this compilation unit and create summary structures describing IPA-SRA
2510 opportunities and constraints in them. */
2512 static void
2513 ipa_sra_generate_summary (void)
2515 struct cgraph_node *node;
2517 gcc_checking_assert (!func_sums);
2518 gcc_checking_assert (!call_sums);
2519 func_sums
2520 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2521 ipa_sra_function_summaries (symtab, true));
2522 call_sums = new ipa_sra_call_summaries (symtab);
2524 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2525 ipa_sra_summarize_function (node);
2526 return;
2529 /* Write intraprocedural analysis information about E and all of its outgoing
2530 edges into a stream for LTO WPA. */
2532 static void
2533 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2535 isra_call_summary *csum = call_sums->get (e);
2536 unsigned input_count = csum->m_arg_flow.length ();
2537 streamer_write_uhwi (ob, input_count);
2538 for (unsigned i = 0; i < input_count; i++)
2540 isra_param_flow *ipf = &csum->m_arg_flow[i];
2541 streamer_write_hwi (ob, ipf->length);
2542 bitpack_d bp = bitpack_create (ob->main_stream);
2543 for (int j = 0; j < ipf->length; j++)
2544 bp_pack_value (&bp, ipf->inputs[j], 8);
2545 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2546 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2547 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2548 streamer_write_bitpack (&bp);
2549 streamer_write_uhwi (ob, ipf->unit_offset);
2550 streamer_write_uhwi (ob, ipf->unit_size);
2552 bitpack_d bp = bitpack_create (ob->main_stream);
2553 bp_pack_value (&bp, csum->m_return_ignored, 1);
2554 bp_pack_value (&bp, csum->m_return_returned, 1);
2555 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2556 bp_pack_value (&bp, csum->m_before_any_store, 1);
2557 streamer_write_bitpack (&bp);
2560 /* Write intraprocedural analysis information about NODE and all of its outgoing
2561 edges into a stream for LTO WPA. */
2563 static void
2564 isra_write_node_summary (output_block *ob, cgraph_node *node)
2566 isra_func_summary *ifs = func_sums->get (node);
2567 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2568 int node_ref = lto_symtab_encoder_encode (encoder, node);
2569 streamer_write_uhwi (ob, node_ref);
2571 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2572 streamer_write_uhwi (ob, param_desc_count);
2573 for (unsigned i = 0; i < param_desc_count; i++)
2575 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2576 unsigned access_count = vec_safe_length (desc->accesses);
2577 streamer_write_uhwi (ob, access_count);
2578 for (unsigned j = 0; j < access_count; j++)
2580 param_access *acc = (*desc->accesses)[j];
2581 stream_write_tree (ob, acc->type, true);
2582 stream_write_tree (ob, acc->alias_ptr_type, true);
2583 streamer_write_uhwi (ob, acc->unit_offset);
2584 streamer_write_uhwi (ob, acc->unit_size);
2585 bitpack_d bp = bitpack_create (ob->main_stream);
2586 bp_pack_value (&bp, acc->certain, 1);
2587 streamer_write_bitpack (&bp);
2589 streamer_write_uhwi (ob, desc->param_size_limit);
2590 streamer_write_uhwi (ob, desc->size_reached);
2591 bitpack_d bp = bitpack_create (ob->main_stream);
2592 bp_pack_value (&bp, desc->locally_unused, 1);
2593 bp_pack_value (&bp, desc->split_candidate, 1);
2594 bp_pack_value (&bp, desc->by_ref, 1);
2595 streamer_write_bitpack (&bp);
2597 bitpack_d bp = bitpack_create (ob->main_stream);
2598 bp_pack_value (&bp, ifs->m_candidate, 1);
2599 bp_pack_value (&bp, ifs->m_returns_value, 1);
2600 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2601 gcc_assert (!ifs->m_queued);
2602 streamer_write_bitpack (&bp);
2604 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2605 isra_write_edge_summary (ob, e);
2606 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2607 isra_write_edge_summary (ob, e);
2610 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2612 static void
2613 ipa_sra_write_summary (void)
2615 if (!func_sums || !call_sums)
2616 return;
2618 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2619 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2620 ob->symbol = NULL;
2622 unsigned int count = 0;
2623 lto_symtab_encoder_iterator lsei;
2624 for (lsei = lsei_start_function_in_partition (encoder);
2625 !lsei_end_p (lsei);
2626 lsei_next_function_in_partition (&lsei))
2628 cgraph_node *node = lsei_cgraph_node (lsei);
2629 if (node->has_gimple_body_p ()
2630 && func_sums->get (node) != NULL)
2631 count++;
2633 streamer_write_uhwi (ob, count);
2635 /* Process all of the functions. */
2636 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2637 lsei_next_function_in_partition (&lsei))
2639 cgraph_node *node = lsei_cgraph_node (lsei);
2640 if (node->has_gimple_body_p ()
2641 && func_sums->get (node) != NULL)
2642 isra_write_node_summary (ob, node);
2644 streamer_write_char_stream (ob->main_stream, 0);
2645 produce_asm (ob, NULL);
2646 destroy_output_block (ob);
2649 /* Read intraprocedural analysis information about E and all of its outgoing
2650 edges into a stream for LTO WPA. */
2652 static void
2653 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2655 isra_call_summary *csum = call_sums->get_create (cs);
2656 unsigned input_count = streamer_read_uhwi (ib);
2657 csum->init_inputs (input_count);
2658 for (unsigned i = 0; i < input_count; i++)
2660 isra_param_flow *ipf = &csum->m_arg_flow[i];
2661 ipf->length = streamer_read_hwi (ib);
2662 bitpack_d bp = streamer_read_bitpack (ib);
2663 for (int j = 0; j < ipf->length; j++)
2664 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2665 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2666 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2667 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2668 ipf->unit_offset = streamer_read_uhwi (ib);
2669 ipf->unit_size = streamer_read_uhwi (ib);
2671 bitpack_d bp = streamer_read_bitpack (ib);
2672 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2673 csum->m_return_returned = bp_unpack_value (&bp, 1);
2674 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2675 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2678 /* Read intraprocedural analysis information about NODE and all of its outgoing
2679 edges into a stream for LTO WPA. */
2681 static void
2682 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2683 struct data_in *data_in)
2685 isra_func_summary *ifs = func_sums->get_create (node);
2686 unsigned param_desc_count = streamer_read_uhwi (ib);
2687 if (param_desc_count > 0)
2689 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2690 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2692 for (unsigned i = 0; i < param_desc_count; i++)
2694 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2695 unsigned access_count = streamer_read_uhwi (ib);
2696 for (unsigned j = 0; j < access_count; j++)
2698 param_access *acc = ggc_cleared_alloc<param_access> ();
2699 acc->type = stream_read_tree (ib, data_in);
2700 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2701 acc->unit_offset = streamer_read_uhwi (ib);
2702 acc->unit_size = streamer_read_uhwi (ib);
2703 bitpack_d bp = streamer_read_bitpack (ib);
2704 acc->certain = bp_unpack_value (&bp, 1);
2705 vec_safe_push (desc->accesses, acc);
2707 desc->param_size_limit = streamer_read_uhwi (ib);
2708 desc->size_reached = streamer_read_uhwi (ib);
2709 bitpack_d bp = streamer_read_bitpack (ib);
2710 desc->locally_unused = bp_unpack_value (&bp, 1);
2711 desc->split_candidate = bp_unpack_value (&bp, 1);
2712 desc->by_ref = bp_unpack_value (&bp, 1);
2714 bitpack_d bp = streamer_read_bitpack (ib);
2715 ifs->m_candidate = bp_unpack_value (&bp, 1);
2716 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2717 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2718 ifs->m_queued = 0;
2720 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2721 isra_read_edge_summary (ib, e);
2722 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2723 isra_read_edge_summary (ib, e);
2726 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2727 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c,
2728 it should be possible to unify them somehow. */
2730 static void
2731 isra_read_summary_section (struct lto_file_decl_data *file_data,
2732 const char *data, size_t len)
2734 const struct lto_function_header *header =
2735 (const struct lto_function_header *) data;
2736 const int cfg_offset = sizeof (struct lto_function_header);
2737 const int main_offset = cfg_offset + header->cfg_size;
2738 const int string_offset = main_offset + header->main_size;
2739 struct data_in *data_in;
2740 unsigned int i;
2741 unsigned int count;
2743 lto_input_block ib_main ((const char *) data + main_offset,
2744 header->main_size, file_data->mode_table);
2746 data_in =
2747 lto_data_in_create (file_data, (const char *) data + string_offset,
2748 header->string_size, vNULL);
2749 count = streamer_read_uhwi (&ib_main);
2751 for (i = 0; i < count; i++)
2753 unsigned int index;
2754 struct cgraph_node *node;
2755 lto_symtab_encoder_t encoder;
2757 index = streamer_read_uhwi (&ib_main);
2758 encoder = file_data->symtab_node_encoder;
2759 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2760 index));
2761 gcc_assert (node->definition);
2762 isra_read_node_info (&ib_main, node, data_in);
2764 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2765 len);
2766 lto_data_in_delete (data_in);
2769 /* Read intraprocedural analysis information into a stream for LTO WPA. */
2771 static void
2772 ipa_sra_read_summary (void)
2774 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2775 struct lto_file_decl_data *file_data;
2776 unsigned int j = 0;
2778 gcc_checking_assert (!func_sums);
2779 gcc_checking_assert (!call_sums);
2780 func_sums
2781 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2782 ipa_sra_function_summaries (symtab, true));
2783 call_sums = new ipa_sra_call_summaries (symtab);
2785 while ((file_data = file_data_vec[j++]))
2787 size_t len;
2788 const char *data
2789 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2790 if (data)
2791 isra_read_summary_section (file_data, data, len);
2795 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2797 static void
2798 ipa_sra_dump_all_summaries (FILE *f)
2800 cgraph_node *node;
2801 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2803 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2805 isra_func_summary *ifs = func_sums->get (node);
2806 if (!ifs)
2807 fprintf (f, " Function does not have any associated IPA-SRA "
2808 "summary\n");
2809 else
2811 if (!ifs->m_candidate)
2813 fprintf (f, " Not a candidate function\n");
2814 continue;
2816 if (ifs->m_returns_value)
2817 fprintf (f, " Returns value\n");
2818 if (vec_safe_is_empty (ifs->m_parameters))
2819 fprintf (f, " No parameter information. \n");
2820 else
2821 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2823 fprintf (f, " Descriptor for parameter %i:\n", i);
2824 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2826 fprintf (f, "\n");
2829 struct cgraph_edge *cs;
2830 for (cs = node->callees; cs; cs = cs->next_callee)
2832 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2833 cs->callee->dump_name ());
2834 isra_call_summary *csum = call_sums->get (cs);
2835 if (csum)
2836 csum->dump (f);
2837 else
2838 fprintf (f, " Call summary is MISSING!\n");
2842 fprintf (f, "\n\n");
2845 /* Perform function-scope viability tests that can be only made at IPA level
2846 and return false if the function is deemed unsuitable for IPA-SRA. */
2848 static bool
2849 ipa_sra_ipa_function_checks (cgraph_node *node)
2851 if (!node->can_be_local_p ())
2853 if (dump_file)
2854 fprintf (dump_file, "Function %s disqualified because it cannot be "
2855 "made local.\n", node->dump_name ());
2856 return false;
2858 if (!node->can_change_signature)
2860 if (dump_file)
2861 fprintf (dump_file, "Function can not change signature.\n");
2862 return false;
2865 return true;
2868 /* Issues found out by check_callers_for_issues. */
2870 struct caller_issues
2872 /* The candidate being considered. */
2873 cgraph_node *candidate;
2874 /* There is a thunk among callers. */
2875 bool thunk;
2876 /* Call site with no available information. */
2877 bool unknown_callsite;
2878 /* Call from outside the the candidate's comdat group. */
2879 bool call_from_outside_comdat;
2880 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2881 bool bit_aligned_aggregate_argument;
2884 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2885 that apply. */
2887 static bool
2888 check_for_caller_issues (struct cgraph_node *node, void *data)
2890 struct caller_issues *issues = (struct caller_issues *) data;
2892 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2894 if (cs->caller->thunk)
2896 issues->thunk = true;
2897 /* TODO: We should be able to process at least some types of
2898 thunks. */
2899 return true;
2901 if (issues->candidate->calls_comdat_local
2902 && issues->candidate->same_comdat_group
2903 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2905 issues->call_from_outside_comdat = true;
2906 return true;
2909 isra_call_summary *csum = call_sums->get (cs);
2910 if (!csum)
2912 issues->unknown_callsite = true;
2913 return true;
2916 if (csum->m_bit_aligned_arg)
2917 issues->bit_aligned_aggregate_argument = true;
2919 return false;
2922 /* Look at all incoming edges to NODE, including aliases and thunks and look
2923 for problems. Return true if NODE type should not be modified at all. */
2925 static bool
2926 check_all_callers_for_issues (cgraph_node *node)
2928 struct caller_issues issues;
2929 memset (&issues, 0, sizeof (issues));
2930 issues.candidate = node;
2932 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2933 if (issues.unknown_callsite)
2935 if (dump_file && (dump_flags & TDF_DETAILS))
2936 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2937 "all modifications.\n", node->dump_name ());
2938 return true;
2940 /* TODO: We should be able to process at least some types of thunks. */
2941 if (issues.thunk)
2943 if (dump_file && (dump_flags & TDF_DETAILS))
2944 fprintf (dump_file, "A call of %s is through thunk, which are not"
2945 " handled yet. Disabling all modifications.\n",
2946 node->dump_name ());
2947 return true;
2949 if (issues.call_from_outside_comdat)
2951 if (dump_file)
2952 fprintf (dump_file, "Function would become private comdat called "
2953 "outside of its comdat group.\n");
2954 return true;
2957 if (issues.bit_aligned_aggregate_argument)
2959 /* Let's only remove parameters/return values from such functions.
2960 TODO: We could only prevent splitting the problematic parameters if
2961 anybody thinks it is worth it. */
2962 if (dump_file && (dump_flags & TDF_DETAILS))
2963 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2964 " disabling parameter splitting.\n", node->dump_name ());
2966 isra_func_summary *ifs = func_sums->get (node);
2967 gcc_checking_assert (ifs);
2968 unsigned param_count = vec_safe_length (ifs->m_parameters);
2969 for (unsigned i = 0; i < param_count; i++)
2970 (*ifs->m_parameters)[i].split_candidate = false;
2972 return false;
2975 /* Find the access with corresponding OFFSET and SIZE among accesses in
2976 PARAM_DESC and return it or NULL if such an access is not there. */
2978 static param_access *
2979 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2981 unsigned pclen = vec_safe_length (param_desc->accesses);
2983 /* The search is linear but the number of stored accesses is bound by
2984 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2986 for (unsigned i = 0; i < pclen; i++)
2987 if ((*param_desc->accesses)[i]->unit_offset == offset
2988 && (*param_desc->accesses)[i]->unit_size == size)
2989 return (*param_desc->accesses)[i];
2991 return NULL;
2994 /* Return iff the total size of definite replacement SIZE would violate the
2995 limit set for it in PARAM. */
2997 static bool
2998 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3000 unsigned limit = desc->param_size_limit;
3001 if (size > limit
3002 || (!desc->by_ref && size == limit))
3003 return true;
3004 return false;
3007 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3008 the set limit. IDX is the parameter number which is dumped when
3009 disqualifying. */
3011 static void
3012 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3014 unsigned after = desc->size_reached + size;
3015 if (size_would_violate_limit_p (desc, after))
3017 if (dump_file && (dump_flags & TDF_DETAILS))
3018 fprintf (dump_file, " ...size limit reached, disqualifying "
3019 "candidate parameter %u\n", idx);
3020 desc->split_candidate = false;
3021 return;
3023 desc->size_reached = after;
3026 /* Take all actions required to deal with an edge CS that represents a call to
3027 an unknown or un-analyzed function, for both parameter removal and
3028 splitting. */
3030 static void
3031 process_edge_to_unknown_caller (cgraph_edge *cs)
3033 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3034 gcc_checking_assert (from_ifs);
3035 isra_call_summary *csum = call_sums->get (cs);
3037 if (dump_file && (dump_flags & TDF_DETAILS))
3038 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3039 cs->caller->dump_name ());
3041 unsigned args_count = csum->m_arg_flow.length ();
3042 for (unsigned i = 0; i < args_count; i++)
3044 isra_param_flow *ipf = &csum->m_arg_flow[i];
3046 if (ipf->pointer_pass_through)
3048 isra_param_desc *param_desc
3049 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3050 param_desc->locally_unused = false;
3051 param_desc->split_candidate = false;
3052 continue;
3054 if (ipf->aggregate_pass_through)
3056 unsigned idx = get_single_param_flow_source (ipf);
3057 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3059 param_desc->locally_unused = false;
3060 if (!param_desc->split_candidate)
3061 continue;
3062 gcc_assert (!param_desc->by_ref);
3063 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3064 ipf->unit_size);
3065 gcc_checking_assert (pacc);
3066 pacc->certain = true;
3067 if (overlapping_certain_accesses_p (param_desc, NULL))
3069 if (dump_file && (dump_flags & TDF_DETAILS))
3070 fprintf (dump_file, " ...leading to overlap, "
3071 " disqualifying candidate parameter %u\n",
3072 idx);
3073 param_desc->split_candidate = false;
3075 else
3076 bump_reached_size (param_desc, pacc->unit_size, idx);
3077 ipf->aggregate_pass_through = false;
3078 continue;
3081 for (int j = 0; j < ipf->length; j++)
3083 int input_idx = ipf->inputs[j];
3084 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3089 /* Propagate parameter removal information through cross-SCC edge CS,
3090 i.e. decrease the use count in the caller parameter descriptor for each use
3091 in this call. */
3093 static void
3094 param_removal_cross_scc_edge (cgraph_edge *cs)
3096 enum availability availability;
3097 cgraph_node *callee = cs->callee->function_symbol (&availability);
3098 isra_func_summary *to_ifs = func_sums->get (callee);
3099 if (!to_ifs || !to_ifs->m_candidate
3100 || (availability < AVAIL_AVAILABLE)
3101 || vec_safe_is_empty (to_ifs->m_parameters))
3103 process_edge_to_unknown_caller (cs);
3104 return;
3106 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3107 gcc_checking_assert (from_ifs);
3109 isra_call_summary *csum = call_sums->get (cs);
3110 unsigned args_count = csum->m_arg_flow.length ();
3111 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3113 for (unsigned i = 0; i < args_count; i++)
3115 bool unused_in_callee;
3116 if (i < param_count)
3117 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3118 else
3119 unused_in_callee = false;
3121 if (!unused_in_callee)
3123 isra_param_flow *ipf = &csum->m_arg_flow[i];
3124 for (int j = 0; j < ipf->length; j++)
3126 int input_idx = ipf->inputs[j];
3127 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3133 /* Unless it is already there, push NODE which is also described by IFS to
3134 STACK. */
3136 static void
3137 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3138 vec<cgraph_node *> *stack)
3140 if (!ifs->m_queued)
3142 ifs->m_queued = true;
3143 stack->safe_push (node);
3147 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3148 used and push CALLER on STACK. */
3150 static void
3151 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3152 cgraph_node *caller, vec<cgraph_node *> *stack)
3154 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3156 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3157 isra_push_node_to_stack (caller, from_ifs, stack);
3162 /* Propagate information that any parameter is not used only locally within a
3163 SCC across CS to the caller, which must be in the same SCC as the
3164 callee. Push any callers that need to be re-processed to STACK. */
3166 static void
3167 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3169 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3170 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3171 return;
3173 isra_call_summary *csum = call_sums->get (cs);
3174 gcc_checking_assert (csum);
3175 unsigned args_count = csum->m_arg_flow.length ();
3176 enum availability availability;
3177 cgraph_node *callee = cs->callee->function_symbol (&availability);
3178 isra_func_summary *to_ifs = func_sums->get (callee);
3180 unsigned param_count
3181 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3182 ? vec_safe_length (to_ifs->m_parameters) : 0;
3183 for (unsigned i = 0; i < args_count; i++)
3185 if (i < param_count
3186 && (*to_ifs->m_parameters)[i].locally_unused)
3187 continue;
3189 /* The argument is needed in the callee it, we must mark the parameter as
3190 used also in the caller and its callers within this SCC. */
3191 isra_param_flow *ipf = &csum->m_arg_flow[i];
3192 for (int j = 0; j < ipf->length; j++)
3194 int input_idx = ipf->inputs[j];
3195 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3200 /* Propagate information that any parameter is not used only locally within a
3201 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3202 same SCC. Push any callers that need to be re-processed to STACK. */
3204 static bool
3205 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3207 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3208 cgraph_edge *cs;
3209 for (cs = node->callers; cs; cs = cs->next_caller)
3210 if (ipa_edge_within_scc (cs))
3211 propagate_used_across_scc_edge (cs, stack);
3212 return false;
3215 /* Return true iff all certain accesses in ARG_DESC are also present as
3216 certain accesses in PARAM_DESC. */
3218 static bool
3219 all_callee_accesses_present_p (isra_param_desc *param_desc,
3220 isra_param_desc *arg_desc)
3222 unsigned aclen = vec_safe_length (arg_desc->accesses);
3223 for (unsigned j = 0; j < aclen; j++)
3225 param_access *argacc = (*arg_desc->accesses)[j];
3226 if (!argacc->certain)
3227 continue;
3228 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3229 argacc->unit_size);
3230 if (!pacc
3231 || !pacc->certain
3232 || !types_compatible_p (argacc->type, pacc->type))
3233 return false;
3235 return true;
3238 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3239 does not allow instantiating an auto_vec with a type defined within a
3240 function so it is a global type. */
3241 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3244 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3245 (which belongs to CALLER) if they would not violate some constraint there.
3246 If successful, return NULL, otherwise return the string reason for failure
3247 (which can be written to the dump file). DELTA_OFFSET is the known offset
3248 of the actual argument withing the formal parameter (so of ARG_DESCS within
3249 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3250 known. In case of success, set *CHANGE_P to true if propagation actually
3251 changed anything. */
3253 static const char *
3254 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3255 isra_param_desc *arg_desc,
3256 unsigned delta_offset, unsigned arg_size,
3257 bool *change_p)
3259 unsigned pclen = vec_safe_length (param_desc->accesses);
3260 unsigned aclen = vec_safe_length (arg_desc->accesses);
3261 unsigned prop_count = 0;
3262 unsigned prop_size = 0;
3263 bool change = false;
3265 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3266 for (unsigned j = 0; j < aclen; j++)
3268 param_access *argacc = (*arg_desc->accesses)[j];
3269 prop_kinds.safe_push (ACC_PROP_DONT);
3271 if (arg_size > 0
3272 && argacc->unit_offset + argacc->unit_size > arg_size)
3273 return "callee access outsize size boundary";
3275 if (!argacc->certain)
3276 continue;
3278 unsigned offset = argacc->unit_offset + delta_offset;
3279 /* Given that accesses are initially stored according to increasing
3280 offset and decreasing size in case of equal offsets, the following
3281 searches could be written more efficiently if we kept the ordering
3282 when copying. But the number of accesses is capped at
3283 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3284 messy quickly, so let's improve on that only if necessary. */
3286 bool exact_match = false;
3287 for (unsigned i = 0; i < pclen; i++)
3289 /* Check for overlaps. */
3290 param_access *pacc = (*param_desc->accesses)[i];
3291 if (pacc->unit_offset == offset
3292 && pacc->unit_size == argacc->unit_size)
3294 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3295 || !types_compatible_p (argacc->type, pacc->type))
3296 return "propagated access types would not match existing ones";
3298 exact_match = true;
3299 if (!pacc->certain)
3301 prop_kinds[j] = ACC_PROP_CERTAIN;
3302 prop_size += argacc->unit_size;
3303 change = true;
3305 continue;
3308 if (offset < pacc->unit_offset + pacc->unit_size
3309 && offset + argacc->unit_size > pacc->unit_offset)
3311 /* None permissible with load accesses, possible to fit into
3312 argument ones. */
3313 if (pacc->certain
3314 || offset < pacc->unit_offset
3315 || (offset + argacc->unit_size
3316 > pacc->unit_offset + pacc->unit_size))
3317 return "a propagated access would conflict in caller";
3321 if (!exact_match)
3323 prop_kinds[j] = ACC_PROP_COPY;
3324 prop_count++;
3325 prop_size += argacc->unit_size;
3326 change = true;
3330 if (!change)
3331 return NULL;
3333 if ((prop_count + pclen
3334 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3335 || size_would_violate_limit_p (param_desc,
3336 param_desc->size_reached + prop_size))
3337 return "propagating accesses would violate the count or size limit";
3339 *change_p = true;
3340 for (unsigned j = 0; j < aclen; j++)
3342 if (prop_kinds[j] == ACC_PROP_COPY)
3344 param_access *argacc = (*arg_desc->accesses)[j];
3346 param_access *copy = ggc_cleared_alloc<param_access> ();
3347 copy->unit_offset = argacc->unit_offset + delta_offset;
3348 copy->unit_size = argacc->unit_size;
3349 copy->type = argacc->type;
3350 copy->alias_ptr_type = argacc->alias_ptr_type;
3351 copy->certain = true;
3352 vec_safe_push (param_desc->accesses, copy);
3354 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3356 param_access *argacc = (*arg_desc->accesses)[j];
3357 param_access *csp
3358 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3359 argacc->unit_size);
3360 csp->certain = true;
3364 param_desc->size_reached += prop_size;
3366 return NULL;
3369 /* Propagate parameter splitting information through call graph edge CS.
3370 Return true if any changes that might need to be propagated within SCCs have
3371 been made. The function also clears the aggregate_pass_through and
3372 pointer_pass_through in call summaries which do not need to be processed
3373 again if this CS is revisited when iterating while changes are propagated
3374 within an SCC. */
3376 static bool
3377 param_splitting_across_edge (cgraph_edge *cs)
3379 bool res = false;
3380 bool cross_scc = !ipa_edge_within_scc (cs);
3381 enum availability availability;
3382 cgraph_node *callee = cs->callee->function_symbol (&availability);
3383 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3384 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3386 isra_call_summary *csum = call_sums->get (cs);
3387 gcc_checking_assert (csum);
3388 unsigned args_count = csum->m_arg_flow.length ();
3389 isra_func_summary *to_ifs = func_sums->get (callee);
3390 unsigned param_count
3391 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3392 ? vec_safe_length (to_ifs->m_parameters)
3393 : 0);
3395 if (dump_file && (dump_flags & TDF_DETAILS))
3396 fprintf (dump_file, "Splitting across %s->%s:\n",
3397 cs->caller->dump_name (), callee->dump_name ());
3399 unsigned i;
3400 for (i = 0; (i < args_count) && (i < param_count); i++)
3402 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3403 isra_param_flow *ipf = &csum->m_arg_flow[i];
3405 if (arg_desc->locally_unused)
3407 if (dump_file && (dump_flags & TDF_DETAILS))
3408 fprintf (dump_file, " ->%u: unused in callee\n", i);
3409 ipf->pointer_pass_through = false;
3410 continue;
3413 if (ipf->pointer_pass_through)
3415 int idx = get_single_param_flow_source (ipf);
3416 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3417 if (!param_desc->split_candidate)
3418 continue;
3419 gcc_assert (param_desc->by_ref);
3421 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3424 fprintf (dump_file, " %u->%u: not candidate or not by "
3425 "reference in callee\n", idx, i);
3426 param_desc->split_candidate = false;
3427 ipf->pointer_pass_through = false;
3428 res = true;
3430 else if (!ipf->safe_to_import_accesses)
3432 if (!csum->m_before_any_store
3433 || !all_callee_accesses_present_p (param_desc, arg_desc))
3435 if (dump_file && (dump_flags & TDF_DETAILS))
3436 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3437 idx, i);
3438 param_desc->split_candidate = false;
3439 ipf->pointer_pass_through = false;
3440 res = true;
3443 else
3445 if (dump_file && (dump_flags & TDF_DETAILS))
3446 fprintf (dump_file, " %u->%u: verified callee accesses "
3447 "present.\n", idx, i);
3448 if (cross_scc)
3449 ipf->pointer_pass_through = false;
3452 else
3454 const char *pull_failure
3455 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3456 0, 0, &res);
3457 if (pull_failure)
3459 if (dump_file && (dump_flags & TDF_DETAILS))
3460 fprintf (dump_file, " %u->%u: by_ref access pull "
3461 "failed: %s.\n", idx, i, pull_failure);
3462 param_desc->split_candidate = false;
3463 ipf->pointer_pass_through = false;
3464 res = true;
3466 else
3468 if (dump_file && (dump_flags & TDF_DETAILS))
3469 fprintf (dump_file, " %u->%u: by_ref access pull "
3470 "succeeded.\n", idx, i);
3471 if (cross_scc)
3472 ipf->pointer_pass_through = false;
3476 else if (ipf->aggregate_pass_through)
3478 int idx = get_single_param_flow_source (ipf);
3479 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3480 if (!param_desc->split_candidate)
3481 continue;
3482 gcc_assert (!param_desc->by_ref);
3483 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3484 ipf->unit_size);
3485 gcc_checking_assert (pacc);
3487 if (pacc->certain)
3489 if (dump_file && (dump_flags & TDF_DETAILS))
3490 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3491 ipf->aggregate_pass_through = false;
3493 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3495 if (dump_file && (dump_flags & TDF_DETAILS))
3496 fprintf (dump_file, " %u->%u: not candidate or by "
3497 "reference in callee\n", idx, i);
3499 pacc->certain = true;
3500 if (overlapping_certain_accesses_p (param_desc, NULL))
3502 if (dump_file && (dump_flags & TDF_DETAILS))
3503 fprintf (dump_file, " ...leading to overlap, "
3504 " disqualifying candidate parameter %u\n",
3505 idx);
3506 param_desc->split_candidate = false;
3508 else
3509 bump_reached_size (param_desc, pacc->unit_size, idx);
3511 ipf->aggregate_pass_through = false;
3512 res = true;
3514 else
3516 const char *pull_failure
3517 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3518 ipf->unit_offset,
3519 ipf->unit_size, &res);
3520 if (pull_failure)
3522 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, " %u->%u: arg access pull "
3524 "failed: %s.\n", idx, i, pull_failure);
3526 ipf->aggregate_pass_through = false;
3527 pacc->certain = true;
3529 if (overlapping_certain_accesses_p (param_desc, NULL))
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3532 fprintf (dump_file, " ...leading to overlap, "
3533 " disqualifying candidate parameter %u\n",
3534 idx);
3535 param_desc->split_candidate = false;
3537 else
3538 bump_reached_size (param_desc, pacc->unit_size, idx);
3540 res = true;
3542 else
3544 if (dump_file && (dump_flags & TDF_DETAILS))
3545 fprintf (dump_file, " %u->%u: arg access pull "
3546 "succeeded.\n", idx, i);
3547 if (cross_scc)
3548 ipf->aggregate_pass_through = false;
3554 /* Handle argument-parameter count mismatches. */
3555 for (; (i < args_count); i++)
3557 isra_param_flow *ipf = &csum->m_arg_flow[i];
3559 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3561 int idx = get_single_param_flow_source (ipf);
3562 ipf->pointer_pass_through = false;
3563 ipf->aggregate_pass_through = false;
3564 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3565 if (!param_desc->split_candidate)
3566 continue;
3568 if (dump_file && (dump_flags & TDF_DETAILS))
3569 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3570 idx, i);
3571 param_desc->split_candidate = false;
3572 res = true;
3575 return res;
3578 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3579 callers ignore the return value, or come from the same SCC and use the
3580 return value only to compute their return value, return false, otherwise
3581 return true. */
3583 static bool
3584 retval_used_p (cgraph_node *node, void *)
3586 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3588 isra_call_summary *csum = call_sums->get (cs);
3589 gcc_checking_assert (csum);
3590 if (csum->m_return_ignored)
3591 continue;
3592 if (!csum->m_return_returned)
3593 return true;
3595 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3596 if (!from_ifs || !from_ifs->m_candidate)
3597 return true;
3599 if (!ipa_edge_within_scc (cs)
3600 && !from_ifs->m_return_ignored)
3601 return true;
3604 return false;
3607 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3608 modify parameter which originally had index BASE_INDEX, in the adjustment
3609 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3610 PREV_ADJUSTMENT. If the parent clone is the original function,
3611 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3614 static void
3615 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3616 unsigned prev_clone_index,
3617 ipa_adjusted_param *prev_adjustment,
3618 vec<ipa_adjusted_param, va_gc> **new_params)
3620 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3621 if (desc->locally_unused)
3623 if (dump_file)
3624 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3625 return;
3628 if (!desc->split_candidate)
3630 ipa_adjusted_param adj;
3631 if (prev_adjustment)
3633 adj = *prev_adjustment;
3634 adj.prev_clone_adjustment = true;
3635 adj.prev_clone_index = prev_clone_index;
3637 else
3639 memset (&adj, 0, sizeof (adj));
3640 adj.op = IPA_PARAM_OP_COPY;
3641 adj.base_index = base_index;
3642 adj.prev_clone_index = prev_clone_index;
3644 vec_safe_push ((*new_params), adj);
3645 return;
3648 if (dump_file)
3649 fprintf (dump_file, " Will split parameter %u\n", base_index);
3651 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3652 unsigned aclen = vec_safe_length (desc->accesses);
3653 for (unsigned j = 0; j < aclen; j++)
3655 param_access *pa = (*desc->accesses)[j];
3656 if (!pa->certain)
3657 continue;
3658 if (dump_file)
3659 fprintf (dump_file, " - component at byte offset %u, "
3660 "size %u\n", pa->unit_offset, pa->unit_size);
3662 ipa_adjusted_param adj;
3663 memset (&adj, 0, sizeof (adj));
3664 adj.op = IPA_PARAM_OP_SPLIT;
3665 adj.base_index = base_index;
3666 adj.prev_clone_index = prev_clone_index;
3667 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3668 adj.reverse = pa->reverse;
3669 adj.type = pa->type;
3670 adj.alias_ptr_type = pa->alias_ptr_type;
3671 adj.unit_offset = pa->unit_offset;
3672 vec_safe_push ((*new_params), adj);
3676 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
3677 flag of all callers of NODE. */
3679 static bool
3680 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3682 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3683 cs->caller->calls_comdat_local = true;
3684 return false;
3688 /* Do final processing of results of IPA propagation regarding NODE, clone it
3689 if appropriate. */
3691 static void
3692 process_isra_node_results (cgraph_node *node,
3693 hash_map<const char *, unsigned> *clone_num_suffixes)
3695 isra_func_summary *ifs = func_sums->get (node);
3696 if (!ifs || !ifs->m_candidate)
3697 return;
3699 auto_vec<bool, 16> surviving_params;
3700 bool check_surviving = false;
3701 clone_info *cinfo = clone_info::get (node);
3702 if (cinfo && cinfo->param_adjustments)
3704 check_surviving = true;
3705 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3708 unsigned param_count = vec_safe_length (ifs->m_parameters);
3709 bool will_change_function = false;
3710 if (ifs->m_returns_value && ifs->m_return_ignored)
3711 will_change_function = true;
3712 else
3713 for (unsigned i = 0; i < param_count; i++)
3715 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3716 if ((desc->locally_unused || desc->split_candidate)
3717 /* Make sure we do not clone just to attempt to remove an already
3718 removed unused argument. */
3719 && (!check_surviving
3720 || (i < surviving_params.length ()
3721 && surviving_params[i])))
3723 will_change_function = true;
3724 break;
3727 if (!will_change_function)
3728 return;
3730 if (dump_file)
3732 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3733 node->dump_name ());
3734 if (ifs->m_returns_value && ifs->m_return_ignored)
3735 fprintf (dump_file, " Will remove return value.\n");
3738 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3739 if (ipa_param_adjustments *old_adjustments
3740 = cinfo ? cinfo->param_adjustments : NULL)
3742 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3743 for (unsigned i = 0; i < old_adj_len; i++)
3745 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3746 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3747 old_adj, &new_params);
3750 else
3751 for (unsigned i = 0; i < param_count; i++)
3752 push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3754 ipa_param_adjustments *new_adjustments
3755 = (new (ggc_alloc <ipa_param_adjustments> ())
3756 ipa_param_adjustments (new_params, param_count,
3757 ifs->m_returns_value && ifs->m_return_ignored));
3759 if (dump_file && (dump_flags & TDF_DETAILS))
3761 fprintf (dump_file, "\n Created adjustments:\n");
3762 new_adjustments->dump (dump_file);
3765 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3766 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3767 node->decl)));
3768 auto_vec<cgraph_edge *> callers = node->collect_callers ();
3769 cgraph_node *new_node
3770 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3771 suffix_counter);
3772 suffix_counter++;
3773 if (node->calls_comdat_local && node->same_comdat_group)
3775 new_node->add_to_same_comdat_group (node);
3776 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3777 NULL, true);
3779 new_node->calls_comdat_local = node->calls_comdat_local;
3781 if (dump_file)
3782 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
3783 callers.release ();
3786 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3787 and disable transformations for those which have not or which should not
3788 transformed because the associated debug counter reached its limit. Return
3789 true if none survived or if there were no candidates to begin with. */
3791 static bool
3792 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3794 bool ret = true;
3795 unsigned len = vec_safe_length (ifs->m_parameters);
3796 if (!len)
3797 return true;
3799 auto_vec<bool, 16> surviving_params;
3800 bool check_surviving = false;
3801 clone_info *cinfo = clone_info::get (node);
3802 if (cinfo && cinfo->param_adjustments)
3804 check_surviving = true;
3805 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3807 bool dumped_first = false;
3808 for (unsigned i = 0; i < len; i++)
3810 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3811 if (!dbg_cnt (ipa_sra_params))
3813 desc->locally_unused = false;
3814 desc->split_candidate = false;
3816 else if (check_surviving
3817 && (i >= surviving_params.length ()
3818 || !surviving_params[i]))
3820 /* Even if the parameter was removed by a previous IPA pass, we do
3821 not clear locally_unused because if it really is unused, this
3822 information might be useful in callers. */
3823 desc->split_candidate = false;
3825 if (dump_file && (dump_flags & TDF_DETAILS))
3827 if (!dumped_first)
3829 fprintf (dump_file,
3830 "The following parameters of %s are dead on "
3831 "arrival:", node->dump_name ());
3832 dumped_first = true;
3834 fprintf (dump_file, " %u", i);
3837 else if (desc->locally_unused || desc->split_candidate)
3838 ret = false;
3841 if (dumped_first)
3842 fprintf (dump_file, "\n");
3844 return ret;
3848 /* Run the interprocedural part of IPA-SRA. */
3850 static unsigned int
3851 ipa_sra_analysis (void)
3853 if (dump_file)
3855 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3856 ipa_sra_dump_all_summaries (dump_file);
3859 gcc_checking_assert (func_sums);
3860 gcc_checking_assert (call_sums);
3861 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3862 auto_vec <cgraph_node *, 16> stack;
3863 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3865 /* One sweep from callees to callers for parameter removal and splitting. */
3866 for (int i = 0; i < node_scc_count; i++)
3868 cgraph_node *scc_rep = order[i];
3869 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3870 unsigned j;
3872 /* Preliminary IPA function level checks and first step of parameter
3873 removal. */
3874 cgraph_node *v;
3875 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3877 isra_func_summary *ifs = func_sums->get (v);
3878 if (!ifs || !ifs->m_candidate)
3879 continue;
3880 if (!ipa_sra_ipa_function_checks (v)
3881 || check_all_callers_for_issues (v))
3883 ifs->zap ();
3884 continue;
3886 if (disable_unavailable_parameters (v, ifs))
3887 continue;
3888 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3889 process_edge_to_unknown_caller (cs);
3890 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3891 if (!ipa_edge_within_scc (cs))
3892 param_removal_cross_scc_edge (cs);
3895 /* Look at edges within the current SCC and propagate used-ness across
3896 them, pushing onto the stack all notes which might need to be
3897 revisited. */
3898 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3899 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3900 &stack, true);
3902 /* Keep revisiting and pushing until nothing changes. */
3903 while (!stack.is_empty ())
3905 cgraph_node *v = stack.pop ();
3906 isra_func_summary *ifs = func_sums->get (v);
3907 gcc_checking_assert (ifs && ifs->m_queued);
3908 ifs->m_queued = false;
3910 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3911 &stack, true);
3914 /* Parameter splitting. */
3915 bool repeat_scc_access_propagation;
3918 repeat_scc_access_propagation = false;
3919 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3921 isra_func_summary *ifs = func_sums->get (v);
3922 if (!ifs
3923 || !ifs->m_candidate
3924 || vec_safe_is_empty (ifs->m_parameters))
3925 continue;
3926 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3927 if (param_splitting_across_edge (cs))
3928 repeat_scc_access_propagation = true;
3931 while (repeat_scc_access_propagation);
3933 if (flag_checking)
3934 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3935 verify_splitting_accesses (v, true);
3937 cycle_nodes.release ();
3940 /* One sweep from caller to callees for result removal. */
3941 for (int i = node_scc_count - 1; i >= 0 ; i--)
3943 cgraph_node *scc_rep = order[i];
3944 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3945 unsigned j;
3947 cgraph_node *v;
3948 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3950 isra_func_summary *ifs = func_sums->get (v);
3951 if (!ifs || !ifs->m_candidate)
3952 continue;
3954 bool return_needed
3955 = (ifs->m_returns_value
3956 && (!dbg_cnt (ipa_sra_retvalues)
3957 || v->call_for_symbol_and_aliases (retval_used_p,
3958 NULL, true)));
3959 ifs->m_return_ignored = !return_needed;
3960 if (return_needed)
3961 isra_push_node_to_stack (v, ifs, &stack);
3964 while (!stack.is_empty ())
3966 cgraph_node *node = stack.pop ();
3967 isra_func_summary *ifs = func_sums->get (node);
3968 gcc_checking_assert (ifs && ifs->m_queued);
3969 ifs->m_queued = false;
3971 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3972 if (ipa_edge_within_scc (cs)
3973 && call_sums->get (cs)->m_return_returned)
3975 enum availability av;
3976 cgraph_node *callee = cs->callee->function_symbol (&av);
3977 isra_func_summary *to_ifs = func_sums->get (callee);
3978 if (to_ifs && to_ifs->m_return_ignored)
3980 to_ifs->m_return_ignored = false;
3981 isra_push_node_to_stack (callee, to_ifs, &stack);
3985 cycle_nodes.release ();
3988 ipa_free_postorder_info ();
3989 free (order);
3991 if (dump_file)
3993 if (dump_flags & TDF_DETAILS)
3995 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
3996 " ==========\n");
3997 ipa_sra_dump_all_summaries (dump_file);
3999 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4002 hash_map<const char *, unsigned> *clone_num_suffixes
4003 = new hash_map<const char *, unsigned>;
4005 cgraph_node *node;
4006 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4007 process_isra_node_results (node, clone_num_suffixes);
4009 delete clone_num_suffixes;
4010 ggc_delete (func_sums);
4011 func_sums = NULL;
4012 delete call_sums;
4013 call_sums = NULL;
4015 if (dump_file)
4016 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4017 "==========\n\n");
4018 return 0;
4022 const pass_data pass_data_ipa_sra =
4024 IPA_PASS, /* type */
4025 "sra", /* name */
4026 OPTGROUP_NONE, /* optinfo_flags */
4027 TV_IPA_SRA, /* tv_id */
4028 0, /* properties_required */
4029 0, /* properties_provided */
4030 0, /* properties_destroyed */
4031 0, /* todo_flags_start */
4032 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4035 class pass_ipa_sra : public ipa_opt_pass_d
4037 public:
4038 pass_ipa_sra (gcc::context *ctxt)
4039 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4040 ipa_sra_generate_summary, /* generate_summary */
4041 ipa_sra_write_summary, /* write_summary */
4042 ipa_sra_read_summary, /* read_summary */
4043 NULL , /* write_optimization_summary */
4044 NULL, /* read_optimization_summary */
4045 NULL, /* stmt_fixup */
4046 0, /* function_transform_todo_flags_start */
4047 NULL, /* function_transform */
4048 NULL) /* variable_transform */
4051 /* opt_pass methods: */
4052 virtual bool gate (function *)
4054 /* TODO: We should remove the optimize check after we ensure we never run
4055 IPA passes when not optimizing. */
4056 return (flag_ipa_sra && optimize);
4059 virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4061 }; // class pass_ipa_sra
4063 } // anon namespace
4065 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4066 create a summary structure describing IPA-SRA opportunities and constraints
4067 in it. */
4069 static void
4070 ipa_sra_summarize_function (cgraph_node *node)
4072 if (dump_file)
4073 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4074 node->order);
4075 if (!ipa_sra_preliminary_function_checks (node))
4077 isra_analyze_all_outgoing_calls (node);
4078 return;
4080 gcc_obstack_init (&gensum_obstack);
4081 isra_func_summary *ifs = func_sums->get_create (node);
4082 ifs->m_candidate = true;
4083 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4084 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4086 decl2desc = new hash_map<tree, gensum_param_desc *>;
4087 unsigned count = 0;
4088 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
4089 count++;
4091 if (count > 0)
4093 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4094 param_descriptions.reserve_exact (count);
4095 param_descriptions.quick_grow_cleared (count);
4097 bool cfun_pushed = false;
4098 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4099 if (create_parameter_descriptors (node, &param_descriptions))
4101 push_cfun (fun);
4102 cfun_pushed = true;
4103 final_bbs = BITMAP_ALLOC (NULL);
4104 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4105 by_ref_count
4106 * last_basic_block_for_fn (fun));
4107 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4108 scan_function (node, fun);
4110 if (dump_file)
4112 dump_gensum_param_descriptors (dump_file, node->decl,
4113 &param_descriptions);
4114 fprintf (dump_file, "----------------------------------------\n");
4117 process_scan_results (node, fun, ifs, &param_descriptions);
4119 if (cfun_pushed)
4120 pop_cfun ();
4121 if (bb_dereferences)
4123 free (bb_dereferences);
4124 bb_dereferences = NULL;
4125 BITMAP_FREE (final_bbs);
4126 final_bbs = NULL;
4129 isra_analyze_all_outgoing_calls (node);
4131 delete decl2desc;
4132 decl2desc = NULL;
4133 obstack_free (&gensum_obstack, NULL);
4134 if (dump_file)
4135 fprintf (dump_file, "\n\n");
4136 if (flag_checking)
4137 verify_splitting_accesses (node, false);
4138 return;
4141 ipa_opt_pass_d *
4142 make_pass_ipa_sra (gcc::context *ctxt)
4144 return new pass_ipa_sra (ctxt);
4148 #include "gt-ipa-sra.h"