testsuite: Correct requirements for vadsdu*, vslv and vsrv testcases.
[official-gcc.git] / gcc / ipa-sra.c
blob07227c0bfecfdf8c4b295053e0a90d08aadf58c8
1 /* Interprocedural scalar replacement of aggregates
2 Copyright (C) 2008-2020 Free Software Foundation, Inc.
4 Contributed by Martin Jambor <mjambor@suse.cz>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* IPA-SRA is an interprocedural pass that removes unused function return
23 values (turning functions returning a value which is never used into void
24 functions), removes unused function parameters. It can also replace an
25 aggregate parameter by a set of other parameters representing part of the
26 original, turning those passed by reference into new ones which pass the
27 value directly.
29 The pass is a true IPA one, which means that it works in three stages in
30 order to be able to take advantage of LTO. First, summaries about functions
31 and each calls are generated. Function summaries (often called call graph
32 node summaries) contain mainly information about which parameters are
33 potential transformation candidates and which bits of candidates are
34 accessed. We differentiate between accesses done as a part of a call
35 statement (which might be not necessary if the callee is also transformed)
36 and others (which are mandatory). Call summaries (often called call graph
37 edge summaries) contain information about which function formal parameters
38 feed into which actual call arguments so that if two parameters are only
39 used in a sum which is then passed to another function which then however
40 does not use this parameter, all three parameters of the two functions can
41 be eliminated. Edge summaries also have flags whether the return value is
42 used or if it is only returned in the caller too. In LTO mode these
43 summaries are then streamed to the object file in the compilation phase and
44 streamed back in in the WPA analysis stage.
46 The interprocedural analysis phase traverses the graph in topological order
47 in two sweeps, one in each direction. First, from callees to callers for
48 parameter removal and splitting. Each strongly-connected component is
49 processed iteratively until the situation in it stabilizes. The pass from
50 callers to callees is then carried out to remove unused return values in a
51 very similar fashion.
53 Because parameter manipulation has big implications for call redirection
54 which is done only after all call graph nodes materialize, the
55 transformation phase is not part of this patch but is carried out by the
56 clone materialization and edge redirection itself, see comments in
57 ipa-param-manipulation.h for more details. */
61 #include "config.h"
62 #include "system.h"
63 #include "coretypes.h"
64 #include "backend.h"
65 #include "tree.h"
66 #include "gimple.h"
67 #include "predict.h"
68 #include "tree-pass.h"
69 #include "ssa.h"
70 #include "cgraph.h"
71 #include "gimple-pretty-print.h"
72 #include "alias.h"
73 #include "tree-eh.h"
74 #include "gimple-iterator.h"
75 #include "gimple-walk.h"
76 #include "tree-dfa.h"
77 #include "tree-sra.h"
78 #include "alloc-pool.h"
79 #include "symbol-summary.h"
80 #include "dbgcnt.h"
81 #include "tree-inline.h"
82 #include "ipa-utils.h"
83 #include "builtins.h"
84 #include "cfganal.h"
85 #include "tree-streamer.h"
86 #include "internal-fn.h"
88 static void ipa_sra_summarize_function (cgraph_node *);
90 /* Bits used to track size of an aggregate in bytes interprocedurally. */
91 #define ISRA_ARG_SIZE_LIMIT_BITS 16
92 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
93 /* How many parameters can feed into a call actual argument and still be
94 tracked. */
95 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
97 /* Structure describing accesses to a specific portion of an aggregate
98 parameter, as given by the offset and size. Any smaller accesses that occur
99 within a function that fall within another access form a tree. The pass
100 cannot analyze parameters with only partially overlapping accesses. */
102 struct GTY(()) param_access
104 /* Type that a potential replacement should have. This field only has
105 meaning in the summary building and transformation phases, when it is
106 reconstructed from the body. Must not be touched in IPA analysis
107 stage. */
108 tree type;
110 /* Alias reference type to be used in MEM_REFs when adjusting caller
111 arguments. */
112 tree alias_ptr_type;
114 /* Values returned by get_ref_base_and_extent but converted to bytes and
115 stored as unsigned ints. */
116 unsigned unit_offset;
117 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
119 /* Set once we are sure that the access will really end up in a potentially
120 transformed function - initially not set for portions of formal parameters
121 that are only used as actual function arguments passed to callees. */
122 unsigned certain : 1;
123 /* Set if the access has a reversed scalar storage order. */
124 unsigned reverse : 1;
127 /* This structure has the same purpose as the one above and additionally it
128 contains some fields that are only necessary in the summary generation
129 phase. */
131 struct gensum_param_access
133 /* Values returned by get_ref_base_and_extent. */
134 HOST_WIDE_INT offset;
135 HOST_WIDE_INT size;
137 /* if this access has any children (in terms of the definition above), this
138 points to the first one. */
139 struct gensum_param_access *first_child;
140 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
141 described above. */
142 struct gensum_param_access *next_sibling;
144 /* Type that a potential replacement should have. This field only has
145 meaning in the summary building and transformation phases, when it is
146 reconstructed from the body. Must not be touched in IPA analysis
147 stage. */
148 tree type;
149 /* Alias reference type to be used in MEM_REFs when adjusting caller
150 arguments. */
151 tree alias_ptr_type;
153 /* Have there been writes to or reads from this exact location except for as
154 arguments to a function call that can be tracked. */
155 bool nonarg;
157 /* Set if the access has a reversed scalar storage order. */
158 bool reverse;
161 /* Summary describing a parameter in the IPA stages. */
163 struct GTY(()) isra_param_desc
165 /* List of access representatives to the parameters, sorted according to
166 their offset. */
167 vec <param_access *, va_gc> *accesses;
169 /* Unit size limit of total size of all replacements. */
170 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
171 /* Sum of unit sizes of all certain replacements. */
172 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
174 /* A parameter that is used only in call arguments and can be removed if all
175 concerned actual arguments are removed. */
176 unsigned locally_unused : 1;
177 /* An aggregate that is a candidate for breaking up or complete removal. */
178 unsigned split_candidate : 1;
179 /* Is this a parameter passing stuff by reference? */
180 unsigned by_ref : 1;
183 /* Structure used when generating summaries that describes a parameter. */
185 struct gensum_param_desc
187 /* Roots of param_accesses. */
188 gensum_param_access *accesses;
189 /* Number of accesses in the access tree rooted in field accesses. */
190 unsigned access_count;
192 /* If the below is non-zero, this is the number of uses as actual arguments. */
193 int call_uses;
194 /* Number of times this parameter has been directly passed to. */
195 unsigned ptr_pt_count;
197 /* Size limit of total size of all replacements. */
198 unsigned param_size_limit;
199 /* Sum of sizes of nonarg accesses. */
200 unsigned nonarg_acc_size;
202 /* A parameter that is used only in call arguments and can be removed if all
203 concerned actual arguments are removed. */
204 bool locally_unused;
205 /* An aggregate that is a candidate for breaking up or a pointer passing data
206 by reference that is a candidate for being converted to a set of
207 parameters passing those data by value. */
208 bool split_candidate;
209 /* Is this a parameter passing stuff by reference? */
210 bool by_ref;
212 /* The number of this parameter as they are ordered in function decl. */
213 int param_number;
214 /* For parameters passing data by reference, this is parameter index to
215 compute indices to bb_dereferences. */
216 int deref_index;
219 /* Properly deallocate accesses of DESC. TODO: Since this data structure is
220 not in GC memory, this is not necessary and we can consider removing the
221 function. */
223 static void
224 free_param_decl_accesses (isra_param_desc *desc)
226 unsigned len = vec_safe_length (desc->accesses);
227 for (unsigned i = 0; i < len; ++i)
228 ggc_free ((*desc->accesses)[i]);
229 vec_free (desc->accesses);
232 /* Class used to convey information about functions from the
233 intra-procedural analysis stage to inter-procedural one. */
235 class GTY((for_user)) isra_func_summary
237 public:
238 /* initialize the object. */
240 isra_func_summary ()
241 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
242 m_return_ignored (false), m_queued (false)
245 /* Destroy m_parameters. */
247 ~isra_func_summary ();
249 /* Mark the function as not a candidate for any IPA-SRA transformation.
250 Return true if it was a candidate until now. */
252 bool zap ();
254 /* Vector of parameter descriptors corresponding to the function being
255 analyzed. */
256 vec<isra_param_desc, va_gc> *m_parameters;
258 /* Whether the node is even a candidate for any IPA-SRA transformation at
259 all. */
260 unsigned m_candidate : 1;
262 /* Whether the original function returns any value. */
263 unsigned m_returns_value : 1;
265 /* Set to true if all call statements do not actually use the returned
266 value. */
268 unsigned m_return_ignored : 1;
270 /* Whether the node is already queued in IPA SRA stack during processing of
271 call graphs SCCs. */
273 unsigned m_queued : 1;
276 /* Clean up and deallocate isra_func_summary points to. TODO: Since this data
277 structure is not in GC memory, this is not necessary and we can consider
278 removing the destructor. */
280 isra_func_summary::~isra_func_summary ()
282 unsigned len = vec_safe_length (m_parameters);
283 for (unsigned i = 0; i < len; ++i)
284 free_param_decl_accesses (&(*m_parameters)[i]);
285 vec_free (m_parameters);
289 /* Mark the function as not a candidate for any IPA-SRA transformation. Return
290 true if it was a candidate until now. */
292 bool
293 isra_func_summary::zap ()
295 bool ret = m_candidate;
296 m_candidate = false;
298 unsigned len = vec_safe_length (m_parameters);
299 for (unsigned i = 0; i < len; ++i)
300 free_param_decl_accesses (&(*m_parameters)[i]);
301 vec_free (m_parameters);
303 return ret;
306 /* Structure to describe which formal parameters feed into a particular actual
307 arguments. */
309 struct isra_param_flow
311 /* Number of elements in array inputs that contain valid data. */
312 char length;
313 /* Indices of formal parameters that feed into the described actual argument.
314 If aggregate_pass_through or pointer_pass_through below are true, it must
315 contain exactly one element which is passed through from a formal
316 parameter if the given number. Otherwise, the array contains indices of
317 callee's formal parameters which are used to calculate value of this
318 actual argument. */
319 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
321 /* Offset within the formal parameter. */
322 unsigned unit_offset;
323 /* Size of the portion of the formal parameter that is being passed. */
324 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
326 /* True when the value of this actual copy is a portion of a formal
327 parameter. */
328 unsigned aggregate_pass_through : 1;
329 /* True when the value of this actual copy is a verbatim pass through of an
330 obtained pointer. */
331 unsigned pointer_pass_through : 1;
332 /* True when it is safe to copy access candidates here from the callee, which
333 would mean introducing dereferences into callers of the caller. */
334 unsigned safe_to_import_accesses : 1;
337 /* Structure used to convey information about calls from the intra-procedural
338 analysis stage to inter-procedural one. */
340 class isra_call_summary
342 public:
343 isra_call_summary ()
344 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
345 m_bit_aligned_arg (false)
348 void init_inputs (unsigned arg_count);
349 void dump (FILE *f);
351 /* Information about what formal parameters of the caller are used to compute
352 individual actual arguments of this call. */
353 auto_vec <isra_param_flow> m_arg_flow;
355 /* Set to true if the call statement does not have a LHS. */
356 unsigned m_return_ignored : 1;
358 /* Set to true if the LHS of call statement is only used to construct the
359 return value of the caller. */
360 unsigned m_return_returned : 1;
362 /* Set when any of the call arguments are not byte-aligned. */
363 unsigned m_bit_aligned_arg : 1;
366 /* Class to manage function summaries. */
368 class GTY((user)) ipa_sra_function_summaries
369 : public function_summary <isra_func_summary *>
371 public:
372 ipa_sra_function_summaries (symbol_table *table, bool ggc):
373 function_summary<isra_func_summary *> (table, ggc) { }
375 virtual void duplicate (cgraph_node *, cgraph_node *,
376 isra_func_summary *old_sum,
377 isra_func_summary *new_sum);
378 virtual void insert (cgraph_node *, isra_func_summary *);
381 /* Hook that is called by summary when a node is duplicated. */
383 void
384 ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
385 isra_func_summary *old_sum,
386 isra_func_summary *new_sum)
388 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
389 useless. */
390 new_sum->m_candidate = old_sum->m_candidate;
391 new_sum->m_returns_value = old_sum->m_returns_value;
392 new_sum->m_return_ignored = old_sum->m_return_ignored;
393 gcc_assert (!old_sum->m_queued);
394 new_sum->m_queued = false;
396 unsigned param_count = vec_safe_length (old_sum->m_parameters);
397 if (!param_count)
398 return;
399 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
400 new_sum->m_parameters->quick_grow_cleared (param_count);
401 for (unsigned i = 0; i < param_count; i++)
403 isra_param_desc *s = &(*old_sum->m_parameters)[i];
404 isra_param_desc *d = &(*new_sum->m_parameters)[i];
406 d->param_size_limit = s->param_size_limit;
407 d->size_reached = s->size_reached;
408 d->locally_unused = s->locally_unused;
409 d->split_candidate = s->split_candidate;
410 d->by_ref = s->by_ref;
412 unsigned acc_count = vec_safe_length (s->accesses);
413 vec_safe_reserve_exact (d->accesses, acc_count);
414 for (unsigned j = 0; j < acc_count; j++)
416 param_access *from = (*s->accesses)[j];
417 param_access *to = ggc_cleared_alloc<param_access> ();
418 to->type = from->type;
419 to->alias_ptr_type = from->alias_ptr_type;
420 to->unit_offset = from->unit_offset;
421 to->unit_size = from->unit_size;
422 to->certain = from->certain;
423 d->accesses->quick_push (to);
428 /* Pointer to the pass function summary holder. */
430 static GTY(()) ipa_sra_function_summaries *func_sums;
432 /* Hook that is called by summary when new node appears. */
434 void
435 ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
437 if (opt_for_fn (node->decl, flag_ipa_sra))
439 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
440 ipa_sra_summarize_function (node);
441 pop_cfun ();
443 else
444 func_sums->remove (node);
447 /* Class to manage call summaries. */
449 class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
451 public:
452 ipa_sra_call_summaries (symbol_table *table):
453 call_summary<isra_call_summary *> (table) { }
455 /* Duplicate info when an edge is cloned. */
456 virtual void duplicate (cgraph_edge *, cgraph_edge *,
457 isra_call_summary *old_sum,
458 isra_call_summary *new_sum);
461 static ipa_sra_call_summaries *call_sums;
464 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
465 ARG_COUNT is the number of actual arguments passed. */
467 void
468 isra_call_summary::init_inputs (unsigned arg_count)
470 if (arg_count == 0)
472 gcc_checking_assert (m_arg_flow.length () == 0);
473 return;
475 if (m_arg_flow.length () == 0)
477 m_arg_flow.reserve_exact (arg_count);
478 m_arg_flow.quick_grow_cleared (arg_count);
480 else
481 gcc_checking_assert (arg_count == m_arg_flow.length ());
484 /* Dump all information in call summary to F. */
486 void
487 isra_call_summary::dump (FILE *f)
489 if (m_return_ignored)
490 fprintf (f, " return value ignored\n");
491 if (m_return_returned)
492 fprintf (f, " return value used only to compute caller return value\n");
493 for (unsigned i = 0; i < m_arg_flow.length (); i++)
495 fprintf (f, " Parameter %u:\n", i);
496 isra_param_flow *ipf = &m_arg_flow[i];
498 if (ipf->length)
500 bool first = true;
501 fprintf (f, " Scalar param sources: ");
502 for (int j = 0; j < ipf->length; j++)
504 if (!first)
505 fprintf (f, ", ");
506 else
507 first = false;
508 fprintf (f, "%i", (int) ipf->inputs[j]);
510 fprintf (f, "\n");
512 if (ipf->aggregate_pass_through)
513 fprintf (f, " Aggregate pass through from the param given above, "
514 "unit offset: %u , unit size: %u\n",
515 ipf->unit_offset, ipf->unit_size);
516 if (ipf->pointer_pass_through)
517 fprintf (f, " Pointer pass through from the param given above, "
518 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
522 /* Duplicate edge summary when an edge is cloned. */
524 void
525 ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
526 isra_call_summary *old_sum,
527 isra_call_summary *new_sum)
529 unsigned arg_count = old_sum->m_arg_flow.length ();
530 new_sum->init_inputs (arg_count);
531 for (unsigned i = 0; i < arg_count; i++)
532 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
534 new_sum->m_return_ignored = old_sum->m_return_ignored;
535 new_sum->m_return_returned = old_sum->m_return_returned;
536 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
540 /* With all GTY stuff done, we can move to anonymous namespace. */
541 namespace {
542 /* Quick mapping from a decl to its param descriptor. */
544 hash_map<tree, gensum_param_desc *> *decl2desc;
546 /* Countdown of allowed Alias analysis steps during summary building. */
548 int aa_walking_limit;
550 /* This is a table in which for each basic block and parameter there is a
551 distance (offset + size) in that parameter which is dereferenced and
552 accessed in that BB. */
553 HOST_WIDE_INT *bb_dereferences = NULL;
554 /* How many by-reference parameters there are in the current function. */
555 int by_ref_count;
557 /* Bitmap of BBs that can cause the function to "stop" progressing by
558 returning, throwing externally, looping infinitely or calling a function
559 which might abort etc.. */
560 bitmap final_bbs;
562 /* Obstack to allocate various small structures required only when generating
563 summary for a function. */
564 struct obstack gensum_obstack;
566 /* Return false the function is apparently unsuitable for IPA-SRA based on it's
567 attributes, return true otherwise. NODE is the cgraph node of the current
568 function. */
570 static bool
571 ipa_sra_preliminary_function_checks (cgraph_node *node)
573 if (!node->can_change_signature)
575 if (dump_file)
576 fprintf (dump_file, "Function cannot change signature.\n");
577 return false;
580 if (!tree_versionable_function_p (node->decl))
582 if (dump_file)
583 fprintf (dump_file, "Function is not versionable.\n");
584 return false;
587 if (!opt_for_fn (node->decl, optimize)
588 || !opt_for_fn (node->decl, flag_ipa_sra))
590 if (dump_file)
591 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
592 "function.\n");
593 return false;
596 if (DECL_VIRTUAL_P (node->decl))
598 if (dump_file)
599 fprintf (dump_file, "Function is a virtual method.\n");
600 return false;
603 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
604 if (fun->stdarg)
606 if (dump_file)
607 fprintf (dump_file, "Function uses stdarg. \n");
608 return false;
611 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
613 if (dump_file)
614 fprintf (dump_file, "Function type has attributes. \n");
615 return false;
618 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
620 if (dump_file)
621 fprintf (dump_file, "Always inline function will be inlined "
622 "anyway. \n");
623 return false;
626 return true;
629 /* Print access tree starting at ACCESS to F. */
631 static void
632 dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
634 fprintf (f, " ");
635 for (unsigned i = 0; i < indent; i++)
636 fprintf (f, " ");
637 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
638 access->offset);
639 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
640 fprintf (f, ", type: ");
641 print_generic_expr (f, access->type);
642 fprintf (f, ", alias_ptr_type: ");
643 print_generic_expr (f, access->alias_ptr_type);
644 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
645 for (gensum_param_access *ch = access->first_child;
647 ch = ch->next_sibling)
648 dump_gensum_access (f, ch, indent + 2);
652 /* Print access tree starting at ACCESS to F. */
654 static void
655 dump_isra_access (FILE *f, param_access *access)
657 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
658 fprintf (f, ", unit size: %u", access->unit_size);
659 fprintf (f, ", type: ");
660 print_generic_expr (f, access->type);
661 fprintf (f, ", alias_ptr_type: ");
662 print_generic_expr (f, access->alias_ptr_type);
663 if (access->certain)
664 fprintf (f, ", certain");
665 else
666 fprintf (f, ", not-certain");
667 if (access->reverse)
668 fprintf (f, ", reverse");
669 fprintf (f, "\n");
672 /* Dump access tree starting at ACCESS to stderr. */
674 DEBUG_FUNCTION void
675 debug_isra_access (param_access *access)
677 dump_isra_access (stderr, access);
680 /* Dump DESC to F. */
682 static void
683 dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
685 if (desc->locally_unused)
686 fprintf (f, " unused with %i call_uses\n", desc->call_uses);
687 if (!desc->split_candidate)
689 fprintf (f, " not a candidate\n");
690 return;
692 if (desc->by_ref)
693 fprintf (f, " by_ref with %u pass throughs\n", desc->ptr_pt_count);
695 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
696 dump_gensum_access (f, acc, 2);
699 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
700 F. */
702 static void
703 dump_gensum_param_descriptors (FILE *f, tree fndecl,
704 vec<gensum_param_desc> *param_descriptions)
706 tree parm = DECL_ARGUMENTS (fndecl);
707 for (unsigned i = 0;
708 i < param_descriptions->length ();
709 ++i, parm = DECL_CHAIN (parm))
711 fprintf (f, " Descriptor for parameter %i ", i);
712 print_generic_expr (f, parm, TDF_UID);
713 fprintf (f, "\n");
714 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
719 /* Dump DESC to F. */
721 static void
722 dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
724 if (desc->locally_unused)
726 fprintf (f, " (locally) unused\n");
728 if (!desc->split_candidate)
730 fprintf (f, " not a candidate for splitting\n");
731 return;
733 fprintf (f, " param_size_limit: %u, size_reached: %u%s\n",
734 desc->param_size_limit, desc->size_reached,
735 desc->by_ref ? ", by_ref" : "");
737 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
739 param_access *access = (*desc->accesses)[i];
740 dump_isra_access (f, access);
744 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
745 F. */
747 static void
748 dump_isra_param_descriptors (FILE *f, tree fndecl,
749 isra_func_summary *ifs)
751 tree parm = DECL_ARGUMENTS (fndecl);
752 if (!ifs->m_parameters)
754 fprintf (f, " parameter descriptors not available\n");
755 return;
758 for (unsigned i = 0;
759 i < ifs->m_parameters->length ();
760 ++i, parm = DECL_CHAIN (parm))
762 fprintf (f, " Descriptor for parameter %i ", i);
763 print_generic_expr (f, parm, TDF_UID);
764 fprintf (f, "\n");
765 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
769 /* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
770 function fails return false, otherwise return true. SRC must fit into an
771 unsigned char. Used for purposes of transitive unused parameter
772 removal. */
774 static bool
775 add_src_to_param_flow (isra_param_flow *param_flow, int src)
777 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
778 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
779 return false;
781 param_flow->inputs[(int) param_flow->length] = src;
782 param_flow->length++;
783 return true;
786 /* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
787 it is the only input. Used for purposes of transitive parameter
788 splitting. */
790 static void
791 set_single_param_flow_source (isra_param_flow *param_flow, int src)
793 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
794 if (param_flow->length == 0)
796 param_flow->inputs[0] = src;
797 param_flow->length = 1;
799 else if (param_flow->length == 1)
800 gcc_assert (param_flow->inputs[0] == src);
801 else
802 gcc_unreachable ();
805 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
806 it. */
808 static unsigned
809 get_single_param_flow_source (const isra_param_flow *param_flow)
811 gcc_assert (param_flow->length == 1);
812 return param_flow->inputs[0];
815 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
816 in FUN represented with NODE and return a negative number if any of them is
817 used for something else than either an actual call argument, simple
818 arithmetic operation or debug statement. If there are no such uses, return
819 the number of actual arguments that this parameter eventually feeds to (or
820 zero if there is none). For any such parameter, mark PARM_NUM as one of its
821 sources. ANALYZED is a bitmap that tracks which SSA names we have already
822 started investigating. */
824 static int
825 isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
826 int parm_num, bitmap analyzed)
828 int res = 0;
829 imm_use_iterator imm_iter;
830 gimple *stmt;
832 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
834 if (is_gimple_debug (stmt))
835 continue;
837 /* TODO: We could handle at least const builtin functions like arithmetic
838 operations below. */
839 if (is_gimple_call (stmt))
841 int all_uses = 0;
842 use_operand_p use_p;
843 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
844 all_uses++;
846 gcall *call = as_a <gcall *> (stmt);
847 unsigned arg_count;
848 if (gimple_call_internal_p (call)
849 || (arg_count = gimple_call_num_args (call)) == 0)
851 res = -1;
852 BREAK_FROM_IMM_USE_STMT (imm_iter);
855 cgraph_edge *cs = node->get_edge (stmt);
856 gcc_checking_assert (cs);
857 isra_call_summary *csum = call_sums->get_create (cs);
858 csum->init_inputs (arg_count);
860 int simple_uses = 0;
861 for (unsigned i = 0; i < arg_count; i++)
862 if (gimple_call_arg (call, i) == name)
864 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
866 simple_uses = -1;
867 break;
869 simple_uses++;
872 if (simple_uses < 0
873 || all_uses != simple_uses)
875 res = -1;
876 BREAK_FROM_IMM_USE_STMT (imm_iter);
878 res += all_uses;
880 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
881 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
882 || gimple_code (stmt) == GIMPLE_PHI))
884 tree lhs;
885 if (gimple_code (stmt) == GIMPLE_PHI)
886 lhs = gimple_phi_result (stmt);
887 else
888 lhs = gimple_assign_lhs (stmt);
890 if (TREE_CODE (lhs) != SSA_NAME)
892 res = -1;
893 BREAK_FROM_IMM_USE_STMT (imm_iter);
895 gcc_assert (!gimple_vdef (stmt));
896 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
898 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
899 analyzed);
900 if (tmp < 0)
902 res = tmp;
903 BREAK_FROM_IMM_USE_STMT (imm_iter);
905 res += tmp;
908 else
910 res = -1;
911 BREAK_FROM_IMM_USE_STMT (imm_iter);
914 return res;
917 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
918 also described by NODE) and simple arithmetic calculations involving PARM
919 and return false if any of them is used for something else than either an
920 actual call argument, simple arithmetic operation or debug statement. If
921 there are no such uses, return true and store the number of actual arguments
922 that this parameter eventually feeds to (or zero if there is none) to
923 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
924 sources.
926 This function is similar to ptr_parm_has_nonarg_uses but its results are
927 meant for unused parameter removal, as opposed to splitting of parameters
928 passed by reference or converting them to passed by value.
931 static bool
932 isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
933 int parm_num, int *call_uses_p)
935 gcc_checking_assert (is_gimple_reg (parm));
937 tree name = ssa_default_def (fun, parm);
938 if (!name || has_zero_uses (name))
940 *call_uses_p = 0;
941 return false;
944 /* Edge summaries can only handle callers with fewer than 256 parameters. */
945 if (parm_num > UCHAR_MAX)
946 return true;
948 bitmap analyzed = BITMAP_ALLOC (NULL);
949 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
950 analyzed);
951 BITMAP_FREE (analyzed);
952 if (call_uses < 0)
953 return true;
954 *call_uses_p = call_uses;
955 return false;
958 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
959 examine whether there are any nonarg uses that are not actual arguments or
960 otherwise infeasible uses.  If so, return true, otherwise return false.
961 Create pass-through IPA flow records for any direct uses as argument calls
962 and if returning false, store their number into *PT_COUNT_P.  NODE and FUN
963 must represent the function that is currently analyzed, PARM_NUM must be the
964 index of the analyzed parameter.
966 This function is similar to isra_track_scalar_param_local_uses but its
967 results are meant for splitting of parameters passed by reference or turning
968 them into bits passed by value, as opposed to generic unused parameter
969 removal.  */
972 static bool
973 ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
974 int parm_num, unsigned *pt_count_p)
976 imm_use_iterator ui;
977 gimple *stmt;
978 tree name = ssa_default_def (fun, parm);
979 bool ret = false;
980 unsigned pt_count = 0;
/* A parameter without a default-definition SSA name is never read in the
   body, so there cannot be any problematic uses.  */
982 if (!name || has_zero_uses (name))
983 return false;
985 /* Edge summaries can only handle callers with fewer than 256 parameters.  */
986 if (parm_num > UCHAR_MAX)
987 return true;
989 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
/* Count, per statement, how many uses of NAME we can cope with.  */
991 unsigned uses_ok = 0;
992 use_operand_p use_p;
994 if (is_gimple_debug (stmt))
995 continue;
/* A plain load of the form *NAME (possibly wrapped in handled components)
   of the pointed-to type is acceptable.  */
997 if (gimple_assign_single_p (stmt))
999 tree rhs = gimple_assign_rhs1 (stmt);
1000 while (handled_component_p (rhs))
1001 rhs = TREE_OPERAND (rhs, 0);
1002 if (TREE_CODE (rhs) == MEM_REF
1003 && TREE_OPERAND (rhs, 0) == name
1004 && integer_zerop (TREE_OPERAND (rhs, 1))
1005 && types_compatible_p (TREE_TYPE (rhs),
1006 TREE_TYPE (TREE_TYPE (name)))
1007 && !TREE_THIS_VOLATILE (rhs))
1008 uses_ok++;
1010 else if (is_gimple_call (stmt))
1012 gcall *call = as_a <gcall *> (stmt);
1013 unsigned arg_count;
/* Internal function calls have no call-graph edge to attach a summary
   to, and a use in a call with no arguments cannot be an argument use,
   so both are infeasible.  */
1014 if (gimple_call_internal_p (call)
1015 || (arg_count = gimple_call_num_args (call)) == 0)
1017 ret = true;
1018 BREAK_FROM_IMM_USE_STMT (ui);
1021 cgraph_edge *cs = node->get_edge (stmt);
1022 gcc_checking_assert (cs);
1023 isra_call_summary *csum = call_sums->get_create (cs);
1024 csum->init_inputs (arg_count);
1026 for (unsigned i = 0; i < arg_count; ++i)
1028 tree arg = gimple_call_arg (stmt, i);
/* Passing the pointer itself directly is a pass-through which the
   interprocedural propagation may later resolve.  */
1030 if (arg == name)
1032 /* TODO: Allow &MEM_REF[name + offset] here,
1033 ipa_param_body_adjustments::modify_call_stmt has to be
1034 adjusted too.  */
1035 csum->m_arg_flow[i].pointer_pass_through = true;
1036 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1037 pt_count++;
1038 uses_ok++;
1039 continue;
/* Like for loads above, a dereference of the pointed-to type passed
   by value as an argument is also fine.  */
1042 while (handled_component_p (arg))
1043 arg = TREE_OPERAND (arg, 0);
1044 if (TREE_CODE (arg) == MEM_REF
1045 && TREE_OPERAND (arg, 0) == name
1046 && integer_zerop (TREE_OPERAND (arg, 1))
1047 && types_compatible_p (TREE_TYPE (arg),
1048 TREE_TYPE (TREE_TYPE (name)))
1049 && !TREE_THIS_VOLATILE (arg))
1050 uses_ok++;
1054 /* If the number of valid uses does not match the number of
1055 uses in this stmt there is an unhandled use.  */
1056 unsigned all_uses = 0;
1057 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1058 all_uses++;
1060 gcc_checking_assert (uses_ok <= all_uses);
1061 if (uses_ok != all_uses)
1063 ret = true;
1064 BREAK_FROM_IMM_USE_STMT (ui);
1068 *pt_count_p = pt_count;
1069 return ret;
1072 /* Initialize vector of parameter descriptors of NODE.  Return true if there
1073 are any candidates for splitting or unused aggregate parameter removal (the
1074 function may return false if there are candidates for removal of register
1075 parameters) and function body must be scanned.  */
1077 static bool
1078 create_parameter_descriptors (cgraph_node *node,
1079 vec<gensum_param_desc> *param_descriptions)
1081 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1082 bool ret = false;
1084 int num = 0;
1085 for (tree parm = DECL_ARGUMENTS (node->decl);
1086 parm;
1087 parm = DECL_CHAIN (parm), num++)
1089 const char *msg;
1090 gensum_param_desc *desc = &(*param_descriptions)[num];
1091 /* param_descriptions vector is grown cleared in the caller.  */
1092 desc->param_number = num;
1093 decl2desc->put (parm, desc);
1095 if (dump_file && (dump_flags & TDF_DETAILS))
1096 print_generic_expr (dump_file, parm, TDF_UID);
1098 int scalar_call_uses;
1099 tree type = TREE_TYPE (parm);
/* The following tests filter out parameters that can never be
   transformed; each failed check simply moves on to the next PARM.  */
1100 if (TREE_THIS_VOLATILE (parm))
1102 if (dump_file && (dump_flags & TDF_DETAILS))
1103 fprintf (dump_file, " not a candidate, is volatile\n");
1104 continue;
1106 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1108 if (dump_file && (dump_flags & TDF_DETAILS))
1109 fprintf (dump_file, " not a candidate, is a va_list type\n");
1110 continue;
1112 if (TREE_ADDRESSABLE (parm))
1114 if (dump_file && (dump_flags & TDF_DETAILS))
1115 fprintf (dump_file, " not a candidate, is addressable\n");
1116 continue;
1118 if (TREE_ADDRESSABLE (type))
1120 if (dump_file && (dump_flags & TDF_DETAILS))
1121 fprintf (dump_file, " not a candidate, type cannot be split\n");
1122 continue;
/* A scalar whose only uses are as call arguments is a removal
   candidate; note this in the descriptor but keep going, it may also
   be a splitting candidate if it is a pointer.  */
1125 if (is_gimple_reg (parm)
1126 && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1127 &scalar_call_uses))
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1130 fprintf (dump_file, " is a scalar with only %i call uses\n",
1131 scalar_call_uses);
1133 desc->locally_unused = true;
1134 desc->call_uses = scalar_call_uses;
/* For by-reference candidates, check the pointed-to type; everything
   else must be an aggregate to be worth splitting.  */
1137 if (POINTER_TYPE_P (type))
1139 type = TREE_TYPE (type);
1141 if (TREE_CODE (type) == FUNCTION_TYPE
1142 || TREE_CODE (type) == METHOD_TYPE)
1144 if (dump_file && (dump_flags & TDF_DETAILS))
1145 fprintf (dump_file, " not a candidate, reference to "
1146 "a function\n");
1147 continue;
1149 if (TYPE_VOLATILE (type))
1151 if (dump_file && (dump_flags & TDF_DETAILS))
1152 fprintf (dump_file, " not a candidate, reference to "
1153 "a volatile type\n");
1154 continue;
1156 if (TREE_CODE (type) == ARRAY_TYPE
1157 && TYPE_NONALIASED_COMPONENT (type))
1159 if (dump_file && (dump_flags & TDF_DETAILS))
1160 fprintf (dump_file, " not a candidate, reference to "
1161 "a nonaliased component array\n");
1162 continue;
1164 if (!is_gimple_reg (parm))
1166 if (dump_file && (dump_flags & TDF_DETAILS))
1167 fprintf (dump_file, " not a candidate, a reference which is "
1168 "not a gimple register (probably addressable)\n");
1169 continue;
1171 if (is_va_list_type (type))
1173 if (dump_file && (dump_flags & TDF_DETAILS))
1174 fprintf (dump_file, " not a candidate, reference to "
1175 "a va list\n");
1176 continue;
1178 if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1179 &desc->ptr_pt_count))
1181 if (dump_file && (dump_flags & TDF_DETAILS))
1182 fprintf (dump_file, " not a candidate, reference has "
1183 "nonarg uses\n");
1184 continue;
1186 desc->by_ref = true;
1188 else if (!AGGREGATE_TYPE_P (type))
1190 /* This is in an else branch because scalars passed by reference are
1191 still candidates to be passed by value.  */
1192 if (dump_file && (dump_flags & TDF_DETAILS))
1193 fprintf (dump_file, " not a candidate, not an aggregate\n");
1194 continue;
/* Checks common to by-reference and by-value aggregate candidates.  */
1197 if (!COMPLETE_TYPE_P (type))
1199 if (dump_file && (dump_flags & TDF_DETAILS))
1200 fprintf (dump_file, " not a candidate, not a complete type\n");
1201 continue;
1203 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1205 if (dump_file && (dump_flags & TDF_DETAILS))
1206 fprintf (dump_file, " not a candidate, size not representable\n");
1207 continue;
1209 unsigned HOST_WIDE_INT type_size
1210 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1211 if (type_size == 0
1212 || type_size >= ISRA_ARG_SIZE_LIMIT)
1214 if (dump_file && (dump_flags & TDF_DETAILS))
1215 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1216 continue;
1218 if (type_internals_preclude_sra_p (type, &msg))
1220 if (dump_file && (dump_flags & TDF_DETAILS))
1221 fprintf (dump_file, " not a candidate, %s\n", msg);
1222 continue;
1225 if (dump_file && (dump_flags & TDF_DETAILS))
1226 fprintf (dump_file, " is a candidate\n");
1228 ret = true;
1229 desc->split_candidate = true;
/* Each by-reference candidate gets a column in the bb_dereferences
   table, indexed by deref_index.  */
1230 if (desc->by_ref)
1231 desc->deref_index = by_ref_count++;
1233 return ret;
1236 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1237 found, which happens if DECL is for a static chain. */
1239 static gensum_param_desc *
1240 get_gensum_param_desc (tree decl)
1242 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1243 gensum_param_desc **slot = decl2desc->get (decl);
1244 if (!slot)
1245 /* This can happen for static chains which we cannot handle so far. */
1246 return NULL;
1247 gcc_checking_assert (*slot);
1248 return *slot;
1252 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1253 write REASON to the dump file if there is one. */
1255 static void
1256 disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1258 if (!desc->split_candidate)
1259 return;
1261 if (dump_file && (dump_flags & TDF_DETAILS))
1262 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1263 desc->param_number, reason);
1265 desc->split_candidate = false;
1268 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1269 there is one. */
1271 static void
1272 disqualify_split_candidate (tree decl, const char *reason)
1274 gensum_param_desc *desc = get_gensum_param_desc (decl);
1275 if (desc)
1276 disqualify_split_candidate (desc, reason);
1279 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1280 first, check that there are not too many of them already. If so, do not
1281 allocate anything and return NULL. */
1283 static gensum_param_access *
1284 allocate_access (gensum_param_desc *desc,
1285 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1287 if (desc->access_count
1288 == (unsigned) param_ipa_sra_max_replacements)
1290 disqualify_split_candidate (desc, "Too many replacement candidates");
1291 return NULL;
1294 gensum_param_access *access
1295 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1296 sizeof (gensum_param_access));
1297 memset (access, 0, sizeof (*access));
1298 access->offset = offset;
1299 access->size = size;
1300 return access;
1303 /* In what context scan_expr_access has been called, whether it deals with a
1304 load, a function argument, or a store.  Please note that in rare
1305 circumstances when it is not clear if the access is a load or store,
1306 ISRA_CTX_STORE is used too.  */
/* ISRA_CTX_ARG is the only context that does not make an access mandatory
   (nonarg), see get_access.  */
1308 enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1310 /* Return an access describing memory access to the variable described by DESC
1311 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1312 at a certain tree level FIRST.  Attempt to create it and put into the
1313 appropriate place in the access tree if does not exist, but fail and return
1314 NULL if there are already too many accesses, if it would create a partially
1315 overlapping access or if an access would end up within a pre-existing
1316 non-call access.  */
1318 static gensum_param_access *
1319 get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1320 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
/* Invariant: siblings on each level are sorted by offset and do not
   overlap, and every child is fully contained in its parent.  */
1322 gensum_param_access *access = *first, **ptr = first;
1324 if (!access)
1326 /* No pre-existing access at this level, just create it.  */
1327 gensum_param_access *a = allocate_access (desc, offset, size);
1328 if (!a)
1329 return NULL;
1330 *first = a;
1331 return *first;
1334 if (access->offset >= offset + size)
1336 /* We want to squeeze it in front of the very first access, just do
1337 it.  */
1338 gensum_param_access *r = allocate_access (desc, offset, size);
1339 if (!r)
1340 return NULL;
1341 r->next_sibling = access;
1342 *first = r;
1343 return r;
1346 /* Skip all accesses that have to come before us until the next sibling is
1347 already too far.  */
1348 while (offset >= access->offset + access->size
1349 && access->next_sibling
1350 && access->next_sibling->offset < offset + size)
1352 ptr = &access->next_sibling;
1353 access = access->next_sibling;
1356 /* At this point we know we do not belong before access.  */
1357 gcc_assert (access->offset < offset + size);
1359 if (access->offset == offset && access->size == size)
1360 /* We found what we were looking for.  */
1361 return access;
1363 if (access->offset <= offset
1364 && access->offset + access->size >= offset + size)
1366 /* We fit into access which is larger than us.  We need to find/create
1367 something below access.  But we only allow nesting in call
1368 arguments.  */
1369 if (access->nonarg)
1370 return NULL;
/* Recurse one level down the access tree.  */
1372 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1375 if (offset <= access->offset
1376 && offset + size >= access->offset + access->size)
1377 /* We are actually bigger than access, which fully fits into us, take its
1378 place and make all accesses fitting into it its children.  */
1380 /* But first, we only allow nesting in call arguments so check if that is
1381 what we are trying to represent.  */
1382 if (ctx != ISRA_CTX_ARG)
1383 return NULL;
1385 gensum_param_access *r = allocate_access (desc, offset, size);
1386 if (!r)
1387 return NULL;
1388 r->first_child = access;
/* Gather all siblings covered by the new access as its children.  */
1390 while (access->next_sibling
1391 && access->next_sibling->offset < offset + size)
1392 access = access->next_sibling;
1393 if (access->offset + access->size > offset + size)
1395 /* This must be a different access, which are sorted, so the
1396 following must be true and this signals a partial overlap.  */
1397 gcc_assert (access->offset > offset);
1398 return NULL;
1401 r->next_sibling = access->next_sibling;
1402 access->next_sibling = NULL;
1403 *ptr = r;
1404 return r;
1407 if (offset >= access->offset + access->size)
1409 /* We belong after access.  */
1410 gensum_param_access *r = allocate_access (desc, offset, size);
1411 if (!r)
1412 return NULL;
1413 r->next_sibling = access->next_sibling;
1414 access->next_sibling = r;
1415 return r;
/* What remains are partial overlaps with ACCESS, which are disallowed.  */
1418 if (offset < access->offset)
1420 /* We know the following, otherwise we would have created a
1421 super-access.  */
1422 gcc_checking_assert (offset + size < access->offset + access->size);
1423 return NULL;
1426 if (offset + size > access->offset + access->size)
1428 /* Likewise.  */
1429 gcc_checking_assert (offset > access->offset);
1430 return NULL;
1433 gcc_unreachable ();
1436 /* Return an access describing memory access to the variable described by DESC
1437 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1438 to create if it does not exist, but fail and return NULL if there are
1439 already too many accesses, if it would create a partially overlapping access
1440 or if an access would end up in a non-call access. */
1442 static gensum_param_access *
1443 get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1444 isra_scan_context ctx)
1446 gcc_checking_assert (desc->split_candidate);
1448 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1449 size, ctx);
1450 if (!access)
1452 disqualify_split_candidate (desc,
1453 "Bad access overlap or too many accesses");
1454 return NULL;
1457 switch (ctx)
1459 case ISRA_CTX_STORE:
1460 gcc_assert (!desc->by_ref);
1461 /* Fall-through */
1462 case ISRA_CTX_LOAD:
1463 access->nonarg = true;
1464 break;
1465 case ISRA_CTX_ARG:
1466 break;
1469 return access;
1472 /* Verify that parameter access tree starting with ACCESS is in good shape.
1473 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
1474 ACCESS or zero if there is none. */
1476 static bool
1477 verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1478 HOST_WIDE_INT parent_size)
1480 while (access)
1482 gcc_assert (access->offset >= 0 && access->size > 0);
1484 if (parent_size != 0)
1486 if (access->offset < parent_offset)
1488 error ("Access offset before parent offset");
1489 return true;
1491 if (access->size >= parent_size)
1493 error ("Access size greater or equal to its parent size");
1494 return true;
1496 if (access->offset + access->size > parent_offset + parent_size)
1498 error ("Access terminates outside of its parent");
1499 return true;
1503 if (verify_access_tree_1 (access->first_child, access->offset,
1504 access->size))
1505 return true;
1507 if (access->next_sibling
1508 && (access->next_sibling->offset < access->offset + access->size))
1510 error ("Access overlaps with its sibling");
1511 return true;
1514 access = access->next_sibling;
1516 return false;
1519 /* Verify that parameter access tree starting with ACCESS is in good shape,
1520 halt compilation and dump the tree to stderr if not. */
1522 DEBUG_FUNCTION void
1523 isra_verify_access_tree (gensum_param_access *access)
1525 if (verify_access_tree_1 (access, 0, 0))
1527 for (; access; access = access->next_sibling)
1528 dump_gensum_access (stderr, access, 2);
1529 internal_error ("IPA-SRA access verification failed");
1534 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1535 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1537 static bool
1538 asm_visit_addr (gimple *, tree op, tree, void *)
1540 op = get_base_address (op);
1541 if (op
1542 && TREE_CODE (op) == PARM_DECL)
1543 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1545 return false;
1548 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1549 basic block BB, unless the BB has already been marked as a potentially
1550 final. */
1552 static void
1553 mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1554 basic_block bb)
1556 gcc_assert (desc->by_ref);
1557 gcc_checking_assert (desc->split_candidate);
1559 if (bitmap_bit_p (final_bbs, bb->index))
1560 return;
1562 int idx = bb->index * by_ref_count + desc->deref_index;
1563 if (bb_dereferences[idx] < dist)
1564 bb_dereferences[idx] = dist;
1567 /* Return true, if any potential replacements should use NEW_TYPE as opposed to
1568 previously recorded OLD_TYPE. */
1570 static bool
1571 type_prevails_p (tree old_type, tree new_type)
1573 if (old_type == new_type)
1574 return false;
1576 /* Non-aggregates are always better. */
1577 if (!is_gimple_reg_type (old_type)
1578 && is_gimple_reg_type (new_type))
1579 return true;
1580 if (is_gimple_reg_type (old_type)
1581 && !is_gimple_reg_type (new_type))
1582 return false;
1584 /* Prefer any complex or vector type over any other scalar type. */
1585 if (TREE_CODE (old_type) != COMPLEX_TYPE
1586 && TREE_CODE (old_type) != VECTOR_TYPE
1587 && (TREE_CODE (new_type) == COMPLEX_TYPE
1588 || TREE_CODE (new_type) == VECTOR_TYPE))
1589 return true;
1590 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1591 || TREE_CODE (old_type) == VECTOR_TYPE)
1592 && TREE_CODE (new_type) != COMPLEX_TYPE
1593 && TREE_CODE (new_type) != VECTOR_TYPE)
1594 return false;
1596 /* Use the integral type with the bigger precision. */
1597 if (INTEGRAL_TYPE_P (old_type)
1598 && INTEGRAL_TYPE_P (new_type))
1599 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1601 /* Attempt to disregard any integral type with non-full precision. */
1602 if (INTEGRAL_TYPE_P (old_type)
1603 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1604 != TYPE_PRECISION (old_type)))
1605 return true;
1606 if (INTEGRAL_TYPE_P (new_type)
1607 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1608 != TYPE_PRECISION (new_type)))
1609 return false;
1610 /* Stabilize the selection. */
1611 return TYPE_UID (old_type) < TYPE_UID (new_type);
1614 /* When scanning an expression which is a call argument, this structure
1615 specifies the call and the position of the argument.  */
1617 struct scan_call_info
1619 /* Call graph edge representing the call.  */
1620 cgraph_edge *cs;
1621 /* Total number of arguments in the call.  */
1622 unsigned argument_count;
1623 /* Zero-based number of the actual argument being scanned.  */
1624 unsigned arg_idx;
1627 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1628 call argument described by CALL_INFO. */
1630 static void
1631 record_nonregister_call_use (gensum_param_desc *desc,
1632 scan_call_info *call_info,
1633 unsigned unit_offset, unsigned unit_size)
1635 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1636 csum->init_inputs (call_info->argument_count);
1638 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1639 param_flow->aggregate_pass_through = true;
1640 set_single_param_flow_source (param_flow, desc->param_number);
1641 param_flow->unit_offset = unit_offset;
1642 param_flow->unit_size = unit_size;
1643 desc->call_uses++;
1646 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1647 modification. */
1649 static bool
1650 mark_maybe_modified (ao_ref *, tree, void *data)
1652 bool *maybe_modified = (bool *) data;
1653 *maybe_modified = true;
1654 return true;
1657 /* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
1658 specifies whether EXPR is used in a load, store or as an argument call.  BB
1659 must be the basic block in which expr resides.  If CTX specifies call
1660 argument context, CALL_INFO must describe that call and argument position,
1661 otherwise it is ignored.  */
1663 static void
1664 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1665 basic_block bb, scan_call_info *call_info = NULL)
1667 poly_int64 poffset, psize, pmax_size;
1668 HOST_WIDE_INT offset, size, max_size;
1669 tree base;
1670 bool deref = false;
1671 bool reverse;
/* Look through BIT_FIELD_REF, IMAGPART_EXPR and REALPART_EXPR down to the
   object they are applied to.  */
1673 if (TREE_CODE (expr) == BIT_FIELD_REF
1674 || TREE_CODE (expr) == IMAGPART_EXPR
1675 || TREE_CODE (expr) == REALPART_EXPR)
1676 expr = TREE_OPERAND (expr, 0);
1678 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
/* A dereference of a parameter appears as a MEM_REF based on the default
   definition SSA name of the PARM_DECL.  */
1680 if (TREE_CODE (base) == MEM_REF)
1682 tree op = TREE_OPERAND (base, 0);
1683 if (TREE_CODE (op) != SSA_NAME
1684 || !SSA_NAME_IS_DEFAULT_DEF (op))
1685 return;
1686 base = SSA_NAME_VAR (op);
1687 if (!base)
1688 return;
1689 deref = true;
1691 if (TREE_CODE (base) != PARM_DECL)
1692 return;
1694 gensum_param_desc *desc = get_gensum_param_desc (base);
1695 if (!desc || !desc->split_candidate)
1696 return;
1698 if (!poffset.is_constant (&offset)
1699 || !psize.is_constant (&size)
1700 || !pmax_size.is_constant (&max_size))
1702 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1703 "access.");
1704 return;
1706 if (size < 0 || size != max_size)
1708 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1709 return;
1711 if (TREE_CODE (expr) == COMPONENT_REF
1712 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1714 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1715 return;
1717 if (offset < 0)
1719 disqualify_split_candidate (desc, "Encountered an access at a "
1720 "negative offset.");
1721 return;
1723 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1724 gcc_assert ((size % BITS_PER_UNIT) == 0);
/* Offsets and sizes must fit the unsigned fields used by the IPA
   summaries, see ISRA_ARG_SIZE_LIMIT.  */
1725 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1726 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1728 disqualify_split_candidate (desc, "Encountered an access with too big "
1729 "offset or size");
1730 return;
1733 tree type = TREE_TYPE (expr);
1734 unsigned int exp_align = get_object_alignment (expr);
1736 if (exp_align < TYPE_ALIGN (type))
1738 disqualify_split_candidate (desc, "Underaligned access.");
1739 return;
1742 if (deref)
1744 if (!desc->by_ref)
1746 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1747 return;
1749 else if (ctx == ISRA_CTX_STORE)
1751 disqualify_split_candidate (desc, "Storing to data passed by "
1752 "reference.");
1753 return;
/* Use alias analysis to check that what the pointer refers to cannot
   have been modified between function entry and this statement; each
   walk consumes a portion of the global aa_walking_limit budget.  */
1756 if (!aa_walking_limit)
1758 disqualify_split_candidate (desc, "Out of alias analysis step "
1759 "limit.");
1760 return;
1763 gcc_checking_assert (gimple_vuse (stmt));
1764 bool maybe_modified = false;
1765 ao_ref ar;
1767 ao_ref_init (&ar, expr);
1768 bitmap visited = BITMAP_ALLOC (NULL);
1769 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1770 mark_maybe_modified, &maybe_modified,
1771 &visited, NULL, aa_walking_limit);
1772 BITMAP_FREE (visited);
1773 if (walked > 0)
1775 gcc_assert (aa_walking_limit > walked);
1776 aa_walking_limit = aa_walking_limit - walked;
1778 if (walked < 0)
1779 aa_walking_limit = 0;
1780 if (maybe_modified || walked < 0)
1782 disqualify_split_candidate (desc, "Data passed by reference possibly "
1783 "modified through an alias.");
1784 return;
1786 else
1787 mark_param_dereference (desc, offset + size, bb);
1789 else
1790 /* Pointer parameters with direct uses should have been ruled out by
1791 analyzing SSA default def when looking at the parameters.  */
1792 gcc_assert (!desc->by_ref);
1794 gensum_param_access *access = get_access (desc, offset, size, ctx);
1795 if (!access)
1796 return;
1798 if (ctx == ISRA_CTX_ARG)
1800 gcc_checking_assert (call_info);
1802 if (!deref)
1803 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1804 size / BITS_PER_UNIT);
1805 else
1806 /* This is not a pass-through of a pointer, this is a use like any
1807 other.  */
1808 access->nonarg = true;
/* The first access to a given piece records the type and alias
   information; later accesses must be compatible with them.  */
1811 if (!access->type)
1813 access->type = type;
1814 access->alias_ptr_type = reference_alias_ptr_type (expr);
1815 access->reverse = reverse;
1817 else
1819 if (exp_align < TYPE_ALIGN (access->type))
1821 disqualify_split_candidate (desc, "Reference has lower alignment "
1822 "than a previous one.");
1823 return;
1825 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1827 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1828 return;
1830 if (access->reverse != reverse)
1832 disqualify_split_candidate (desc, "Both normal and reverse "
1833 "scalar storage order.");
1834 return;
1836 if (!deref
1837 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1838 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1840 /* We need the same aggregate type on all accesses to be able to
1841 distinguish transformation spots from pass-through arguments in
1842 the transformation phase.  */
1843 disqualify_split_candidate (desc, "We do not support aggregate "
1844 "type punning.");
1845 return;
1848 if (type_prevails_p (access->type, type))
1849 access->type = type;
1853 /* Scan body function described by NODE and FUN and create access trees for
1854 parameters.  */
1856 static void
1857 scan_function (cgraph_node *node, struct function *fun)
1859 basic_block bb;
1861 FOR_EACH_BB_FN (bb, fun)
1863 gimple_stmt_iterator gsi;
1864 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1866 gimple *stmt = gsi_stmt (gsi);
/* Blocks with statements after which the function may terminate or
   data may escape (external throws, returns, non-pure calls, asms)
   are recorded in final_bbs; dereference distances are not propagated
   across them.  */
1868 if (stmt_can_throw_external (fun, stmt))
1869 bitmap_set_bit (final_bbs, bb->index);
1870 switch (gimple_code (stmt))
1872 case GIMPLE_RETURN:
1874 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1875 if (t != NULL_TREE)
1876 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1877 bitmap_set_bit (final_bbs, bb->index);
1879 break;
1881 case GIMPLE_ASSIGN:
1882 if (gimple_assign_single_p (stmt)
1883 && !gimple_clobber_p (stmt))
1885 tree rhs = gimple_assign_rhs1 (stmt);
1886 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1887 tree lhs = gimple_assign_lhs (stmt);
1888 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1890 break;
1892 case GIMPLE_CALL:
1894 unsigned argument_count = gimple_call_num_args (stmt);
1895 isra_scan_context ctx = ISRA_CTX_ARG;
1896 scan_call_info call_info, *call_info_p = &call_info;
/* Internal function calls have no call-graph edge, so scan their
   arguments as ordinary loads (or stores for store functions).  */
1897 if (gimple_call_internal_p (stmt))
1899 call_info_p = NULL;
1900 ctx = ISRA_CTX_LOAD;
1901 internal_fn ifn = gimple_call_internal_fn (stmt);
1902 if (internal_store_fn_p (ifn))
1903 ctx = ISRA_CTX_STORE;
1905 else
1907 call_info.cs = node->get_edge (stmt);
1908 call_info.argument_count = argument_count;
1911 for (unsigned i = 0; i < argument_count; i++)
1913 call_info.arg_idx = i;
1914 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1915 ctx, bb, call_info_p);
1918 tree lhs = gimple_call_lhs (stmt);
1919 if (lhs)
1920 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1921 int flags = gimple_call_flags (stmt);
1922 if ((flags & (ECF_CONST | ECF_PURE)) == 0)
1923 bitmap_set_bit (final_bbs, bb->index);
1925 break;
1927 case GIMPLE_ASM:
1929 gasm *asm_stmt = as_a <gasm *> (stmt);
/* Parameters whose address is taken by asm operands cannot be
   scalarized; asm_visit_addr disqualifies them.  */
1930 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1931 asm_visit_addr);
1932 bitmap_set_bit (final_bbs, bb->index);
1934 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1936 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1937 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1939 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1941 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1942 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1945 break;
1947 default:
1948 break;
1954 /* Return true if SSA_NAME NAME is only used in return statements, or if
1955 results of any operations it is involved in are only used in return
1956 statements.  ANALYZED is a bitmap that tracks which SSA names we have
1957 already started investigating.  */
1959 static bool
1960 ssa_name_only_returned_p (tree name, bitmap analyzed)
1962 bool res = true;
1963 imm_use_iterator imm_iter;
1964 gimple *stmt;
1966 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1968 if (is_gimple_debug (stmt))
1969 continue;
/* A return statement is fine only when it returns NAME itself.  */
1971 if (gimple_code (stmt) == GIMPLE_RETURN)
1973 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1974 if (t != name)
1976 res = false;
1977 BREAK_FROM_IMM_USE_STMT (imm_iter);
/* Non-volatile assignments and PHIs are OK if their result in turn
   flows only into return statements.  */
1980 else if ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1981 || gimple_code (stmt) == GIMPLE_PHI)
1983 /* TODO: And perhaps for const function calls too?  */
1984 tree lhs;
1985 if (gimple_code (stmt) == GIMPLE_PHI)
1986 lhs = gimple_phi_result (stmt);
1987 else
1988 lhs = gimple_assign_lhs (stmt);
1990 if (TREE_CODE (lhs) != SSA_NAME)
1992 res = false;
1993 BREAK_FROM_IMM_USE_STMT (imm_iter);
1995 gcc_assert (!gimple_vdef (stmt));
/* Recurse into uses of LHS, but at most once per SSA name;
   bitmap_set_bit returns true only when the bit was newly set.  */
1996 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1997 && !ssa_name_only_returned_p (lhs, analyzed))
1999 res = false;
2000 BREAK_FROM_IMM_USE_STMT (imm_iter);
/* Any other kind of use disqualifies NAME.  */
2003 else
2005 res = false;
2006 BREAK_FROM_IMM_USE_STMT (imm_iter);
2009 return res;
2012 /* Inspect the uses of the return value of the call associated with CS, and if
2013 it is not used or if it is only used to construct the return value of the
2014 caller, mark it as such in call or caller summary.  Also check for
2015 misaligned arguments.  */
2017 static void
2018 isra_analyze_call (cgraph_edge *cs)
2020 gcall *call_stmt = cs->call_stmt;
2021 unsigned count = gimple_call_num_args (call_stmt);
2022 isra_call_summary *csum = call_sums->get_create (cs);
2024 for (unsigned i = 0; i < count; i++)
2026 tree arg = gimple_call_arg (call_stmt, i);
/* Register arguments cannot be bit-misaligned; skip them.  */
2027 if (is_gimple_reg (arg))
2028 continue;
2030 tree offset;
2031 poly_int64 bitsize, bitpos;
2032 machine_mode mode;
2033 int unsignedp, reversep, volatilep = 0;
2034 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2035 &unsignedp, &reversep, &volatilep);
/* One argument not starting on a byte boundary is enough to flag the
   whole call summary.  */
2036 if (!multiple_p (bitpos, BITS_PER_UNIT))
2038 csum->m_bit_aligned_arg = true;
2039 break;
2043 tree lhs = gimple_call_lhs (call_stmt);
2044 if (lhs)
2046 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2047 from this function (without being read anywhere).  */
2048 if (TREE_CODE (lhs) == SSA_NAME)
2050 bitmap analyzed = BITMAP_ALLOC (NULL);
2051 if (ssa_name_only_returned_p (lhs, analyzed))
2052 csum->m_return_returned = true;
2053 BITMAP_FREE (analyzed);
/* No LHS at all means the return value is ignored by this caller.  */
2056 else
2057 csum->m_return_ignored = true;
2060 /* Look at all calls going out of NODE, described also by IFS and perform all
2061 analyses necessary for IPA-SRA that are not done at body scan time or done
2062 even when body is not scanned because the function is not a candidate. */
2064 static void
2065 isra_analyze_all_outgoing_calls (cgraph_node *node)
2067 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2068 isra_analyze_call (cs);
2069 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2070 isra_analyze_call (cs);
2073 /* Dump a dereferences table with heading STR to file F. */
2075 static void
2076 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2078 basic_block bb;
2080 fprintf (dump_file, "%s", str);
2081 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2082 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2084 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2085 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2087 int i;
2088 for (i = 0; i < by_ref_count; i++)
2090 int idx = bb->index * by_ref_count + i;
2091 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2094 fprintf (f, "\n");
2096 fprintf (dump_file, "\n");
2099 /* Propagate distances in bb_dereferences in the opposite direction than the
2100 control flow edges, in each step storing the maximum of the current value
2101 and the minimum of all successors.  These steps are repeated until the table
2102 stabilizes.  Note that BBs which might terminate the functions (according to
2103 final_bbs bitmap) are never updated in this way.  */
2105 static void
2106 propagate_dereference_distances (struct function *fun)
2108 basic_block bb;
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 dump_dereferences_table (dump_file, fun,
2112 "Dereference table before propagation:\n")
2114 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2115 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2116 FOR_EACH_BB_FN (bb, fun)
2118 queue.quick_push (bb);
2119 bb->aux = bb;
/* Worklist algorithm: bb->aux is non-NULL iff the block is currently
   queued; iterate until a fixed point is reached.  */
2122 while (!queue.is_empty ())
2124 edge_iterator ei;
2125 edge e;
2126 bool change = false;
2127 int i;
2129 bb = queue.pop ();
2130 bb->aux = NULL;
2132 if (bitmap_bit_p (final_bbs, bb->index))
2133 continue;
2135 for (i = 0; i < by_ref_count; i++)
2137 int idx = bb->index * by_ref_count + i;
2138 bool first = true;
2139 HOST_WIDE_INT inh = 0;
/* INH becomes the minimum of all (non-exit) successor values.  */
2141 FOR_EACH_EDGE (e, ei, bb->succs)
2143 int succ_idx = e->dest->index * by_ref_count + i;
2145 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2146 continue;
2148 if (first)
2150 first = false;
2151 inh = bb_dereferences [succ_idx];
2153 else if (bb_dereferences [succ_idx] < inh)
2154 inh = bb_dereferences [succ_idx];
2157 if (!first && bb_dereferences[idx] < inh)
2159 bb_dereferences[idx] = inh;
2160 change = true;
/* If this block's row changed, its predecessors may be able to grow
   too; re-queue them unless they are already queued.  */
2164 if (change)
2165 FOR_EACH_EDGE (e, ei, bb->preds)
2167 if (e->src->aux)
2168 continue;
2170 e->src->aux = e->src;
2171 queue.quick_push (e->src);
2175 if (dump_file && (dump_flags & TDF_DETAILS))
2176 dump_dereferences_table (dump_file, fun,
2177 "Dereference table after propagation:\n");
/* Perform basic checks on ACCESS to PARM described by DESC and all its
   children, return true if the parameter cannot be split, otherwise return
   false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must be
   the index of the entry BB in the function of PARM.  */

static bool
check_gensum_access (tree parm, gensum_param_desc *desc,
		     gensum_param_access *access,
		     HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
		     int entry_bb_index)
{
  if (access->nonarg)
    {
      /* A use that is not a mere argument pass-through.  */
      *only_calls = false;
      *nonarg_acc_size += access->size;

      if (access->first_child)
	{
	  disqualify_split_candidate (desc, "Overlapping non-call uses.");
	  return true;
	}
    }
  /* Do not decompose a non-BLKmode param in a way that would create
     BLKmode params.  Especially for by-reference passing (thus,
     pointer-type param) this is hardly worthwhile.  */
  if (DECL_MODE (parm) != BLKmode
      && TYPE_MODE (access->type) == BLKmode)
    {
      disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
      return true;
    }

  if (desc->by_ref)
    {
      /* For by-reference parameters the access must be provably performed
	 on every path from the entry, per the propagated dereference
	 distances (see propagate_dereference_distances).  */
      int idx = (entry_bb_index * by_ref_count + desc->deref_index);
      if ((access->offset + access->size) > bb_dereferences[idx])
	{
	  disqualify_split_candidate (desc, "Would create a possibly "
				      "illegal dereference in a caller.");
	  return true;
	}
    }

  /* Recurse into children; any failure disqualifies the whole tree.  */
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
			     entry_bb_index))
      return true;

  return false;
}
2233 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2234 descriptor DESC. */
2236 static void
2237 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2239 param_access *to = ggc_cleared_alloc<param_access> ();
2240 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2241 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2242 to->unit_offset = from->offset / BITS_PER_UNIT;
2243 to->unit_size = from->size / BITS_PER_UNIT;
2244 to->type = from->type;
2245 to->alias_ptr_type = from->alias_ptr_type;
2246 to->certain = from->nonarg;
2247 to->reverse = from->reverse;
2248 vec_safe_push (desc->accesses, to);
2250 for (gensum_param_access *ch = from->first_child;
2252 ch = ch->next_sibling)
2253 copy_accesses_to_ipa_desc (ch, desc);
/* Analyze function body scan results stored in bb_dereferences and
   param_accesses, detect possible transformations and store information of
   those in function summary.  NODE, FUN and IFS are all various structures
   describing the currently analyzed function.  */

static void
process_scan_results (cgraph_node *node, struct function *fun,
		      isra_func_summary *ifs,
		      vec<gensum_param_desc> *param_descriptions)
{
  bool check_pass_throughs = false;
  bool dereferences_propagated = false;
  tree parm = DECL_ARGUMENTS (node->decl);
  unsigned param_count = param_descriptions->length();

  /* PARM walks the DECL chain in lock-step with the descriptor index.  */
  for (unsigned desc_index = 0;
       desc_index < param_count;
       desc_index++, parm = DECL_CHAIN (parm))
    {
      gensum_param_desc *desc = &(*param_descriptions)[desc_index];
      if (!desc->split_candidate)
	continue;

      if (flag_checking)
	isra_verify_access_tree (desc->accesses);

      /* Dereference distances are only needed for by-ref candidates and are
	 computed lazily, at most once per function.  */
      if (!dereferences_propagated
	  && desc->by_ref
	  && desc->accesses)
	{
	  propagate_dereference_distances (fun);
	  dereferences_propagated = true;
	}

      HOST_WIDE_INT nonarg_acc_size = 0;
      bool only_calls = true;
      bool check_failed = false;

      int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
      for (gensum_param_access *acc = desc->accesses;
	   acc;
	   acc = acc->next_sibling)
	if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
				 entry_bb_index))
	  {
	    check_failed = true;
	    break;
	  }
      if (check_failed)
	continue;

      if (only_calls)
	desc->locally_unused = true;

      HOST_WIDE_INT cur_param_size
	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
      HOST_WIDE_INT param_size_limit;
      /* Splitting a by-ref parameter may grow the amount of data passed;
	 allow growth by the configured factor unless optimizing for
	 size.  */
      if (!desc->by_ref || optimize_function_for_size_p (fun))
	param_size_limit = cur_param_size;
      else
	param_size_limit
	  = opt_for_fn (node->decl,
			param_ipa_sra_ptr_growth_factor) * cur_param_size;
      if (nonarg_acc_size > param_size_limit
	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
	{
	  disqualify_split_candidate (desc, "Would result into a too big set "
				      "of replacements.");
	}
      else
	{
	  /* create_parameter_descriptors makes sure unit sizes of all
	     candidate parameters fit unsigned integers restricted to
	     ISRA_ARG_SIZE_LIMIT.  */
	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
	  if (desc->split_candidate && desc->ptr_pt_count)
	    {
	      gcc_assert (desc->by_ref);
	      check_pass_throughs = true;
	    }
	}
    }

  /* When a pointer parameter is passed-through to a callee, in which it is
     only used to read only one or a few items, we can attempt to transform it
     to obtaining and passing through the items instead of the pointer.  But we
     must take extra care that 1) we do not introduce any segfault by moving
     dereferences above control flow and that 2) the data is not modified
     through an alias in this function.  The IPA analysis must not introduce
     any accesses candidates unless it can prove both.

     The current solution is very crude as it consists of ensuring that the
     call postdominates entry BB and that the definition of VUSE of the call is
     default definition.  TODO: For non-recursive callees in the same
     compilation unit we could do better by doing analysis in topological order
     and looking into access candidates of callees, using their alias_ptr_types
     to attempt real AA.  We could also use the maximum known dereferenced
     offset in this function at IPA level.

     TODO: Measure the overhead and the effect of just being pessimistic.
     Maybe this is only -O3 material?  */

  bool pdoms_calculated = false;
  if (check_pass_throughs)
    for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
      {
	gcall *call_stmt = cs->call_stmt;
	tree vuse = gimple_vuse (call_stmt);

	/* If the callee is a const function, we don't get a VUSE.  In such
	   case there will be no memory accesses in the called function (or the
	   const attribute is wrong) and then we just don't care.  */
	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);

	unsigned count = gimple_call_num_args (call_stmt);
	isra_call_summary *csum = call_sums->get_create (cs);
	csum->init_inputs (count);
	for (unsigned argidx = 0; argidx < count; argidx++)
	  {
	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
	      continue;
	    unsigned pidx
	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
	    if (!desc->split_candidate)
	      {
		csum->m_arg_flow[argidx].pointer_pass_through = false;
		continue;
	      }
	    if (!uses_memory_as_obtained)
	      continue;

	    /* Post-dominator check placed last, hoping that it usually won't
	       be needed.  */
	    if (!pdoms_calculated)
	      {
		gcc_checking_assert (cfun);
		add_noreturn_fake_exit_edges ();
		connect_infinite_loops_to_exit ();
		calculate_dominance_info (CDI_POST_DOMINATORS);
		pdoms_calculated = true;
	      }
	    if (dominated_by_p (CDI_POST_DOMINATORS,
				gimple_bb (call_stmt),
				single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
	  }
      }
  if (pdoms_calculated)
    {
      free_dominance_info (CDI_POST_DOMINATORS);
      remove_fake_exit_edges ();
    }

  /* TODO: Add early exit if we disqualified everything.  This also requires
     that we either relax the restriction that
     ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
     or store the number of parameters to IPA-SRA function summary and use that
     when just removing params.  */

  /* Finally transcribe the gensum descriptors into the GC-managed IPA
     summary.  */
  vec_safe_reserve_exact (ifs->m_parameters, param_count);
  ifs->m_parameters->quick_grow_cleared (param_count);
  for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
    {
      gensum_param_desc *s = &(*param_descriptions)[desc_index];
      isra_param_desc *d = &(*ifs->m_parameters)[desc_index];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->nonarg_acc_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      for (gensum_param_access *acc = s->accesses;
	   acc;
	   acc = acc->next_sibling)
	copy_accesses_to_ipa_desc (acc, d);
    }

  if (dump_file)
    dump_isra_param_descriptors (dump_file, node->decl, ifs);
}
2441 /* Return true if there are any overlaps among certain accesses of DESC. If
2442 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2443 too. DESC is assumed to be a split candidate that is not locally
2444 unused. */
2446 static bool
2447 overlapping_certain_accesses_p (isra_param_desc *desc,
2448 bool *certain_access_present_p)
2450 unsigned pclen = vec_safe_length (desc->accesses);
2451 for (unsigned i = 0; i < pclen; i++)
2453 param_access *a1 = (*desc->accesses)[i];
2455 if (!a1->certain)
2456 continue;
2457 if (certain_access_present_p)
2458 *certain_access_present_p = true;
2459 for (unsigned j = i + 1; j < pclen; j++)
2461 param_access *a2 = (*desc->accesses)[j];
2462 if (a2->certain
2463 && a1->unit_offset < a2->unit_offset + a2->unit_size
2464 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2465 return true;
2468 return false;
/* Check for any overlaps of certain param accesses among splitting candidates
   and signal an ICE if there are any.  If CERTAIN_MUST_EXIST is set, also
   check that used splitting candidates have at least one certain access.  */

static void
verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;
  unsigned param_count = vec_safe_length (ifs->m_parameters);
  for (unsigned pidx = 0; pidx < param_count; pidx++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
      if (!desc->split_candidate || desc->locally_unused)
	continue;

      /* Pre-setting to true when certain accesses need not exist turns the
	 second check below into a no-op.  */
      bool certain_access_present = !certain_must_exist;
      if (overlapping_certain_accesses_p (desc, &certain_access_present))
	internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
			"which overlap", node->dump_name (), pidx);
      if (!certain_access_present)
	internal_error ("Function %s, parameter %u, is used but does not "
			"have any certain IPA-SRA access",
			node->dump_name (), pidx);
    }
}
2499 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2500 this compilation unit and create summary structures describing IPA-SRA
2501 opportunities and constraints in them. */
2503 static void
2504 ipa_sra_generate_summary (void)
2506 struct cgraph_node *node;
2508 gcc_checking_assert (!func_sums);
2509 gcc_checking_assert (!call_sums);
2510 func_sums
2511 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2512 ipa_sra_function_summaries (symtab, true));
2513 call_sums = new ipa_sra_call_summaries (symtab);
2515 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2516 ipa_sra_summarize_function (node);
2517 return;
/* Write the intraprocedural analysis information about call graph edge E into
   output block OB for LTO WPA.  The format must be kept in sync with
   isra_read_edge_summary.  */

static void
isra_write_edge_summary (output_block *ob, cgraph_edge *e)
{
  isra_call_summary *csum = call_sums->get (e);
  unsigned input_count = csum->m_arg_flow.length ();
  streamer_write_uhwi (ob, input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      streamer_write_hwi (ob, ipf->length);
      bitpack_d bp = bitpack_create (ob->main_stream);
      /* Each input index is packed into 8 bits.  */
      for (int j = 0; j < ipf->length; j++)
	bp_pack_value (&bp, ipf->inputs[j], 8);
      bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
      bp_pack_value (&bp, ipf->pointer_pass_through, 1);
      bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
      streamer_write_bitpack (&bp);
      streamer_write_uhwi (ob, ipf->unit_offset);
      streamer_write_uhwi (ob, ipf->unit_size);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, csum->m_return_ignored, 1);
  bp_pack_value (&bp, csum->m_return_returned, 1);
  bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
  streamer_write_bitpack (&bp);
}
/* Write intraprocedural analysis information about NODE and all of its
   outgoing edges into a stream for LTO WPA.  The format must be kept in sync
   with isra_read_node_info.  */

static void
isra_write_node_summary (output_block *ob, cgraph_node *node)
{
  isra_func_summary *ifs = func_sums->get (node);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  int node_ref = lto_symtab_encoder_encode (encoder, node);
  streamer_write_uhwi (ob, node_ref);

  unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
  streamer_write_uhwi (ob, param_desc_count);
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = vec_safe_length (desc->accesses);
      streamer_write_uhwi (ob, access_count);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = (*desc->accesses)[j];
	  stream_write_tree (ob, acc->type, true);
	  stream_write_tree (ob, acc->alias_ptr_type, true);
	  streamer_write_uhwi (ob, acc->unit_offset);
	  streamer_write_uhwi (ob, acc->unit_size);
	  bitpack_d bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, acc->certain, 1);
	  streamer_write_bitpack (&bp);
	}
      streamer_write_uhwi (ob, desc->param_size_limit);
      streamer_write_uhwi (ob, desc->size_reached);
      bitpack_d bp = bitpack_create (ob->main_stream);
      bp_pack_value (&bp, desc->locally_unused, 1);
      bp_pack_value (&bp, desc->split_candidate, 1);
      bp_pack_value (&bp, desc->by_ref, 1);
      streamer_write_bitpack (&bp);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, ifs->m_candidate, 1);
  bp_pack_value (&bp, ifs->m_returns_value, 1);
  bp_pack_value (&bp, ifs->m_return_ignored, 1);
  /* m_queued is a transient work-list flag; it must never be set while
     streaming.  */
  gcc_assert (!ifs->m_queued);
  streamer_write_bitpack (&bp);

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
}
/* Write intraprocedural analysis information into a stream for LTO WPA.  */

static void
ipa_sra_write_summary (void)
{
  if (!func_sums || !call_sums)
    return;

  struct output_block *ob = create_output_block (LTO_section_ipa_sra);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  ob->symbol = NULL;

  /* First pass: count the nodes with a summary so the reader knows how many
     records to expect.  */
  unsigned int count = 0;
  lto_symtab_encoder_iterator lsei;
  for (lsei = lsei_start_function_in_partition (encoder);
       !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->has_gimple_body_p ()
	  && func_sums->get (node) != NULL)
	count++;
    }
  streamer_write_uhwi (ob, count);

  /* Process all of the functions.  */
  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->has_gimple_body_p ()
	  && func_sums->get (node) != NULL)
	isra_write_node_summary (ob, node);
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);
}
/* Read the intraprocedural analysis information about call graph edge CS from
   input block IB.  Must mirror isra_write_edge_summary exactly.  */

static void
isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
{
  isra_call_summary *csum = call_sums->get_create (cs);
  unsigned input_count = streamer_read_uhwi (ib);
  csum->init_inputs (input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      ipf->length = streamer_read_hwi (ib);
      bitpack_d bp = streamer_read_bitpack (ib);
      /* Input indices were packed into 8 bits each by the writer.  */
      for (int j = 0; j < ipf->length; j++)
	ipf->inputs[j] = bp_unpack_value (&bp, 8);
      ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
      ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
      ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
      ipf->unit_offset = streamer_read_uhwi (ib);
      ipf->unit_size = streamer_read_uhwi (ib);
    }
  bitpack_d bp = streamer_read_bitpack (ib);
  csum->m_return_ignored = bp_unpack_value (&bp, 1);
  csum->m_return_returned = bp_unpack_value (&bp, 1);
  csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
}
/* Read intraprocedural analysis information about NODE and all of its outgoing
   edges from input block IB.  Must mirror isra_write_node_summary exactly.  */

static void
isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
		     struct data_in *data_in)
{
  isra_func_summary *ifs = func_sums->get_create (node);
  unsigned param_desc_count = streamer_read_uhwi (ib);
  if (param_desc_count > 0)
    {
      vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
      ifs->m_parameters->quick_grow_cleared (param_desc_count);
    }
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = streamer_read_uhwi (ib);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = ggc_cleared_alloc<param_access> ();
	  acc->type = stream_read_tree (ib, data_in);
	  acc->alias_ptr_type = stream_read_tree (ib, data_in);
	  acc->unit_offset = streamer_read_uhwi (ib);
	  acc->unit_size = streamer_read_uhwi (ib);
	  bitpack_d bp = streamer_read_bitpack (ib);
	  acc->certain = bp_unpack_value (&bp, 1);
	  vec_safe_push (desc->accesses, acc);
	}
      desc->param_size_limit = streamer_read_uhwi (ib);
      desc->size_reached = streamer_read_uhwi (ib);
      bitpack_d bp = streamer_read_bitpack (ib);
      desc->locally_unused = bp_unpack_value (&bp, 1);
      desc->split_candidate = bp_unpack_value (&bp, 1);
      desc->by_ref = bp_unpack_value (&bp, 1);
    }
  bitpack_d bp = streamer_read_bitpack (ib);
  ifs->m_candidate = bp_unpack_value (&bp, 1);
  ifs->m_returns_value = bp_unpack_value (&bp, 1);
  ifs->m_return_ignored = bp_unpack_value (&bp, 1);
  /* m_queued is transient state which is deliberately not streamed.  */
  ifs->m_queued = 0;

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_read_edge_summary (ib, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_read_edge_summary (ib, e);
}
/* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
   data DATA.  TODO: This function was copied almost verbatim from ipa-prop.c,
   it should be possible to unify them somehow.  */

static void
isra_read_summary_section (struct lto_file_decl_data *file_data,
			   const char *data, size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i;
  unsigned int count;

  lto_input_block ib_main ((const char *) data + main_offset,
			   header->main_size, file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  count = streamer_read_uhwi (&ib_main);

  for (i = 0; i < count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      lto_symtab_encoder_t encoder;

      index = streamer_read_uhwi (&ib_main);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* Only nodes with a definition should have been streamed out.  */
      gcc_assert (node->definition);
      isra_read_node_info (&ib_main, node, data_in);
    }
  lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
/* Read intraprocedural analysis information from the LTO summary sections of
   all input files for WPA.  */

static void
ipa_sra_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  gcc_checking_assert (!func_sums);
  gcc_checking_assert (!call_sums);
  /* Function summaries reference trees and must live in GC memory.  */
  func_sums
    = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
       ipa_sra_function_summaries (symtab, true));
  call_sums = new ipa_sra_call_summaries (symtab);

  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data
	= lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
      if (data)
	isra_read_summary_section (file_data, data, len);
    }
}
/* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F.  */

static void
ipa_sra_dump_all_summaries (FILE *f)
{
  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      fprintf (f, "\nSummary for node %s:\n", node->dump_name ());

      isra_func_summary *ifs = func_sums->get (node);
      if (!ifs)
	{
	  fprintf (f, " Function does not have any associated IPA-SRA "
		   "summary\n");
	  continue;
	}
      if (!ifs->m_candidate)
	{
	  fprintf (f, " Not a candidate function\n");
	  continue;
	}
      if (ifs->m_returns_value)
	fprintf (f, " Returns value\n");
      if (vec_safe_is_empty (ifs->m_parameters))
	fprintf (f, " No parameter information. \n");
      else
	for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
	  {
	    fprintf (f, " Descriptor for parameter %i:\n", i);
	    dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
	  }
      fprintf (f, "\n");

      struct cgraph_edge *cs;
      for (cs = node->callees; cs; cs = cs->next_callee)
	{
	  fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
		   cs->callee->dump_name ());
	  isra_call_summary *csum = call_sums->get (cs);
	  if (csum)
	    csum->dump (f);
	  else
	    fprintf (f, " Call summary is MISSING!\n");
	}
    }
  fprintf (f, "\n\n");
}
/* Perform function-scope viability tests that can be only made at IPA level
   and return false if the function is deemed unsuitable for IPA-SRA.  */

static bool
ipa_sra_ipa_function_checks (cgraph_node *node)
{
  /* IPA-SRA clones get local linkage, so the original must be localizable.  */
  if (!node->can_be_local_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Function %s disqualified because it cannot be "
		 "made local.\n", node->dump_name ());
      return false;
    }
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function can not change signature.\n");
      return false;
    }

  return true;
}
/* Issues found out by check_for_caller_issues (filled in via
   check_all_callers_for_issues).  */

struct caller_issues
{
  /* The candidate being considered.  */
  cgraph_node *candidate;
  /* There is a thunk among callers.  */
  bool thunk;
  /* Call site with no available information.  */
  bool unknown_callsite;
  /* Call from outside the candidate's comdat group.  */
  bool call_from_outside_comdat;
  /* There is a bit-aligned load into one of non-gimple-typed arguments.  */
  bool bit_aligned_aggregate_argument;
};
/* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
   that apply.  Returning true stops the symbol-table traversal early, once a
   show-stopper issue has been recorded.  */

static bool
check_for_caller_issues (struct cgraph_node *node, void *data)
{
  struct caller_issues *issues = (struct caller_issues *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      if (cs->caller->thunk)
	{
	  issues->thunk = true;
	  /* TODO: We should be able to process at least some types of
	     thunks.  */
	  return true;
	}
      if (issues->candidate->calls_comdat_local
	  && issues->candidate->same_comdat_group
	  && !issues->candidate->in_same_comdat_group_p (cs->caller))
	{
	  issues->call_from_outside_comdat = true;
	  return true;
	}

      isra_call_summary *csum = call_sums->get (cs);
      if (!csum)
	{
	  issues->unknown_callsite = true;
	  return true;
	}

      /* Not a show-stopper on its own; keep scanning other callers.  */
      if (csum->m_bit_aligned_arg)
	issues->bit_aligned_aggregate_argument = true;
    }
  return false;
}
/* Look at all incoming edges to NODE, including aliases and thunks and look
   for problems.  Return true if NODE type should not be modified at all.  */

static bool
check_all_callers_for_issues (cgraph_node *node)
{
  struct caller_issues issues;
  memset (&issues, 0, sizeof (issues));
  issues.candidate = node;

  node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
  if (issues.unknown_callsite)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
		 "all modifications.\n", node->dump_name ());
      return true;
    }
  /* TODO: We should be able to process at least some types of thunks.  */
  if (issues.thunk)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s is through thunk, which are not"
		 " handled yet. Disabling all modifications.\n",
		 node->dump_name ());
      return true;
    }
  if (issues.call_from_outside_comdat)
    {
      if (dump_file)
	fprintf (dump_file, "Function would become private comdat called "
		 "outside of its comdat group.\n");
      return true;
    }

  if (issues.bit_aligned_aggregate_argument)
    {
      /* Let's only remove parameters/return values from such functions.
	 TODO: We could only prevent splitting the problematic parameters if
	 anybody thinks it is worth it.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
		 " disabling parameter splitting.\n", node->dump_name ());

      isra_func_summary *ifs = func_sums->get (node);
      gcc_checking_assert (ifs);
      unsigned param_count = vec_safe_length (ifs->m_parameters);
      for (unsigned i = 0; i < param_count; i++)
	(*ifs->m_parameters)[i].split_candidate = false;
    }
  return false;
}
2964 /* Find the access with corresponding OFFSET and SIZE among accesses in
2965 PARAM_DESC and return it or NULL if such an access is not there. */
2967 static param_access *
2968 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2970 unsigned pclen = vec_safe_length (param_desc->accesses);
2972 /* The search is linear but the number of stored accesses is bound by
2973 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2975 for (unsigned i = 0; i < pclen; i++)
2976 if ((*param_desc->accesses)[i]->unit_offset == offset
2977 && (*param_desc->accesses)[i]->unit_size == size)
2978 return (*param_desc->accesses)[i];
2980 return NULL;
2983 /* Return iff the total size of definite replacement SIZE would violate the
2984 limit set for it in PARAM. */
2986 static bool
2987 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
2989 unsigned limit = desc->param_size_limit;
2990 if (size > limit
2991 || (!desc->by_ref && size == limit))
2992 return true;
2993 return false;
2996 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
2997 the set limit. IDX is the parameter number which is dumped when
2998 disqualifying. */
3000 static void
3001 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3003 unsigned after = desc->size_reached + size;
3004 if (size_would_violate_limit_p (desc, after))
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3007 fprintf (dump_file, " ...size limit reached, disqualifying "
3008 "candidate parameter %u\n", idx);
3009 desc->split_candidate = false;
3010 return;
3012 desc->size_reached = after;
/* Take all actions required to deal with an edge CS that represents a call to
   an unknown or un-analyzed function, for both parameter removal and
   splitting.  */

static void
process_edge_to_unknown_caller (cgraph_edge *cs)
{
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs);
  isra_call_summary *csum = call_sums->get (cs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
	     cs->caller->dump_name ());

  unsigned args_count = csum->m_arg_flow.length ();
  for (unsigned i = 0; i < args_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      /* A pointer passed to an unknown callee can be used in any way, so the
	 source parameter can be neither removed nor split.  */
      if (ipf->pointer_pass_through)
	{
	  isra_param_desc *param_desc
	    = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
	  param_desc->locally_unused = false;
	  param_desc->split_candidate = false;
	  continue;
	}
      /* A by-value aggregate piece passed onward becomes a "certain" access
	 of the source parameter.  */
      if (ipf->aggregate_pass_through)
	{
	  unsigned idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];

	  param_desc->locally_unused = false;
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);
	  pacc->certain = true;
	  if (overlapping_certain_accesses_p (param_desc, NULL))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " ...leading to overlap, "
			 " disqualifying candidate parameter %u\n",
			 idx);
	      param_desc->split_candidate = false;
	    }
	  else
	    bump_reached_size (param_desc, pacc->unit_size, idx);
	  ipf->aggregate_pass_through = false;
	  continue;
	}

      /* Otherwise every parameter feeding this argument is used.  */
      for (int j = 0; j < ipf->length; j++)
	{
	  int input_idx = ipf->inputs[j];
	  (*from_ifs->m_parameters)[input_idx].locally_unused = false;
	}
    }
}
/* Propagate parameter removal information through cross-SCC edge CS,
   i.e. mark each caller parameter that feeds an argument which is actually
   used in the callee as used in the caller too.  */

static void
param_removal_cross_scc_edge (cgraph_edge *cs)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *to_ifs = func_sums->get (callee);
  /* Without a usable callee summary, be maximally pessimistic.  */
  if (!to_ifs || !to_ifs->m_candidate
      || (availability < AVAIL_AVAILABLE)
      || vec_safe_is_empty (to_ifs->m_parameters))
    {
      process_edge_to_unknown_caller (cs);
      return;
    }
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs);

  isra_call_summary *csum = call_sums->get (cs);
  unsigned args_count = csum->m_arg_flow.length ();
  unsigned param_count = vec_safe_length (to_ifs->m_parameters);

  for (unsigned i = 0; i < args_count; i++)
    {
      bool unused_in_callee;
      /* Arguments beyond the callee's formal parameters are conservatively
	 treated as used.  */
      if (i < param_count)
	unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
      else
	unused_in_callee = false;

      if (!unused_in_callee)
	{
	  isra_param_flow *ipf = &csum->m_arg_flow[i];
	  for (int j = 0; j < ipf->length; j++)
	    {
	      int input_idx = ipf->inputs[j];
	      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
	    }
	}
    }
}
3122 /* Unless it is already there, push NODE which is also described by IFS to
3123 STACK. */
3125 static void
3126 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3127 vec<cgraph_node *> *stack)
3129 if (!ifs->m_queued)
3131 ifs->m_queued = true;
3132 stack->safe_push (node);
3136 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3137 used and push CALLER on STACK. */
3139 static void
3140 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3141 cgraph_node *caller, vec<cgraph_node *> *stack)
3143 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3145 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3146 isra_push_node_to_stack (caller, from_ifs, stack);
3151 /* Propagate information that any parameter is not used only locally within a
3152 SCC across CS to the caller, which must be in the same SCC as the
3153 callee. Push any callers that need to be re-processed to STACK. */
3155 static void
3156 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3158 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3159 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3160 return;
3162 isra_call_summary *csum = call_sums->get (cs);
3163 gcc_checking_assert (csum);
3164 unsigned args_count = csum->m_arg_flow.length ();
3165 enum availability availability;
3166 cgraph_node *callee = cs->callee->function_symbol (&availability);
3167 isra_func_summary *to_ifs = func_sums->get (callee);
3169 unsigned param_count
3170 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3171 ? vec_safe_length (to_ifs->m_parameters) : 0;
3172 for (unsigned i = 0; i < args_count; i++)
3174 if (i < param_count
3175 && (*to_ifs->m_parameters)[i].locally_unused)
3176 continue;
3178 /* The argument is needed in the callee it, we must mark the parameter as
3179 used also in the caller and its callers within this SCC. */
3180 isra_param_flow *ipf = &csum->m_arg_flow[i];
3181 for (int j = 0; j < ipf->length; j++)
3183 int input_idx = ipf->inputs[j];
3184 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3189 /* Propagate information that any parameter is not used only locally within a
3190 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3191 same SCC. Push any callers that need to be re-processed to STACK. */
3193 static bool
3194 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3196 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3197 cgraph_edge *cs;
3198 for (cs = node->callers; cs; cs = cs->next_caller)
3199 if (ipa_edge_within_scc (cs))
3200 propagate_used_across_scc_edge (cs, stack);
3201 return false;
3204 /* Return true iff all certain accesses in ARG_DESC are also present as
3205 certain accesses in PARAM_DESC. */
3207 static bool
3208 all_callee_accesses_present_p (isra_param_desc *param_desc,
3209 isra_param_desc *arg_desc)
3211 unsigned aclen = vec_safe_length (arg_desc->accesses);
3212 for (unsigned j = 0; j < aclen; j++)
3214 param_access *argacc = (*arg_desc->accesses)[j];
3215 if (!argacc->certain)
3216 continue;
3217 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3218 argacc->unit_size);
3219 if (!pacc
3220 || !pacc->certain
3221 || !types_compatible_p (argacc->type, pacc->type))
3222 return false;
3224 return true;
/* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
   does not allow instantiating an auto_vec with a type defined within a
   function so it is a global type.  ACC_PROP_DONT means the callee access is
   not propagated, ACC_PROP_COPY that a new certain access must be copied into
   the caller descriptor, ACC_PROP_CERTAIN that an already existing caller
   access only needs to be marked certain.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3233 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3234 (which belongs to CALLER) if they would not violate some constraint there.
3235 If successful, return NULL, otherwise return the string reason for failure
3236 (which can be written to the dump file). DELTA_OFFSET is the known offset
3237 of the actual argument withing the formal parameter (so of ARG_DESCS within
3238 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3239 known. In case of success, set *CHANGE_P to true if propagation actually
3240 changed anything. */
3242 static const char *
3243 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3244 isra_param_desc *arg_desc,
3245 unsigned delta_offset, unsigned arg_size,
3246 bool *change_p)
3248 unsigned pclen = vec_safe_length (param_desc->accesses);
3249 unsigned aclen = vec_safe_length (arg_desc->accesses);
3250 unsigned prop_count = 0;
3251 unsigned prop_size = 0;
3252 bool change = false;
3254 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3255 for (unsigned j = 0; j < aclen; j++)
3257 param_access *argacc = (*arg_desc->accesses)[j];
3258 prop_kinds.safe_push (ACC_PROP_DONT);
3260 if (arg_size > 0
3261 && argacc->unit_offset + argacc->unit_size > arg_size)
3262 return "callee access outsize size boundary";
3264 if (!argacc->certain)
3265 continue;
3267 unsigned offset = argacc->unit_offset + delta_offset;
3268 /* Given that accesses are initially stored according to increasing
3269 offset and decreasing size in case of equal offsets, the following
3270 searches could be written more efficiently if we kept the ordering
3271 when copying. But the number of accesses is capped at
3272 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3273 messy quickly, so let's improve on that only if necessary. */
3275 bool exact_match = false;
3276 for (unsigned i = 0; i < pclen; i++)
3278 /* Check for overlaps. */
3279 param_access *pacc = (*param_desc->accesses)[i];
3280 if (pacc->unit_offset == offset
3281 && pacc->unit_size == argacc->unit_size)
3283 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3284 || !types_compatible_p (argacc->type, pacc->type))
3285 return "propagated access types would not match existing ones";
3287 exact_match = true;
3288 if (!pacc->certain)
3290 prop_kinds[j] = ACC_PROP_CERTAIN;
3291 prop_size += argacc->unit_size;
3292 change = true;
3294 continue;
3297 if (offset < pacc->unit_offset + pacc->unit_size
3298 && offset + argacc->unit_size > pacc->unit_offset)
3300 /* None permissible with load accesses, possible to fit into
3301 argument ones. */
3302 if (pacc->certain
3303 || offset < pacc->unit_offset
3304 || (offset + argacc->unit_size
3305 > pacc->unit_offset + pacc->unit_size))
3306 return "a propagated access would conflict in caller";
3310 if (!exact_match)
3312 prop_kinds[j] = ACC_PROP_COPY;
3313 prop_count++;
3314 prop_size += argacc->unit_size;
3315 change = true;
3319 if (!change)
3320 return NULL;
3322 if ((prop_count + pclen
3323 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3324 || size_would_violate_limit_p (param_desc,
3325 param_desc->size_reached + prop_size))
3326 return "propagating accesses would violate the count or size limit";
3328 *change_p = true;
3329 for (unsigned j = 0; j < aclen; j++)
3331 if (prop_kinds[j] == ACC_PROP_COPY)
3333 param_access *argacc = (*arg_desc->accesses)[j];
3335 param_access *copy = ggc_cleared_alloc<param_access> ();
3336 copy->unit_offset = argacc->unit_offset + delta_offset;
3337 copy->unit_size = argacc->unit_size;
3338 copy->type = argacc->type;
3339 copy->alias_ptr_type = argacc->alias_ptr_type;
3340 copy->certain = true;
3341 vec_safe_push (param_desc->accesses, copy);
3343 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3345 param_access *argacc = (*arg_desc->accesses)[j];
3346 param_access *csp
3347 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3348 argacc->unit_size);
3349 csp->certain = true;
3353 param_desc->size_reached += prop_size;
3355 return NULL;
/* Propagate parameter splitting information through call graph edge CS.
   Return true if any changes that might need to be propagated within SCCs have
   been made.  The function also clears the aggregate_pass_through and
   pointer_pass_through in call summaries which do not need to be processed
   again if this CS is revisited when iterating while changes are propagated
   within an SCC.  */

static bool
param_splitting_across_edge (cgraph_edge *cs)
{
  bool res = false;
  bool cross_scc = !ipa_edge_within_scc (cs);
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs && from_ifs->m_parameters);

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  isra_func_summary *to_ifs = func_sums->get (callee);
  /* Only consult callee parameter descriptors when the callee is a known,
     available splitting candidate.  */
  unsigned param_count
    = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
       ? vec_safe_length (to_ifs->m_parameters)
       : 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Splitting across %s->%s:\n",
	     cs->caller->dump_name (), callee->dump_name ());

  /* Process arguments which have a matching formal parameter.  */
  unsigned i;
  for (i = 0; (i < args_count) && (i < param_count); i++)
    {
      isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (arg_desc->locally_unused)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " ->%u: unused in callee\n", i);
	  ipf->pointer_pass_through = false;
	  continue;
	}

      if (ipf->pointer_pass_through)
	{
	  /* A caller pointer parameter is passed directly to the callee.  */
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (param_desc->by_ref);

	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or not by "
			 "reference in callee\n", idx, i);
	      param_desc->split_candidate = false;
	      ipf->pointer_pass_through = false;
	      res = true;
	    }
	  else if (!ipf->safe_to_import_accesses)
	    {
	      /* Dereferences cannot be moved to the caller, so the caller
		 must already perform all the accesses the callee does.  */
	      if (!all_callee_accesses_present_p (param_desc, arg_desc))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: cannot import accesses.\n",
			     idx, i);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: verified callee accesses "
			     "present.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	  else
	    {
	      /* It is safe to import callee accesses into the caller.  */
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     0, 0, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: by_ref access pull "
			     "failed: %s.\n", idx, i, pull_failure);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: by_ref access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	}
      else if (ipf->aggregate_pass_through)
	{
	  /* A piece of a caller aggregate parameter is passed by value.  */
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);

	  if (pacc->certain)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: already certain\n", idx, i);
	      ipf->aggregate_pass_through = false;
	    }
	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or by "
			 "reference in callee\n", idx, i);

	      /* The whole passed piece must then be considered accessed,
		 which may create an overlap with existing accesses.  */
	      pacc->certain = true;
	      if (overlapping_certain_accesses_p (param_desc, NULL))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " ...leading to overlap, "
			     " disqualifying candidate parameter %u\n",
			     idx);
		  param_desc->split_candidate = false;
		}
	      else
		bump_reached_size (param_desc, pacc->unit_size, idx);

	      ipf->aggregate_pass_through = false;
	      res = true;
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     ipf->unit_offset,
					     ipf->unit_size, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "failed: %s.\n", idx, i, pull_failure);

		  ipf->aggregate_pass_through = false;
		  pacc->certain = true;

		  if (overlapping_certain_accesses_p (param_desc, NULL))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, " ...leading to overlap, "
				 " disqualifying candidate parameter %u\n",
				 idx);
		      param_desc->split_candidate = false;
		    }
		  else
		    bump_reached_size (param_desc, pacc->unit_size, idx);

		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->aggregate_pass_through = false;
		}
	    }
	}
    }

  /* Handle argument-parameter count mismatches. */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
	{
	  /* An argument with no corresponding formal parameter cannot be
	     split, disqualify its source.  */
	  int idx = get_single_param_flow_source (ipf);
	  ipf->pointer_pass_through = false;
	  ipf->aggregate_pass_through = false;
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
		     idx, i);
	  param_desc->split_candidate = false;
	  res = true;
	}
    }
  return res;
}
3566 /* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3567 callers ignore the return value, or come from the same SCC and use the
3568 return value only to compute their return value, return false, otherwise
3569 return true. */
3571 static bool
3572 retval_used_p (cgraph_node *node, void *)
3574 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3576 isra_call_summary *csum = call_sums->get (cs);
3577 gcc_checking_assert (csum);
3578 if (csum->m_return_ignored)
3579 continue;
3580 if (!csum->m_return_returned)
3581 return true;
3583 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3584 if (!from_ifs || !from_ifs->m_candidate)
3585 return true;
3587 if (!ipa_edge_within_scc (cs)
3588 && !from_ifs->m_return_ignored)
3589 return true;
3592 return false;
3595 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3596 modify parameter which originally had index BASE_INDEX, in the adjustment
3597 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3598 PREV_ADJUSTMENT. If the parent clone is the original function,
3599 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3602 static void
3603 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3604 unsigned prev_clone_index,
3605 ipa_adjusted_param *prev_adjustment,
3606 vec<ipa_adjusted_param, va_gc> **new_params)
3608 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3609 if (desc->locally_unused)
3611 if (dump_file)
3612 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3613 return;
3616 if (!desc->split_candidate)
3618 ipa_adjusted_param adj;
3619 if (prev_adjustment)
3621 adj = *prev_adjustment;
3622 adj.prev_clone_adjustment = true;
3623 adj.prev_clone_index = prev_clone_index;
3625 else
3627 memset (&adj, 0, sizeof (adj));
3628 adj.op = IPA_PARAM_OP_COPY;
3629 adj.base_index = base_index;
3630 adj.prev_clone_index = prev_clone_index;
3632 vec_safe_push ((*new_params), adj);
3633 return;
3636 if (dump_file)
3637 fprintf (dump_file, " Will split parameter %u\n", base_index);
3639 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3640 unsigned aclen = vec_safe_length (desc->accesses);
3641 for (unsigned j = 0; j < aclen; j++)
3643 param_access *pa = (*desc->accesses)[j];
3644 if (!pa->certain)
3645 continue;
3646 if (dump_file)
3647 fprintf (dump_file, " - component at byte offset %u, "
3648 "size %u\n", pa->unit_offset, pa->unit_size);
3650 ipa_adjusted_param adj;
3651 memset (&adj, 0, sizeof (adj));
3652 adj.op = IPA_PARAM_OP_SPLIT;
3653 adj.base_index = base_index;
3654 adj.prev_clone_index = prev_clone_index;
3655 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3656 adj.reverse = pa->reverse;
3657 adj.type = pa->type;
3658 adj.alias_ptr_type = pa->alias_ptr_type;
3659 adj.unit_offset = pa->unit_offset;
3660 vec_safe_push ((*new_params), adj);
3664 /* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
3665 flag of all callers of NODE. */
3667 static bool
3668 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3670 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3671 cs->caller->calls_comdat_local = true;
3672 return false;
/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  */

static void
process_isra_node_results (cgraph_node *node,
			   hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  /* If NODE is already a clone, learn which of its parameters survived the
     previous transformation so we do not re-remove them.  */
  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }

  /* Decide whether any transformation (return removal, parameter removal or
     splitting) would actually change the function before cloning it.  */
  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
	isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if ((desc->locally_unused || desc->split_candidate)
	    /* Make sure we do not clone just to attempt to remove an already
	       removed unused argument.  */
	    && (!check_surviving
		|| (i < surviving_params.length ()
		    && surviving_params[i])))
	  {
	    will_change_function = true;
	    break;
	  }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
	       node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
	fprintf (dump_file, " Will remove return value.\n");
    }

  /* Build the new parameter adjustment vector, chaining onto any adjustments
     made by a previous IPA transformation.  */
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments = node->clone.param_adjustments)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
					    old_adj, &new_params);
	}
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
			      ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  /* Clones of the same function share a numbering counter keyed by the
     assembler name so that suffixes stay unique.  */
  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
				  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
					     NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
  callers.release ();
}
/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.  */

static bool
disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  /* If NODE is a clone, query which of its parameters still exist.  */
  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }
  bool dumped_first = false;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      /* Debug counter allows bisecting which parameters get transformed.  */
      if (!dbg_cnt (ipa_sra_params))
	{
	  desc->locally_unused = false;
	  desc->split_candidate = false;
	}
      else if (check_surviving
	       && (i >= surviving_params.length ()
		   || !surviving_params[i]))
	{
	  /* Even if the parameter was removed by a previous IPA pass, we do
	     not clear locally_unused because if it really is unused, this
	     information might be useful in callers.  */
	  desc->split_candidate = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      if (!dumped_first)
		{
		  fprintf (dump_file,
			   "The following parameters of %s are dead on "
			   "arrival:", node->dump_name ());
		  dumped_first = true;
		}
	      fprintf (dump_file, " %u", i);
	    }
	}
      else if (desc->locally_unused || desc->split_candidate)
	ret = false;
    }

  if (dumped_first)
    fprintf (dump_file, "\n");

  return ret;
}
/* Run the interprocedural part of IPA-SRA.  Works in two sweeps over the
   reduced (SCC-condensed) call graph: first from callees to callers for
   parameter removal and splitting, then from callers to callees for return
   value removal, iterating to a fixed point within each SCC.  Finally,
   clones are created for all functions that would change.  */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callees to callers for parameter removal and splitting. */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      /* Preliminary IPA function level checks and first step of parameter
	 removal.  */
      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (!ipa_sra_ipa_function_checks (v)
	      || check_all_callers_for_issues (v))
	    {
	      ifs->zap ();
	      continue;
	    }
	  if (disable_unavailable_parameters (v, ifs))
	    continue;
	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
	    process_edge_to_unknown_caller (cs);
	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
	    if (!ipa_edge_within_scc (cs))
	      param_removal_cross_scc_edge (cs);
	}

      /* Look at edges within the current SCC and propagate used-ness across
	 them, pushing onto the stack all notes which might need to be
	 revisited.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
					       &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *v = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (v);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
						 &stack, true);
	}

      /* Parameter splitting.  Iterate until access propagation within the
	 SCC reaches a fixed point.  */
      bool repeat_scc_access_propagation;
      do
	{
	  repeat_scc_access_propagation = false;
	  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	    {
	      isra_func_summary *ifs = func_sums->get (v);
	      if (!ifs
		  || !ifs->m_candidate
		  || vec_safe_is_empty (ifs->m_parameters))
		continue;
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		if (param_splitting_across_edge (cs))
		  repeat_scc_access_propagation = true;
	    }
	}
      while (repeat_scc_access_propagation);

      if (flag_checking)
	FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	  verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  /* One sweep from caller to callees for result removal.  */
  for (int i = node_scc_count - 1; i >= 0 ; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      /* Seed the stack with functions whose return value is definitely
	 needed.  */
      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;

	  bool return_needed
	    = (ifs->m_returns_value
	       && (!dbg_cnt (ipa_sra_retvalues)
		   || v->call_for_symbol_and_aliases (retval_used_p,
						      NULL, true)));
	  ifs->m_return_ignored = !return_needed;
	  if (return_needed)
	    isra_push_node_to_stack (v, ifs, &stack);
	}

      /* Propagate return-value neededness to callees within the SCC.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs)
		&& call_sums->get (cs)->m_return_returned)
	      {
		enum availability av;
		cgraph_node *callee = cs->callee->function_symbol (&av);
		isra_func_summary *to_ifs = func_sums->get (callee);
		if (to_ifs && to_ifs->m_return_ignored)
		  {
		    to_ifs->m_return_ignored = false;
		    isra_push_node_to_stack (callee, to_ifs, &stack);
		  }
	      }
	}
      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
	{
	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
		   " ==========\n");
	  ipa_sra_dump_all_summaries (dump_file);
	}
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
	     "==========\n\n");
  return 0;
}
/* Pass metadata for IPA-SRA; after the pass, unreachable functions are
   removed and the symbol table is dumped.  */

const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
/* Pass class wiring the IPA-SRA summary generation, streaming and the IPA
   analysis stage into the pass manager.  All function-body transformation
   hooks are NULL; actual modification happens via clone materialization.  */

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
		      ipa_sra_generate_summary, /* generate_summary */
		      ipa_sra_write_summary, /* write_summary */
		      ipa_sra_read_summary, /* read_summary */
		      NULL , /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* TODO: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_sra && optimize);
  }

  virtual unsigned int execute (function *) { return ipa_sra_analysis (); }

}; // class pass_ipa_sra
4048 } // anon namespace
/* Intraprocedural part of IPA-SRA analysis.  Scan function body of NODE and
   create a summary structure describing IPA-SRA opportunities and constraints
   in it.  */

static void
ipa_sra_summarize_function (cgraph_node *node)
{
  if (dump_file)
    fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
	     node->order);
  if (!ipa_sra_preliminary_function_checks (node))
    return;
  gcc_obstack_init (&gensum_obstack);
  isra_func_summary *ifs = func_sums->get_create (node);
  ifs->m_candidate = true;
  tree ret = TREE_TYPE (TREE_TYPE (node->decl));
  ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);

  decl2desc = new hash_map<tree, gensum_param_desc *>;
  unsigned count = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
    count++;

  if (count > 0)
    {
      auto_vec<gensum_param_desc, 16> param_descriptions (count);
      param_descriptions.reserve_exact (count);
      param_descriptions.quick_grow_cleared (count);

      bool cfun_pushed = false;
      struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
      /* Scan the body only if at least one parameter is a candidate.  */
      if (create_parameter_descriptors (node, &param_descriptions))
	{
	  push_cfun (fun);
	  cfun_pushed = true;
	  final_bbs = BITMAP_ALLOC (NULL);
	  /* One dereference-distance slot per by-ref candidate and basic
	     block.  */
	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
				      by_ref_count
				      * last_basic_block_for_fn (fun));
	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
	  scan_function (node, fun);

	  if (dump_file)
	    {
	      dump_gensum_param_descriptors (dump_file, node->decl,
					     &param_descriptions);
	      fprintf (dump_file, "----------------------------------------\n");
	    }
	}
      process_scan_results (node, fun, ifs, &param_descriptions);

      if (cfun_pushed)
	pop_cfun ();
      if (bb_dereferences)
	{
	  free (bb_dereferences);
	  bb_dereferences = NULL;
	  BITMAP_FREE (final_bbs);
	  final_bbs = NULL;
	}
    }
  isra_analyze_all_outgoing_calls (node);

  delete decl2desc;
  decl2desc = NULL;
  obstack_free (&gensum_obstack, NULL);
  if (dump_file)
    fprintf (dump_file, "\n\n");
  if (flag_checking)
    verify_splitting_accesses (node, false);
  return;
}
/* Create and return an instance of the IPA-SRA pass for context CTXT.  */

ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}
4130 #include "gt-ipa-sra.h"