/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2008-2020 Free Software Foundation, Inc.

   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about each
   function and call are generated.  Function summaries (often called call
   graph node summaries) contain mainly information about which parameters
   are potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments so that if two parameters are only
   used in a sum which is then passed to another function which then however
   does not use this parameter, all three parameters of the two functions can
   be eliminated.  Edge summaries also have flags whether the return value is
   used or if it is only returned in the caller too.  In LTO mode these
   summaries are then streamed to the object file in the compilation phase and
   streamed back in in the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
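
/* For illustration, a hedged sketch (made-up code, not taken from this pass):
   a function such as

     static int foo (struct S *p) { return p->a + p->b; }

   whose callers are all known and never access *p in any conflicting way can
   be rewritten by IPA-SRA to receive the two used members by value,

     static int foo.isra (int a, int b) { return a + b; }

   with every call site redirected to pass p->a and p->b directly.  All names
   above are invented for the example.  */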
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
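
/* Note: with ISRA_ARG_SIZE_LIMIT_BITS at 16, ISRA_ARG_SIZE_LIMIT is
   1 << 16 = 65536, so unit offsets and sizes are tracked for aggregates of
   up to 64 KiB; accesses at or above that limit are disqualified below.  */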

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that occur
   within a function that fall within another access form a tree.  The pass
   cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has a reversed scalar storage order.  */
  unsigned reverse : 1;
};

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Have there been writes to or reads from this exact location except for as
     arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has a reversed scalar storage order.  */
  bool reverse;
};

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing stuff by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If non-zero, the number of uses of this parameter as actual call
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a callee as
     an actual argument (pointer pass-throughs).  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing stuff by reference?  */
  bool by_ref;

  /* The index of this parameter in the order of parameters in the function
     declaration.  */
  int param_number;
  /* For parameters passing data by reference, this is the parameter index
     used to compute indices into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   not in GC memory, this is not necessary and we can consider removing the
   function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to the inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in IPA SRA stack during processing of
     call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Clean up and deallocate what an isra_func_summary points to.  TODO: Since
   this data structure is not in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}

/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     the caller's formal parameters which are used to calculate the value of
     this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through of
     an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
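
/* For example (made-up code, purely illustrative): in a caller

     int f (int a, int b, int *p) { return g (a + b, p); }

   the isra_param_flow for the first actual argument of the call to g would
   have length 2 and inputs {0, 1}, while the one for the second argument
   would be a pointer pass-through of formal parameter 2.  */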

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to the inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of the call statement is only used to construct
     the return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
			  isra_func_summary *old_sum,
			  isra_func_summary *new_sum);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
				       isra_func_summary *old_sum,
				       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
	{
	  param_access *from = (*s->accesses)[j];
	  param_access *to = ggc_cleared_alloc<param_access> ();
	  to->type = from->type;
	  to->alias_ptr_type = from->alias_ptr_type;
	  to->unit_offset = from->unit_offset;
	  to->unit_size = from->unit_size;
	  to->certain = from->certain;
	  d->accesses->quick_push (to);
	}
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
			  isra_call_summary *old_sum,
			  isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;

/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
	{
	  bool first = true;
	  fprintf (f, "      Scalar param sources: ");
	  for (int j = 0; j < ipf->length; j++)
	    {
	      if (!first)
		fprintf (f, ", ");
	      else
		first = false;
	      fprintf (f, "%i", (int) ipf->inputs[j]);
	    }
	  fprintf (f, "\n");
	}
      if (ipf->aggregate_pass_through)
	fprintf (f, "      Aggregate pass through from the param given above, "
		 "unit offset: %u , unit size: %u\n",
		 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
	fprintf (f, "      Pointer pass through from the param given above, "
		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
				   isra_call_summary *old_sum,
				   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
}

/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed alias analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
		 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
	fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Function type has attributes.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Always inline function will be inlined "
		 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "  * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
	   access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not-certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access tree starting at ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
			       vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}

/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
	   desc->param_size_limit, desc->size_reached,
	   desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
			     isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "    parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage.  If the
   function fails, return false, otherwise return true.  SRC must fit into an
   unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in NODE and return a negative number if any of them is used for something
   other than either an actual call argument, a simple arithmetic operation or
   a debug statement.  If there are no such uses, return the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none).  For any such parameter, mark PARM_NUM as one of its sources.
   ANALYZED is a bitmap that tracks which SSA names we have already started
   investigating.  */

static int
isra_track_scalar_value_uses (cgraph_node *node, tree name, int parm_num,
			      bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
	continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
	 operations below.  */
      if (is_gimple_call (stmt))
	{
	  int all_uses = 0;
	  use_operand_p use_p;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    all_uses++;

	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      res = -1;
	      BREAK_FROM_IMM_USE_STMT (imm_iter);
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  int simple_uses = 0;
	  for (unsigned i = 0; i < arg_count; i++)
	    if (gimple_call_arg (call, i) == name)
	      {
		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
		  {
		    simple_uses = -1;
		    break;
		  }
		simple_uses++;
	      }

	  if (simple_uses < 0
	      || all_uses != simple_uses)
	    {
	      res = -1;
	      BREAK_FROM_IMM_USE_STMT (imm_iter);
	    }
	  res += all_uses;
	}
      else if ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
	       || gimple_code (stmt) == GIMPLE_PHI)
	{
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = -1;
	      BREAK_FROM_IMM_USE_STMT (imm_iter);
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
	    {
	      int tmp = isra_track_scalar_value_uses (node, lhs, parm_num,
						      analyzed);
	      if (tmp < 0)
		{
		  res = tmp;
		  BREAK_FROM_IMM_USE_STMT (imm_iter);
		}
	      res += tmp;
	    }
	}
      else
	{
	  res = -1;
	  BREAK_FROM_IMM_USE_STMT (imm_iter);
	}
    }
  return res;
}
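
/* As an illustration of the above (made-up code): in

     int f (int a) { int t = a + 1; return g (t, t); }

   parameter a only feeds, via t, two actual arguments of the call to g, so
   for it the function would return 2; if t were also stored to memory, a
   negative number would be returned instead.  */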

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for something other than either an
   actual call argument, a simple arithmetic operation or a debug statement.
   If there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none) to *CALL_USES_P.  For any such parameter, mark PARM_NUM as one of its
   sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
				    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (node, name, parm_num, analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}

/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any nonarg uses that are not actual arguments or
   otherwise infeasible uses.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as call arguments
   and if returning false, store their number into *PT_COUNT_P.  NODE and FUN
   must represent the function that is currently analyzed, PARM_NUM must be the
   index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
			  int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;

      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  while (handled_component_p (rhs))
	    rhs = TREE_OPERAND (rhs, 0);
	  if (TREE_CODE (rhs) == MEM_REF
	      && TREE_OPERAND (rhs, 0) == name
	      && integer_zerop (TREE_OPERAND (rhs, 1))
	      && types_compatible_p (TREE_TYPE (rhs),
				     TREE_TYPE (TREE_TYPE (name)))
	      && !TREE_THIS_VOLATILE (rhs))
	    uses_ok++;
	}
      else if (is_gimple_call (stmt))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      ret = true;
	      BREAK_FROM_IMM_USE_STMT (ui);
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  for (unsigned i = 0; i < arg_count; ++i)
	    {
	      tree arg = gimple_call_arg (stmt, i);

	      if (arg == name)
		{
		  /* TODO: Allow &MEM_REF[name + offset] here,
		     ipa_param_body_adjustments::modify_call_stmt has to be
		     adjusted too.  */
		  csum->m_arg_flow[i].pointer_pass_through = true;
		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
		  pt_count++;
		  uses_ok++;
		  continue;
		}

	      while (handled_component_p (arg))
		arg = TREE_OPERAND (arg, 0);
	      if (TREE_CODE (arg) == MEM_REF
		  && TREE_OPERAND (arg, 0) == name
		  && integer_zerop (TREE_OPERAND (arg, 1))
		  && types_compatible_p (TREE_TYPE (arg),
					 TREE_TYPE (TREE_TYPE (name)))
		  && !TREE_THIS_VOLATILE (arg))
		uses_ok++;
	    }
	}

      /* If the number of valid uses does not match the number of
	 uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
	all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
	{
	  ret = true;
	  BREAK_FROM_IMM_USE_STMT (ui);
	}
    }

  *pt_count_p = pt_count;
  return ret;
}
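
/* Illustration (made-up code): for

     int f (struct S *p) { return g (p) + p->x; }

   passing p itself to g is recorded as a pointer pass-through and the load
   of p->x is a plain dereference of the default definition, so the function
   returns false with *PT_COUNT_P set to 1; storing p itself into a global,
   by contrast, would be a nonarg use and make it return true.  */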

/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false if there are candidates for removal of register
   parameters) and the function body must be scanned.  */

static bool
create_parameter_descriptors (cgraph_node *node,
			      vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is volatile\n");
	  continue;
	}
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is a va_list type\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is addressable\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, type cannot be split\n");
	  continue;
	}

      if (is_gimple_reg (parm)
	  && !isra_track_scalar_param_local_uses (fun, node, parm, num,
						  &scalar_call_uses))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " is a scalar with only %i call uses\n",
		     scalar_call_uses);

	  desc->locally_unused = true;
	  desc->call_uses = scalar_call_uses;
	}

      if (POINTER_TYPE_P (type))
	{
	  type = TREE_TYPE (type);

	  if (TREE_CODE (type) == FUNCTION_TYPE
	      || TREE_CODE (type) == METHOD_TYPE)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a function\n");
	      continue;
	    }
	  if (TYPE_VOLATILE (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a volatile type\n");
	      continue;
	    }
	  if (TREE_CODE (type) == ARRAY_TYPE
	      && TYPE_NONALIASED_COMPONENT (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a nonaliased component array\n");
	      continue;
	    }
	  if (!is_gimple_reg (parm))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, a reference which is "
			 "not a gimple register (probably addressable)\n");
	      continue;
	    }
	  if (is_va_list_type (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a va list\n");
	      continue;
	    }
	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
					&desc->ptr_pt_count))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference has "
			 "nonarg uses\n");
	      continue;
	    }
	  desc->by_ref = true;
	}
      else if (!AGGREGATE_TYPE_P (type))
	{
	  /* This is in an else branch because scalars passed by reference are
	     still candidates to be passed by value.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not an aggregate\n");
	  continue;
	}

      if (!COMPLETE_TYPE_P (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not a complete type\n");
	  continue;
	}
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, size not representable\n");
	  continue;
	}
      unsigned HOST_WIDE_INT type_size
	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
	  || type_size >= ISRA_ARG_SIZE_LIMIT)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
	  continue;
	}
      if (type_internals_preclude_sra_p (type, &msg))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, %s\n", msg);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
	desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}

/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
	     desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
		 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
					     sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer FIRST to the linked
   list of accesses at a certain tree level.  Attempt to create it and put it
   into the appropriate place in the access tree if it does not exist, but fail
   and return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
	return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
	 it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
	 && access->next_sibling
	 && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
	 something below access.  But we only allow nesting in call
	 arguments.  */
      if (access->nonarg)
	return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
	 what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
	return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->first_child = access;

      while (access->next_sibling
	     && access->next_sibling->offset < offset + size)
	access = access->next_sibling;
      if (access->offset + access->size > offset + size)
	{
	  /* This must be a different access, which are sorted, so the
	     following must be true and this signals a partial overlap.  */
	  gcc_assert (access->offset > offset);
	  return NULL;
	}

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
	 super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
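
/* To illustrate the resulting shape (example numbers only): if a whole
   16-byte aggregate is passed to a callee (a call-argument access at bit
   offset 0 with size 128) and the function itself also loads bits 0-31 and
   64-95, the tree has the call-argument access as the root with the two
   nonarg loads as its children; a load of bits 16-47 would instead overlap
   the first child only partially and disqualify the parameter.  */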

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX and mark it as used in that context.
   Attempt to create the access if it does not exist, but fail and return NULL
   if there are already too many accesses, if it would create a partially
   overlapping access or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
	    isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
					      size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
				  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
		      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size > 0);

      if (parent_size != 0)
	{
	  if (access->offset < parent_offset)
	    {
	      error ("Access offset before parent offset");
	      return true;
	    }
	  if (access->size >= parent_size)
	    {
	      error ("Access size greater or equal to its parent size");
	      return true;
	    }
	  if (access->offset + access->size > parent_offset + parent_size)
	    {
	      error ("Access terminates outside of its parent");
	      return true;
	    }
	}

      if (verify_access_tree_1 (access->first_child, access->offset,
				access->size))
	return true;

      if (access->next_sibling
	  && (access->next_sibling->offset < access->offset + access->size))
	{
	  error ("Access overlaps with its sibling");
	  return true;
	}

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
	dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}

/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
			basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
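
/* The bb_dereferences table is thus laid out with one row per basic block:
   with by_ref_count == 2, for instance, the largest dereferenced distance of
   the by-reference candidate with deref_index 1 in the basic block with
   index 7 lives at bb_dereferences[7 * 2 + 1].  (Example indices only.)  */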

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
	  || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
	  != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
	  != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
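
/* For instance (illustrative only): if the same location was previously read
   as a small struct and is later read as an int, the int prevails because
   register types beat aggregates; between short and int, the int prevails
   because of its bigger precision.  */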

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record a use of a portion (given by UNIT_OFFSET and UNIT_SIZE) of the
   parameter described by DESC in the call argument described by
   CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
			     scan_call_info *call_info,
			     unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}

/* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
   specifies whether EXPR is used in a load, a store or as a call argument.
   BB must be the basic block in which EXPR resides.  If CTX specifies call
   argument context, CALL_INFO must describe that call and argument position,
   otherwise it is ignored.  */

static void
scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
		  basic_block bb, scan_call_info *call_info = NULL)
{
  poly_int64 poffset, psize, pmax_size;
  HOST_WIDE_INT offset, size, max_size;
  tree base;
  bool deref = false;
  bool reverse;

  if (TREE_CODE (expr) == BIT_FIELD_REF
      || TREE_CODE (expr) == IMAGPART_EXPR
      || TREE_CODE (expr) == REALPART_EXPR)
    expr = TREE_OPERAND (expr, 0);

  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);

  if (TREE_CODE (base) == MEM_REF)
    {
      tree op = TREE_OPERAND (base, 0);
      if (TREE_CODE (op) != SSA_NAME
	  || !SSA_NAME_IS_DEFAULT_DEF (op))
	return;
      base = SSA_NAME_VAR (op);
      if (!base)
	return;
      deref = true;
    }
  if (TREE_CODE (base) != PARM_DECL)
    return;

  gensum_param_desc *desc = get_gensum_param_desc (base);
  if (!desc || !desc->split_candidate)
    return;

  if (!poffset.is_constant (&offset)
      || !psize.is_constant (&size)
      || !pmax_size.is_constant (&max_size))
    {
      disqualify_split_candidate (desc, "Encountered a polynomial-sized "
				  "access.");
      return;
    }
  if (size < 0 || size != max_size)
    {
      disqualify_split_candidate (desc, "Encountered a variable sized access.");
      return;
    }
  if (TREE_CODE (expr) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
    {
      disqualify_split_candidate (desc, "Encountered a bit-field access.");
      return;
    }
  if (offset < 0)
    {
      disqualify_split_candidate (desc, "Encountered an access at a "
				  "negative offset.");
      return;
    }
  gcc_assert ((offset % BITS_PER_UNIT) == 0);
  gcc_assert ((size % BITS_PER_UNIT) == 0);
  if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
      || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
    {
      disqualify_split_candidate (desc, "Encountered an access with too big "
				  "offset or size");
      return;
    }

  tree type = TREE_TYPE (expr);
  unsigned int exp_align = get_object_alignment (expr);

  if (exp_align < TYPE_ALIGN (type))
    {
      disqualify_split_candidate (desc, "Underaligned access.");
      return;
    }

  if (deref)
    {
      if (!desc->by_ref)
	{
	  disqualify_split_candidate (desc, "Dereferencing a non-reference.");
	  return;
	}
      else if (ctx == ISRA_CTX_STORE)
	{
	  disqualify_split_candidate (desc, "Storing to data passed by "
				      "reference.");
	  return;
	}

      if (!aa_walking_limit)
	{
	  disqualify_split_candidate (desc, "Out of alias analysis step "
				      "limit.");
	  return;
	}

      gcc_checking_assert (gimple_vuse (stmt));
      bool maybe_modified = false;
      ao_ref ar;

      ao_ref_init (&ar, expr);
      bitmap visited = BITMAP_ALLOC (NULL);
      int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
				       mark_maybe_modified, &maybe_modified,
				       &visited, NULL, aa_walking_limit);
      BITMAP_FREE (visited);
      if (walked > 0)
	{
	  gcc_assert (aa_walking_limit > walked);
	  aa_walking_limit = aa_walking_limit - walked;
	}
      if (walked < 0)
	aa_walking_limit = 0;
      if (maybe_modified || walked < 0)
	{
	  disqualify_split_candidate (desc, "Data passed by reference possibly "
				      "modified through an alias.");
	  return;
	}
      else
	mark_param_dereference (desc, offset + size, bb);
    }
  else
    /* Pointer parameters with direct uses should have been ruled out by
       analyzing SSA default def when looking at the parameters.  */
    gcc_assert (!desc->by_ref);

  gensum_param_access *access = get_access (desc, offset, size, ctx);
  if (!access)
    return;

  if (ctx == ISRA_CTX_ARG)
    {
      gcc_checking_assert (call_info);

      if (!deref)
	record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
				     size / BITS_PER_UNIT);
      else
	/* This is not a pass-through of a pointer, this is a use like any
	   other.  */
	access->nonarg = true;
    }

  if (!access->type)
    {
      access->type = type;
      access->alias_ptr_type = reference_alias_ptr_type (expr);
      access->reverse = reverse;
    }
  else
    {
      if (exp_align < TYPE_ALIGN (access->type))
	{
	  disqualify_split_candidate (desc, "Reference has lower alignment "
				      "than a previous one.");
	  return;
	}
      if (access->alias_ptr_type != reference_alias_ptr_type (expr))
	{
	  disqualify_split_candidate (desc, "Multiple alias pointer types.");
	  return;
	}
      if (access->reverse != reverse)
	{
	  disqualify_split_candidate (desc, "Both normal and reverse "
				      "scalar storage order.");
	  return;
	}
      if (!deref
	  && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
	  && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
	{
	  /* We need the same aggregate type on all accesses to be able to
	     distinguish transformation spots from pass-through arguments in
	     the transformation phase.  */
	  disqualify_split_candidate (desc, "We do not support aggregate "
				      "type punning.");
	  return;
	}

      if (type_prevails_p (access->type, type))
	access->type = type;
    }
}

/* Scan the function body described by NODE and FUN and create access trees for
   parameters.  */

static void
scan_function (cgraph_node *node, struct function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (stmt_can_throw_external (fun, stmt))
	    bitmap_set_bit (final_bbs, bb->index);
	  switch (gimple_code (stmt))
	    {
	    case GIMPLE_RETURN:
	      {
		tree t = gimple_return_retval (as_a <greturn *> (stmt));
		if (t != NULL_TREE)
		  scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASSIGN:
	      if (gimple_assign_single_p (stmt)
		  && !gimple_clobber_p (stmt))
		{
		  tree rhs = gimple_assign_rhs1 (stmt);
		  scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
		  tree lhs = gimple_assign_lhs (stmt);
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		}
	      break;

	    case GIMPLE_CALL:
	      {
		unsigned argument_count = gimple_call_num_args (stmt);
		scan_call_info call_info;
		call_info.cs = node->get_edge (stmt);
		call_info.argument_count = argument_count;

		for (unsigned i = 0; i < argument_count; i++)
		  {
		    call_info.arg_idx = i;
		    scan_expr_access (gimple_call_arg (stmt, i), stmt,
				      ISRA_CTX_ARG, bb, &call_info);
		  }

		tree lhs = gimple_call_lhs (stmt);
		if (lhs)
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		int flags = gimple_call_flags (stmt);
		if ((flags & (ECF_CONST | ECF_PURE)) == 0)
		  bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASM:
	      {
		gasm *asm_stmt = as_a <gasm *> (stmt);
		walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
					       asm_visit_addr);
		bitmap_set_bit (final_bbs, bb->index);

		for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		  }
		for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
		  }
	      }
	      break;

	    default:
	      break;
	    }
	}
    }
}
1920 /* Return true if SSA_NAME NAME is only used in return statements, or if
1921 results of any operations it is involved in are only used in return
1922 statements. ANALYZED is a bitmap that tracks which SSA names we have
1923 already started investigating. */
1925 static bool
1926 ssa_name_only_returned_p (tree name, bitmap analyzed)
1928 bool res = true;
1929 imm_use_iterator imm_iter;
1930 gimple *stmt;
1932 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1934 if (is_gimple_debug (stmt))
1935 continue;
1937 if (gimple_code (stmt) == GIMPLE_RETURN)
1939 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1940 if (t != name)
1942 res = false;
1943 BREAK_FROM_IMM_USE_STMT (imm_iter);
1946 else if ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1947 || gimple_code (stmt) == GIMPLE_PHI)
1949 /* TODO: And perhaps for const function calls too? */
1950 tree lhs;
1951 if (gimple_code (stmt) == GIMPLE_PHI)
1952 lhs = gimple_phi_result (stmt);
1953 else
1954 lhs = gimple_assign_lhs (stmt);
1956 if (TREE_CODE (lhs) != SSA_NAME)
1958 res = false;
1959 BREAK_FROM_IMM_USE_STMT (imm_iter);
1961 gcc_assert (!gimple_vdef (stmt));
1962 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1963 && !ssa_name_only_returned_p (lhs, analyzed))
1965 res = false;
1966 BREAK_FROM_IMM_USE_STMT (imm_iter);
1969 else
1971 res = false;
1972 BREAK_FROM_IMM_USE_STMT (imm_iter);
1975 return res;
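/* As an illustration (a hypothetical caller, not part of this file),
   consider:

     int bar (void)
     {
       int r = foo ();
       int tmp = r + 1;
       return tmp;
     }

   The SSA name holding the result of foo only feeds an addition whose
   result is itself only returned, so the predicate above holds for it.
   Storing r to memory or passing it to another call would make it
   fail.  */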
1978 /* Inspect the uses of the return value of the call associated with CS, and if
1979 it is not used or if it is only used to construct the return value of the
1980    caller, mark it as such in the call summary.  Also check for
1981 misaligned arguments. */
1983 static void
1984 isra_analyze_call (cgraph_edge *cs)
1986 gcall *call_stmt = cs->call_stmt;
1987 unsigned count = gimple_call_num_args (call_stmt);
1988 isra_call_summary *csum = call_sums->get_create (cs);
1990 for (unsigned i = 0; i < count; i++)
1992 tree arg = gimple_call_arg (call_stmt, i);
1993 if (is_gimple_reg (arg))
1994 continue;
1996 tree offset;
1997 poly_int64 bitsize, bitpos;
1998 machine_mode mode;
1999 int unsignedp, reversep, volatilep = 0;
2000 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2001 &unsignedp, &reversep, &volatilep);
2002 if (!multiple_p (bitpos, BITS_PER_UNIT))
2004 csum->m_bit_aligned_arg = true;
2005 break;
2009 tree lhs = gimple_call_lhs (call_stmt);
2010 if (lhs)
2012 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2013 from this function (without being read anywhere). */
2014 if (TREE_CODE (lhs) == SSA_NAME)
2016 bitmap analyzed = BITMAP_ALLOC (NULL);
2017 if (ssa_name_only_returned_p (lhs, analyzed))
2018 csum->m_return_returned = true;
2019 BITMAP_FREE (analyzed);
2022 else
2023 csum->m_return_ignored = true;
2026 /* Look at all calls going out of NODE and perform all analyses necessary
2027    for IPA-SRA that are not done at body scan time, or that must be done even
2028    when the body is not scanned because the function is not a candidate.  */
2030 static void
2031 isra_analyze_all_outgoing_calls (cgraph_node *node)
2033 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2034 isra_analyze_call (cs);
2035 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2036 isra_analyze_call (cs);
2039 /* Dump a dereferences table with heading STR to file F. */
2041 static void
2042 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2044 basic_block bb;
2046   fprintf (f, "%s", str);
2047 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2048 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2050 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2051 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2053 int i;
2054 for (i = 0; i < by_ref_count; i++)
2056 int idx = bb->index * by_ref_count + i;
2057 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2060 fprintf (f, "\n");
2062   fprintf (f, "\n");
2065 /* Propagate distances in bb_dereferences in the opposite direction to the
2066    control flow edges, in each step storing the maximum of the current value
2067    and the minimum of all successors.  These steps are repeated until the table
2068    stabilizes.  Note that BBs which might terminate the function (according to
2069    the final_bbs bitmap) are never updated in this way.  */
2071 static void
2072 propagate_dereference_distances (struct function *fun)
2074 basic_block bb;
2076 if (dump_file && (dump_flags & TDF_DETAILS))
2077 dump_dereferences_table (dump_file, fun,
2078 "Dereference table before propagation:\n");
2080 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2081 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2082 FOR_EACH_BB_FN (bb, fun)
2084 queue.quick_push (bb);
2085 bb->aux = bb;
2088 while (!queue.is_empty ())
2090 edge_iterator ei;
2091 edge e;
2092 bool change = false;
2093 int i;
2095 bb = queue.pop ();
2096 bb->aux = NULL;
2098 if (bitmap_bit_p (final_bbs, bb->index))
2099 continue;
2101 for (i = 0; i < by_ref_count; i++)
2103 int idx = bb->index * by_ref_count + i;
2104 bool first = true;
2105 HOST_WIDE_INT inh = 0;
2107 FOR_EACH_EDGE (e, ei, bb->succs)
2109 int succ_idx = e->dest->index * by_ref_count + i;
2111 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2112 continue;
2114 if (first)
2116 first = false;
2117 inh = bb_dereferences [succ_idx];
2119 else if (bb_dereferences [succ_idx] < inh)
2120 inh = bb_dereferences [succ_idx];
2123 if (!first && bb_dereferences[idx] < inh)
2125 bb_dereferences[idx] = inh;
2126 change = true;
2130 if (change)
2131 FOR_EACH_EDGE (e, ei, bb->preds)
2133 if (e->src->aux)
2134 continue;
2136 e->src->aux = e->src;
2137 queue.quick_push (e->src);
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 dump_dereferences_table (dump_file, fun,
2143 "Dereference table after propagation:\n");
2146 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2147    children, return true if the parameter cannot be split, otherwise return
2148    false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must be the
2149 index of the entry BB in the function of PARM. */
2151 static bool
2152 check_gensum_access (tree parm, gensum_param_desc *desc,
2153 gensum_param_access *access,
2154 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2155 int entry_bb_index)
2157 if (access->nonarg)
2159 *only_calls = false;
2160 *nonarg_acc_size += access->size;
2162 if (access->first_child)
2164 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2165 return true;
2168 /* Do not decompose a non-BLKmode param in a way that would create
2169 BLKmode params. Especially for by-reference passing (thus,
2170 pointer-type param) this is hardly worthwhile. */
2171 if (DECL_MODE (parm) != BLKmode
2172 && TYPE_MODE (access->type) == BLKmode)
2174 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2175 return true;
2178 if (desc->by_ref)
2180 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2181 if ((access->offset + access->size) > bb_dereferences[idx])
2183 disqualify_split_candidate (desc, "Would create a possibly "
2184 "illegal dereference in a caller.");
2185 return true;
2189 for (gensum_param_access *ch = access->first_child;
2191 ch = ch->next_sibling)
2192 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2193 entry_bb_index))
2194 return true;
2196 return false;
2199 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2200 descriptor DESC. */
2202 static void
2203 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2205 param_access *to = ggc_cleared_alloc<param_access> ();
2206 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2207 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2208 to->unit_offset = from->offset / BITS_PER_UNIT;
2209 to->unit_size = from->size / BITS_PER_UNIT;
2210 to->type = from->type;
2211 to->alias_ptr_type = from->alias_ptr_type;
2212 to->certain = from->nonarg;
2213 to->reverse = from->reverse;
2214 vec_safe_push (desc->accesses, to);
2216 for (gensum_param_access *ch = from->first_child;
2218 ch = ch->next_sibling)
2219 copy_accesses_to_ipa_desc (ch, desc);
2222 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2223    possible transformations and store information about them in the function
2224    summary.  NODE, FUN and IFS are all various structures
2225 describing the currently analyzed function. */
2227 static void
2228 process_scan_results (cgraph_node *node, struct function *fun,
2229 isra_func_summary *ifs,
2230 vec<gensum_param_desc> *param_descriptions)
2232 bool check_pass_throughs = false;
2233 bool dereferences_propagated = false;
2234 tree parm = DECL_ARGUMENTS (node->decl);
2235 unsigned param_count = param_descriptions->length();
2237 for (unsigned desc_index = 0;
2238 desc_index < param_count;
2239 desc_index++, parm = DECL_CHAIN (parm))
2241 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2242 if (!desc->split_candidate)
2243 continue;
2245 if (flag_checking)
2246 isra_verify_access_tree (desc->accesses);
2248 if (!dereferences_propagated
2249 && desc->by_ref
2250 && desc->accesses)
2252 propagate_dereference_distances (fun);
2253 dereferences_propagated = true;
2256 HOST_WIDE_INT nonarg_acc_size = 0;
2257 bool only_calls = true;
2258 bool check_failed = false;
2260 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2261 for (gensum_param_access *acc = desc->accesses;
2262 acc;
2263 acc = acc->next_sibling)
2264 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2265 entry_bb_index))
2267 check_failed = true;
2268 break;
2270 if (check_failed)
2271 continue;
2273 if (only_calls)
2274 desc->locally_unused = true;
2276 HOST_WIDE_INT cur_param_size
2277 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2278 HOST_WIDE_INT param_size_limit;
2279 if (!desc->by_ref || optimize_function_for_size_p (fun))
2280 param_size_limit = cur_param_size;
2281 else
2282 param_size_limit
2283 = opt_for_fn (node->decl,
2284 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2285 if (nonarg_acc_size > param_size_limit
2286 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2288 	  disqualify_split_candidate (desc, "Would result in too big a set "
2289 "of replacements.");
2291 else
2293 /* create_parameter_descriptors makes sure unit sizes of all
2294 candidate parameters fit unsigned integers restricted to
2295 ISRA_ARG_SIZE_LIMIT. */
2296 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2297 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2298 if (desc->split_candidate && desc->ptr_pt_count)
2300 gcc_assert (desc->by_ref);
2301 check_pass_throughs = true;
2306   /* When a pointer parameter is passed through to a callee where it is only
2307      used to read one or a few items, we can attempt to transform it
2308 to obtaining and passing through the items instead of the pointer. But we
2309 must take extra care that 1) we do not introduce any segfault by moving
2310 dereferences above control flow and that 2) the data is not modified
2311 through an alias in this function. The IPA analysis must not introduce
2312      any access candidates unless it can prove both.
2314 The current solution is very crude as it consists of ensuring that the
2315      call postdominates the entry BB and that the definition of the VUSE of
2316      the call is a default definition.  TODO: For non-recursive callees in the same
2317 compilation unit we could do better by doing analysis in topological order
2318      and looking into access candidates of callees, using their alias_ptr_types
2319 to attempt real AA. We could also use the maximum known dereferenced
2320 offset in this function at IPA level.
2322 TODO: Measure the overhead and the effect of just being pessimistic.
2323      Maybe this is only -O3 material?  */
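  /* For example (hypothetical caller, illustration only), in

       int caller (struct S *p, int flag)
       {
	 if (flag)
	   return callee (p);
	 return 0;
       }

     the call does not postdominate the single successor of the entry BB,
     so replacing the pointer argument with items loaded in the caller
     would dereference P on the FLAG == 0 path where the original code
     never touched *P, and safe_to_import_accesses must remain false.  */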
2325 bool pdoms_calculated = false;
2326 if (check_pass_throughs)
2327 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2329 gcall *call_stmt = cs->call_stmt;
2330 tree vuse = gimple_vuse (call_stmt);
2332 /* If the callee is a const function, we don't get a VUSE. In such
2333 case there will be no memory accesses in the called function (or the
2334 const attribute is wrong) and then we just don't care. */
2335 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2337 unsigned count = gimple_call_num_args (call_stmt);
2338 isra_call_summary *csum = call_sums->get_create (cs);
2339 csum->init_inputs (count);
2340 for (unsigned argidx = 0; argidx < count; argidx++)
2342 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2343 continue;
2344 unsigned pidx
2345 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2346 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2347 if (!desc->split_candidate)
2349 csum->m_arg_flow[argidx].pointer_pass_through = false;
2350 continue;
2352 if (!uses_memory_as_obtained)
2353 continue;
2355 /* Post-dominator check placed last, hoping that it usually won't
2356 be needed. */
2357 if (!pdoms_calculated)
2359 gcc_checking_assert (cfun);
2360 add_noreturn_fake_exit_edges ();
2361 connect_infinite_loops_to_exit ();
2362 calculate_dominance_info (CDI_POST_DOMINATORS);
2363 pdoms_calculated = true;
2365 if (dominated_by_p (CDI_POST_DOMINATORS,
2366 gimple_bb (call_stmt),
2367 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2368 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2372 if (pdoms_calculated)
2374 free_dominance_info (CDI_POST_DOMINATORS);
2375 remove_fake_exit_edges ();
2378 /* TODO: Add early exit if we disqualified everything. This also requires
2379 that we either relax the restriction that
2380 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2381 or store the number of parameters to IPA-SRA function summary and use that
2382 when just removing params. */
2384 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2385 ifs->m_parameters->quick_grow_cleared (param_count);
2386 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2388 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2389 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2391 d->param_size_limit = s->param_size_limit;
2392 d->size_reached = s->nonarg_acc_size;
2393 d->locally_unused = s->locally_unused;
2394 d->split_candidate = s->split_candidate;
2395 d->by_ref = s->by_ref;
2397 for (gensum_param_access *acc = s->accesses;
2398 acc;
2399 acc = acc->next_sibling)
2400 copy_accesses_to_ipa_desc (acc, d);
2403 if (dump_file)
2404 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2407 /* Return true if there are any overlaps among certain accesses of DESC. If
2408 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2409 too. DESC is assumed to be a split candidate that is not locally
2410 unused. */
2412 static bool
2413 overlapping_certain_accesses_p (isra_param_desc *desc,
2414 bool *certain_access_present_p)
2416 unsigned pclen = vec_safe_length (desc->accesses);
2417 for (unsigned i = 0; i < pclen; i++)
2419 param_access *a1 = (*desc->accesses)[i];
2421 if (!a1->certain)
2422 continue;
2423 if (certain_access_present_p)
2424 *certain_access_present_p = true;
2425 for (unsigned j = i + 1; j < pclen; j++)
2427 param_access *a2 = (*desc->accesses)[j];
2428 if (a2->certain
2429 && a1->unit_offset < a2->unit_offset + a2->unit_size
2430 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2431 return true;
2434 return false;
2437 /* Check for any overlaps of certain param accesses among splitting candidates
2438 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2439 check that used splitting candidates have at least one certain access. */
2441 static void
2442 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2444 isra_func_summary *ifs = func_sums->get (node);
2445 if (!ifs || !ifs->m_candidate)
2446 return;
2447 unsigned param_count = vec_safe_length (ifs->m_parameters);
2448 for (unsigned pidx = 0; pidx < param_count; pidx++)
2450 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2451 if (!desc->split_candidate || desc->locally_unused)
2452 continue;
2454 bool certain_access_present = !certain_must_exist;
2455 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2456 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2457 "which overlap", node->dump_name (), pidx);
2458 if (!certain_access_present)
2459 internal_error ("Function %s, parameter %u, is used but does not "
2460 "have any certain IPA-SRA access",
2461 node->dump_name (), pidx);
2465 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
2466 create a summary structure describing IPA-SRA opportunities and constraints
2467 in it. */
2469 static void
2470 ipa_sra_summarize_function (cgraph_node *node)
2472 if (dump_file)
2473 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
2474 node->order);
2475 if (!ipa_sra_preliminary_function_checks (node))
2476 return;
2477 gcc_obstack_init (&gensum_obstack);
2478 isra_func_summary *ifs = func_sums->get_create (node);
2479 ifs->m_candidate = true;
2480 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
2481 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
2483 decl2desc = new hash_map<tree, gensum_param_desc *>;
2484 unsigned count = 0;
2485 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
2486 count++;
2488 if (count > 0)
2490 auto_vec<gensum_param_desc, 16> param_descriptions (count);
2491 param_descriptions.reserve_exact (count);
2492 param_descriptions.quick_grow_cleared (count);
2494 bool cfun_pushed = false;
2495 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
2496 if (create_parameter_descriptors (node, &param_descriptions))
2498 push_cfun (fun);
2499 cfun_pushed = true;
2500 final_bbs = BITMAP_ALLOC (NULL);
2501 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
2502 by_ref_count
2503 * last_basic_block_for_fn (fun));
2504 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2505 scan_function (node, fun);
2507 if (dump_file)
2509 dump_gensum_param_descriptors (dump_file, node->decl,
2510 &param_descriptions);
2511 fprintf (dump_file, "----------------------------------------\n");
2514 process_scan_results (node, fun, ifs, &param_descriptions);
2516 if (cfun_pushed)
2517 pop_cfun ();
2518 if (bb_dereferences)
2520 free (bb_dereferences);
2521 bb_dereferences = NULL;
2522 BITMAP_FREE (final_bbs);
2523 final_bbs = NULL;
2526 isra_analyze_all_outgoing_calls (node);
2528 delete decl2desc;
2529 decl2desc = NULL;
2530 obstack_free (&gensum_obstack, NULL);
2531 if (dump_file)
2532 fprintf (dump_file, "\n\n");
2533 if (flag_checking)
2534 verify_splitting_accesses (node, false);
2535 return;
2538 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2539 this compilation unit and create summary structures describing IPA-SRA
2540 opportunities and constraints in them. */
2542 static void
2543 ipa_sra_generate_summary (void)
2545 struct cgraph_node *node;
2547 gcc_checking_assert (!func_sums);
2548 gcc_checking_assert (!call_sums);
2549 func_sums
2550 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2551 ipa_sra_function_summaries (symtab, true));
2552 call_sums = new ipa_sra_call_summaries (symtab);
2554 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2555 ipa_sra_summarize_function (node);
2556 return;
2559 /* Write intraprocedural analysis information about the call graph edge E
2560    into a stream OB for LTO WPA.  */
2562 static void
2563 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2565 isra_call_summary *csum = call_sums->get (e);
2566 unsigned input_count = csum->m_arg_flow.length ();
2567 streamer_write_uhwi (ob, input_count);
2568 for (unsigned i = 0; i < input_count; i++)
2570 isra_param_flow *ipf = &csum->m_arg_flow[i];
2571 streamer_write_hwi (ob, ipf->length);
2572 bitpack_d bp = bitpack_create (ob->main_stream);
2573 for (int j = 0; j < ipf->length; j++)
2574 bp_pack_value (&bp, ipf->inputs[j], 8);
2575 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2576 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2577 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2578 streamer_write_bitpack (&bp);
2579 streamer_write_uhwi (ob, ipf->unit_offset);
2580 streamer_write_uhwi (ob, ipf->unit_size);
2582 bitpack_d bp = bitpack_create (ob->main_stream);
2583 bp_pack_value (&bp, csum->m_return_ignored, 1);
2584 bp_pack_value (&bp, csum->m_return_returned, 1);
2585 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2586 streamer_write_bitpack (&bp);
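/* The resulting stream layout for one edge is thus: a uhwi input count;
   for each input a hwi length, a bitpack holding the 8-bit input indices
   followed by the aggregate_pass_through, pointer_pass_through and
   safe_to_import_accesses flags, and uhwi unit_offset and unit_size
   values; finally a bitpack with the m_return_ignored, m_return_returned
   and m_bit_aligned_arg bits.  isra_read_edge_summary below consumes
   exactly this sequence.  */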
2589 /* Write intraprocedural analysis information about NODE and all of its outgoing
2590 edges into a stream for LTO WPA. */
2592 static void
2593 isra_write_node_summary (output_block *ob, cgraph_node *node)
2595 isra_func_summary *ifs = func_sums->get (node);
2596 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2597 int node_ref = lto_symtab_encoder_encode (encoder, node);
2598 streamer_write_uhwi (ob, node_ref);
2600 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2601 streamer_write_uhwi (ob, param_desc_count);
2602 for (unsigned i = 0; i < param_desc_count; i++)
2604 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2605 unsigned access_count = vec_safe_length (desc->accesses);
2606 streamer_write_uhwi (ob, access_count);
2607 for (unsigned j = 0; j < access_count; j++)
2609 param_access *acc = (*desc->accesses)[j];
2610 stream_write_tree (ob, acc->type, true);
2611 stream_write_tree (ob, acc->alias_ptr_type, true);
2612 streamer_write_uhwi (ob, acc->unit_offset);
2613 streamer_write_uhwi (ob, acc->unit_size);
2614 bitpack_d bp = bitpack_create (ob->main_stream);
2615 bp_pack_value (&bp, acc->certain, 1);
2616 streamer_write_bitpack (&bp);
2618 streamer_write_uhwi (ob, desc->param_size_limit);
2619 streamer_write_uhwi (ob, desc->size_reached);
2620 bitpack_d bp = bitpack_create (ob->main_stream);
2621 bp_pack_value (&bp, desc->locally_unused, 1);
2622 bp_pack_value (&bp, desc->split_candidate, 1);
2623 bp_pack_value (&bp, desc->by_ref, 1);
2624 streamer_write_bitpack (&bp);
2626 bitpack_d bp = bitpack_create (ob->main_stream);
2627 bp_pack_value (&bp, ifs->m_candidate, 1);
2628 bp_pack_value (&bp, ifs->m_returns_value, 1);
2629 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2630 gcc_assert (!ifs->m_queued);
2631 streamer_write_bitpack (&bp);
2633 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2634 isra_write_edge_summary (ob, e);
2635 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2636 isra_write_edge_summary (ob, e);
2639 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2641 static void
2642 ipa_sra_write_summary (void)
2644 if (!func_sums || !call_sums)
2645 return;
2647 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2648 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2649 ob->symbol = NULL;
2651 unsigned int count = 0;
2652 lto_symtab_encoder_iterator lsei;
2653 for (lsei = lsei_start_function_in_partition (encoder);
2654 !lsei_end_p (lsei);
2655 lsei_next_function_in_partition (&lsei))
2657 cgraph_node *node = lsei_cgraph_node (lsei);
2658 if (node->has_gimple_body_p ()
2659 && func_sums->get (node) != NULL)
2660 count++;
2662 streamer_write_uhwi (ob, count);
2664 /* Process all of the functions. */
2665 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2666 lsei_next_function_in_partition (&lsei))
2668 cgraph_node *node = lsei_cgraph_node (lsei);
2669 if (node->has_gimple_body_p ()
2670 && func_sums->get (node) != NULL)
2671 isra_write_node_summary (ob, node);
2673 streamer_write_char_stream (ob->main_stream, 0);
2674 produce_asm (ob, NULL);
2675 destroy_output_block (ob);
2678 /* Read intraprocedural analysis information about the call graph edge CS
2679    from the stream IB for LTO WPA.  */
2681 static void
2682 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2684 isra_call_summary *csum = call_sums->get_create (cs);
2685 unsigned input_count = streamer_read_uhwi (ib);
2686 csum->init_inputs (input_count);
2687 for (unsigned i = 0; i < input_count; i++)
2689 isra_param_flow *ipf = &csum->m_arg_flow[i];
2690 ipf->length = streamer_read_hwi (ib);
2691 bitpack_d bp = streamer_read_bitpack (ib);
2692 for (int j = 0; j < ipf->length; j++)
2693 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2694 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2695 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2696 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2697 ipf->unit_offset = streamer_read_uhwi (ib);
2698 ipf->unit_size = streamer_read_uhwi (ib);
2700 bitpack_d bp = streamer_read_bitpack (ib);
2701 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2702 csum->m_return_returned = bp_unpack_value (&bp, 1);
2703 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2706 /* Read intraprocedural analysis information about NODE and all of its
2707    outgoing edges from the stream IB for LTO WPA.  */
2709 static void
2710 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2711 struct data_in *data_in)
2713 isra_func_summary *ifs = func_sums->get_create (node);
2714 unsigned param_desc_count = streamer_read_uhwi (ib);
2715 if (param_desc_count > 0)
2717 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2718 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2720 for (unsigned i = 0; i < param_desc_count; i++)
2722 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2723 unsigned access_count = streamer_read_uhwi (ib);
2724 for (unsigned j = 0; j < access_count; j++)
2726 param_access *acc = ggc_cleared_alloc<param_access> ();
2727 acc->type = stream_read_tree (ib, data_in);
2728 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2729 acc->unit_offset = streamer_read_uhwi (ib);
2730 acc->unit_size = streamer_read_uhwi (ib);
2731 bitpack_d bp = streamer_read_bitpack (ib);
2732 acc->certain = bp_unpack_value (&bp, 1);
2733 vec_safe_push (desc->accesses, acc);
2735 desc->param_size_limit = streamer_read_uhwi (ib);
2736 desc->size_reached = streamer_read_uhwi (ib);
2737 bitpack_d bp = streamer_read_bitpack (ib);
2738 desc->locally_unused = bp_unpack_value (&bp, 1);
2739 desc->split_candidate = bp_unpack_value (&bp, 1);
2740 desc->by_ref = bp_unpack_value (&bp, 1);
2742 bitpack_d bp = streamer_read_bitpack (ib);
2743 ifs->m_candidate = bp_unpack_value (&bp, 1);
2744 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2745 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2746 ifs->m_queued = 0;
2748 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2749 isra_read_edge_summary (ib, e);
2750 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2751 isra_read_edge_summary (ib, e);
2754 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2755    data DATA.  TODO: This function was copied almost verbatim from ipa-prop.c;
2756 it should be possible to unify them somehow. */
2758 static void
2759 isra_read_summary_section (struct lto_file_decl_data *file_data,
2760 const char *data, size_t len)
2762 const struct lto_function_header *header =
2763 (const struct lto_function_header *) data;
2764 const int cfg_offset = sizeof (struct lto_function_header);
2765 const int main_offset = cfg_offset + header->cfg_size;
2766 const int string_offset = main_offset + header->main_size;
2767 struct data_in *data_in;
2768 unsigned int i;
2769 unsigned int count;
2771 lto_input_block ib_main ((const char *) data + main_offset,
2772 header->main_size, file_data->mode_table);
2774 data_in =
2775 lto_data_in_create (file_data, (const char *) data + string_offset,
2776 header->string_size, vNULL);
2777 count = streamer_read_uhwi (&ib_main);
2779 for (i = 0; i < count; i++)
2781 unsigned int index;
2782 struct cgraph_node *node;
2783 lto_symtab_encoder_t encoder;
2785 index = streamer_read_uhwi (&ib_main);
2786 encoder = file_data->symtab_node_encoder;
2787 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2788 index));
2789 gcc_assert (node->definition);
2790 isra_read_node_info (&ib_main, node, data_in);
2792 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2793 len);
2794 lto_data_in_delete (data_in);
2797 /* Read intraprocedural analysis information from a stream for LTO WPA.  */
2799 static void
2800 ipa_sra_read_summary (void)
2802 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2803 struct lto_file_decl_data *file_data;
2804 unsigned int j = 0;
2806 gcc_checking_assert (!func_sums);
2807 gcc_checking_assert (!call_sums);
2808 func_sums
2809 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2810 ipa_sra_function_summaries (symtab, true));
2811 call_sums = new ipa_sra_call_summaries (symtab);
2813 while ((file_data = file_data_vec[j++]))
2815 size_t len;
2816 const char *data
2817 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2818 if (data)
2819 isra_read_summary_section (file_data, data, len);
2823 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2825 static void
2826 ipa_sra_dump_all_summaries (FILE *f)
2828 cgraph_node *node;
2829 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2831 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2833 isra_func_summary *ifs = func_sums->get (node);
2834 if (!ifs)
2836 fprintf (f, " Function does not have any associated IPA-SRA "
2837 "summary\n");
2838 continue;
2840 if (!ifs->m_candidate)
2842 fprintf (f, " Not a candidate function\n");
2843 continue;
2845 if (ifs->m_returns_value)
2846 fprintf (f, " Returns value\n");
2847 if (vec_safe_is_empty (ifs->m_parameters))
2848 fprintf (f, " No parameter information. \n");
2849 else
2850 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2852 fprintf (f, " Descriptor for parameter %i:\n", i);
2853 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2855 fprintf (f, "\n");
2857 struct cgraph_edge *cs;
2858 for (cs = node->callees; cs; cs = cs->next_callee)
2860 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2861 cs->callee->dump_name ());
2862 isra_call_summary *csum = call_sums->get (cs);
2863 if (csum)
2864 csum->dump (f);
2865 else
2866 fprintf (f, " Call summary is MISSING!\n");
2870 fprintf (f, "\n\n");
2873 /* Perform function-scope viability tests that can only be made at the IPA
2874    level and return false if the function is deemed unsuitable for IPA-SRA.  */
2876 static bool
2877 ipa_sra_ipa_function_checks (cgraph_node *node)
2879 if (!node->can_be_local_p ())
2881 if (dump_file)
2882 fprintf (dump_file, "Function %s disqualified because it cannot be "
2883 "made local.\n", node->dump_name ());
2884 return false;
2886 if (!node->can_change_signature)
2888 if (dump_file)
2889 fprintf (dump_file, "Function can not change signature.\n");
2890 return false;
2893 return true;
2896 /* Issues found out by check_callers_for_issues. */
2898 struct caller_issues
2900 /* There is a thunk among callers. */
2901 bool thunk;
2902 /* Call site with no available information. */
2903 bool unknown_callsite;
2904   /* There is a bit-aligned load into one of the non-gimple-typed arguments. */
2905 bool bit_aligned_aggregate_argument;
2908 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2909 that apply. */
2911 static bool
2912 check_for_caller_issues (struct cgraph_node *node, void *data)
2914 struct caller_issues *issues = (struct caller_issues *) data;
2916 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2918 if (cs->caller->thunk.thunk_p)
2920 issues->thunk = true;
2921 /* TODO: We should be able to process at least some types of
2922 thunks. */
2923 return true;
2926 isra_call_summary *csum = call_sums->get (cs);
2927 if (!csum)
2929 issues->unknown_callsite = true;
2930 return true;
2933 if (csum->m_bit_aligned_arg)
2934 issues->bit_aligned_aggregate_argument = true;
2936 return false;
2939 /* Look at all incoming edges to NODE, including aliases and thunks, and
2940    look for problems.  Return true if NODE's type should not be modified at all.  */
2942 static bool
2943 check_all_callers_for_issues (cgraph_node *node)
2945 struct caller_issues issues;
2946 memset (&issues, 0, sizeof (issues));
2948 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2949 if (issues.unknown_callsite)
2951 if (dump_file && (dump_flags & TDF_DETAILS))
2952 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2953 "all modifications.\n", node->dump_name ());
2954 return true;
2956 /* TODO: We should be able to process at least some types of thunks. */
2957 if (issues.thunk)
2959 if (dump_file && (dump_flags & TDF_DETAILS))
2960 fprintf (dump_file, "A call of %s is through thunk, which are not"
2961 " handled yet. Disabling all modifications.\n",
2962 node->dump_name ());
2963 return true;
2966 if (issues.bit_aligned_aggregate_argument)
2968 /* Let's only remove parameters/return values from such functions.
2969 TODO: We could only prevent splitting the problematic parameters if
2970 anybody thinks it is worth it. */
2971 if (dump_file && (dump_flags & TDF_DETAILS))
2972 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2973 " disabling parameter splitting.\n", node->dump_name ());
2975 isra_func_summary *ifs = func_sums->get (node);
2976 gcc_checking_assert (ifs);
2977 unsigned param_count = vec_safe_length (ifs->m_parameters);
2978 for (unsigned i = 0; i < param_count; i++)
2979 (*ifs->m_parameters)[i].split_candidate = false;
2981 return false;
2984 /* Find the access with corresponding OFFSET and SIZE among accesses in
2985 PARAM_DESC and return it or NULL if such an access is not there. */
2987 static param_access *
2988 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2990 unsigned pclen = vec_safe_length (param_desc->accesses);
2992   /* The search is linear but the number of stored accesses is bounded by
2993 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2995 for (unsigned i = 0; i < pclen; i++)
2996 if ((*param_desc->accesses)[i]->unit_offset == offset
2997 && (*param_desc->accesses)[i]->unit_size == size)
2998 return (*param_desc->accesses)[i];
3000 return NULL;
3003 /* Return true iff the total size SIZE of definite replacements would
3004    violate the limit set for it in DESC.  */
3006 static bool
3007 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3009 unsigned limit = desc->param_size_limit;
3010 if (size > limit
3011 || (!desc->by_ref && size == limit))
3012 return true;
3013 return false;
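/* A worked example of the asymmetry above (numbers purely illustrative):
   with a limit of 8 units, a by-reference parameter may accumulate
   replacements totaling exactly 8 units, because removing the pointer
   itself is still a win, whereas for an aggregate passed by value,
   replacements totaling its full size would merely re-create the whole
   parameter piecewise, so equality already violates the limit.  */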
3016 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3017 the set limit. IDX is the parameter number which is dumped when
3018 disqualifying. */
3020 static void
3021 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3023 unsigned after = desc->size_reached + size;
3024 if (size_would_violate_limit_p (desc, after))
3026 if (dump_file && (dump_flags & TDF_DETAILS))
3027 fprintf (dump_file, " ...size limit reached, disqualifying "
3028 "candidate parameter %u\n", idx);
3029 desc->split_candidate = false;
3030 return;
3032 desc->size_reached = after;
3035 /* Take all actions required to deal with an edge CS that represents a call to
3036 an unknown or un-analyzed function, for both parameter removal and
3037 splitting. */
3039 static void
3040 process_edge_to_unknown_caller (cgraph_edge *cs)
3042 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3043 gcc_checking_assert (from_ifs);
3044 isra_call_summary *csum = call_sums->get (cs);
3046 if (dump_file && (dump_flags & TDF_DETAILS))
3047 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3048 cs->caller->dump_name ());
3050 unsigned args_count = csum->m_arg_flow.length ();
3051 for (unsigned i = 0; i < args_count; i++)
3053 isra_param_flow *ipf = &csum->m_arg_flow[i];
3055 if (ipf->pointer_pass_through)
3057 isra_param_desc *param_desc
3058 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3059 param_desc->locally_unused = false;
3060 param_desc->split_candidate = false;
3061 continue;
3063 if (ipf->aggregate_pass_through)
3065 unsigned idx = get_single_param_flow_source (ipf);
3066 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3068 param_desc->locally_unused = false;
3069 if (!param_desc->split_candidate)
3070 continue;
3071 gcc_assert (!param_desc->by_ref);
3072 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3073 ipf->unit_size);
3074 gcc_checking_assert (pacc);
3075 pacc->certain = true;
3076 if (overlapping_certain_accesses_p (param_desc, NULL))
3078 if (dump_file && (dump_flags & TDF_DETAILS))
3079 fprintf (dump_file, " ...leading to overlap, "
3080 " disqualifying candidate parameter %u\n",
3081 idx);
3082 param_desc->split_candidate = false;
3084 else
3085 bump_reached_size (param_desc, pacc->unit_size, idx);
3086 ipf->aggregate_pass_through = false;
3087 continue;
3090 for (int j = 0; j < ipf->length; j++)
3092 int input_idx = ipf->inputs[j];
3093 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3098 /* Propagate parameter removal information through cross-SCC edge CS,
3099    i.e. mark any caller parameter feeding an argument which the callee
3100    actually needs as used in the caller parameter descriptors.  */
3102 static void
3103 param_removal_cross_scc_edge (cgraph_edge *cs)
3105 enum availability availability;
3106 cgraph_node *callee = cs->callee->function_symbol (&availability);
3107 isra_func_summary *to_ifs = func_sums->get (callee);
3108 if (!to_ifs || !to_ifs->m_candidate
3109 || (availability < AVAIL_AVAILABLE)
3110 || vec_safe_is_empty (to_ifs->m_parameters))
3112 process_edge_to_unknown_caller (cs);
3113 return;
3115 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3116 gcc_checking_assert (from_ifs);
3118 isra_call_summary *csum = call_sums->get (cs);
3119 unsigned args_count = csum->m_arg_flow.length ();
3120 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3122 for (unsigned i = 0; i < args_count; i++)
3124 bool unused_in_callee;
3125 if (i < param_count)
3126 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3127 else
3128 unused_in_callee = false;
3130 if (!unused_in_callee)
3132 isra_param_flow *ipf = &csum->m_arg_flow[i];
3133 for (int j = 0; j < ipf->length; j++)
3135 int input_idx = ipf->inputs[j];
3136 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3142 /* Unless it is already there, push NODE which is also described by IFS to
3143 STACK. */
3145 static void
3146 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3147 vec<cgraph_node *> *stack)
3149 if (!ifs->m_queued)
3151 ifs->m_queued = true;
3152 stack->safe_push (node);
3156 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3157 used and push CALLER on STACK. */
3159 static void
3160 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3161 cgraph_node *caller, vec<cgraph_node *> *stack)
3163 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3165 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3166 isra_push_node_to_stack (caller, from_ifs, stack);
3171 /* Propagate information that any parameter is not used only locally within a
3172 SCC across CS to the caller, which must be in the same SCC as the
3173 callee. Push any callers that need to be re-processed to STACK. */
3175 static void
3176 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3178 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3179 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3180 return;
3182 isra_call_summary *csum = call_sums->get (cs);
3183 gcc_checking_assert (csum);
3184 unsigned args_count = csum->m_arg_flow.length ();
3185 enum availability availability;
3186 cgraph_node *callee = cs->callee->function_symbol (&availability);
3187 isra_func_summary *to_ifs = func_sums->get (callee);
3189 unsigned param_count
3190 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3191 ? vec_safe_length (to_ifs->m_parameters) : 0;
3192 for (unsigned i = 0; i < args_count; i++)
3194 if (i < param_count
3195 && (*to_ifs->m_parameters)[i].locally_unused)
3196 continue;
3198       /* The argument is needed in the callee, so we must mark the parameter as
3199 used also in the caller and its callers within this SCC. */
3200 isra_param_flow *ipf = &csum->m_arg_flow[i];
3201 for (int j = 0; j < ipf->length; j++)
3203 int input_idx = ipf->inputs[j];
3204 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3209 /* Propagate information that any parameter is not used only locally within a
3210 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3211 same SCC. Push any callers that need to be re-processed to STACK. */
3213 static bool
3214 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3216 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3217 cgraph_edge *cs;
3218 for (cs = node->callers; cs; cs = cs->next_caller)
3219 if (ipa_edge_within_scc (cs))
3220 propagate_used_across_scc_edge (cs, stack);
3221 return false;
3224 /* Return true iff all certain accesses in ARG_DESC are also present as
3225 certain accesses in PARAM_DESC. */
3227 static bool
3228 all_callee_accesses_present_p (isra_param_desc *param_desc,
3229 isra_param_desc *arg_desc)
3231 unsigned aclen = vec_safe_length (arg_desc->accesses);
3232 for (unsigned j = 0; j < aclen; j++)
3234 param_access *argacc = (*arg_desc->accesses)[j];
3235 if (!argacc->certain)
3236 continue;
3237 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3238 argacc->unit_size);
3239 if (!pacc || !pacc->certain)
3240 return false;
3242 return true;
3245 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3246 does not allow instantiating an auto_vec with a type defined within a
3247 function so it is a global type. */
3248 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3251 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3252 (which belongs to CALLER) if they would not violate some constraint there.
3253 If successful, return NULL, otherwise return the string reason for failure
3254 (which can be written to the dump file). DELTA_OFFSET is the known offset
3255    of the actual argument within the formal parameter (so of ARG_DESC within
3256    PARAM_DESC), ARG_SIZE is the size of the actual argument or zero, if not
3257 known. In case of success, set *CHANGE_P to true if propagation actually
3258 changed anything. */
3260 static const char *
3261 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3262 isra_param_desc *arg_desc,
3263 unsigned delta_offset, unsigned arg_size,
3264 bool *change_p)
3266 unsigned pclen = vec_safe_length (param_desc->accesses);
3267 unsigned aclen = vec_safe_length (arg_desc->accesses);
3268 unsigned prop_count = 0;
3269 unsigned prop_size = 0;
3270 bool change = false;
3272 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3273 for (unsigned j = 0; j < aclen; j++)
3275 param_access *argacc = (*arg_desc->accesses)[j];
3276 prop_kinds.safe_push (ACC_PROP_DONT);
3278 if (arg_size > 0
3279 && argacc->unit_offset + argacc->unit_size > arg_size)
3280 return "callee access outsize size boundary";
3282 if (!argacc->certain)
3283 continue;
3285 unsigned offset = argacc->unit_offset + delta_offset;
3286 /* Given that accesses are initially stored according to increasing
3287 offset and decreasing size in case of equal offsets, the following
3288 searches could be written more efficiently if we kept the ordering
3289 when copying. But the number of accesses is capped at
3290 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3291 messy quickly, so let's improve on that only if necessary. */
3293 bool exact_match = false;
3294 for (unsigned i = 0; i < pclen; i++)
3296 /* Check for overlaps. */
3297 param_access *pacc = (*param_desc->accesses)[i];
3298 if (pacc->unit_offset == offset
3299 && pacc->unit_size == argacc->unit_size)
3301 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3302 || !types_compatible_p (argacc->type, pacc->type))
3303 return "propagated access types would not match existing ones";
3305 exact_match = true;
3306 if (!pacc->certain)
3308 prop_kinds[j] = ACC_PROP_CERTAIN;
3309 prop_size += argacc->unit_size;
3310 change = true;
3312 continue;
3315 if (offset < pacc->unit_offset + pacc->unit_size
3316 && offset + argacc->unit_size > pacc->unit_offset)
3318 	      /* No overlap with a certain (load) access is permissible;
3319 		 fitting entirely inside a non-certain argument access is.  */
3320 if (pacc->certain
3321 || offset < pacc->unit_offset
3322 || (offset + argacc->unit_size
3323 > pacc->unit_offset + pacc->unit_size))
3324 return "a propagated access would conflict in caller";
3328 if (!exact_match)
3330 prop_kinds[j] = ACC_PROP_COPY;
3331 prop_count++;
3332 prop_size += argacc->unit_size;
3333 change = true;
3337 if (!change)
3338 return NULL;
3340 if ((prop_count + pclen
3341 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3342 || size_would_violate_limit_p (param_desc,
3343 param_desc->size_reached + prop_size))
3344 return "propagating accesses would violate the count or size limit";
3346 *change_p = true;
3347 for (unsigned j = 0; j < aclen; j++)
3349 if (prop_kinds[j] == ACC_PROP_COPY)
3351 param_access *argacc = (*arg_desc->accesses)[j];
3353 param_access *copy = ggc_cleared_alloc<param_access> ();
3354 copy->unit_offset = argacc->unit_offset + delta_offset;
3355 copy->unit_size = argacc->unit_size;
3356 copy->type = argacc->type;
3357 copy->alias_ptr_type = argacc->alias_ptr_type;
3358 copy->certain = true;
3359 vec_safe_push (param_desc->accesses, copy);
3361 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3363 param_access *argacc = (*arg_desc->accesses)[j];
3364 param_access *csp
3365 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3366 argacc->unit_size);
3367 csp->certain = true;
3371 param_desc->size_reached += prop_size;
3373 return NULL;
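/* A small hypothetical example of the offset arithmetic above: if the
   actual argument lives at unit offset 4 within the caller's formal
   parameter (DELTA_OFFSET == 4) and the callee has a certain access at
   unit offset 2 of size 4, the propagated access covers units 6..9 of
   the caller's parameter, and it is at that shifted position that exact
   matches and conflicts are looked up.  */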
3376 /* Propagate parameter splitting information through call graph edge CS.
3377 Return true if any changes that might need to be propagated within SCCs have
3378 been made. The function also clears the aggregate_pass_through and
3379 pointer_pass_through in call summaries which do not need to be processed
3380 again if this CS is revisited when iterating while changes are propagated
3381 within an SCC. */
3383 static bool
3384 param_splitting_across_edge (cgraph_edge *cs)
3386 bool res = false;
3387 bool cross_scc = !ipa_edge_within_scc (cs);
3388 enum availability availability;
3389 cgraph_node *callee = cs->callee->function_symbol (&availability);
3390 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3391 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3393 isra_call_summary *csum = call_sums->get (cs);
3394 gcc_checking_assert (csum);
3395 unsigned args_count = csum->m_arg_flow.length ();
3396 isra_func_summary *to_ifs = func_sums->get (callee);
3397 unsigned param_count
3398 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3399 ? vec_safe_length (to_ifs->m_parameters)
3400 : 0);
3402 if (dump_file && (dump_flags & TDF_DETAILS))
3403 fprintf (dump_file, "Splitting across %s->%s:\n",
3404 cs->caller->dump_name (), callee->dump_name ());
3406 unsigned i;
3407 for (i = 0; (i < args_count) && (i < param_count); i++)
3409 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3410 isra_param_flow *ipf = &csum->m_arg_flow[i];
3412 if (arg_desc->locally_unused)
3414 if (dump_file && (dump_flags & TDF_DETAILS))
3415 fprintf (dump_file, " ->%u: unused in callee\n", i);
3416 ipf->pointer_pass_through = false;
3417 continue;
3420 if (ipf->pointer_pass_through)
3422 int idx = get_single_param_flow_source (ipf);
3423 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3424 if (!param_desc->split_candidate)
3425 continue;
3426 gcc_assert (param_desc->by_ref);
3428 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3430 if (dump_file && (dump_flags & TDF_DETAILS))
3431 fprintf (dump_file, " %u->%u: not candidate or not by "
3432 "reference in callee\n", idx, i);
3433 param_desc->split_candidate = false;
3434 ipf->pointer_pass_through = false;
3435 res = true;
3437 else if (!ipf->safe_to_import_accesses)
3439 if (!all_callee_accesses_present_p (param_desc, arg_desc))
3441 if (dump_file && (dump_flags & TDF_DETAILS))
3442 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3443 idx, i);
3444 param_desc->split_candidate = false;
3445 ipf->pointer_pass_through = false;
3446 res = true;
3449 else
3451 if (dump_file && (dump_flags & TDF_DETAILS))
3452 fprintf (dump_file, " %u->%u: verified callee accesses "
3453 "present.\n", idx, i);
3454 if (cross_scc)
3455 ipf->pointer_pass_through = false;
3458 else
3460 const char *pull_failure
3461 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3462 0, 0, &res);
3463 if (pull_failure)
3465 if (dump_file && (dump_flags & TDF_DETAILS))
3466 fprintf (dump_file, " %u->%u: by_ref access pull "
3467 "failed: %s.\n", idx, i, pull_failure);
3468 param_desc->split_candidate = false;
3469 ipf->pointer_pass_through = false;
3470 res = true;
3472 else
3474 if (dump_file && (dump_flags & TDF_DETAILS))
3475 fprintf (dump_file, " %u->%u: by_ref access pull "
3476 "succeeded.\n", idx, i);
3477 if (cross_scc)
3478 ipf->pointer_pass_through = false;
3482 else if (ipf->aggregate_pass_through)
3484 int idx = get_single_param_flow_source (ipf);
3485 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3486 if (!param_desc->split_candidate)
3487 continue;
3488 gcc_assert (!param_desc->by_ref);
3489 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3490 ipf->unit_size);
3491 gcc_checking_assert (pacc);
3493 if (pacc->certain)
3495 if (dump_file && (dump_flags & TDF_DETAILS))
3496 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3497 ipf->aggregate_pass_through = false;
3499 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3501 if (dump_file && (dump_flags & TDF_DETAILS))
3502 fprintf (dump_file, " %u->%u: not candidate or by "
3503 "reference in callee\n", idx, i);
3505 pacc->certain = true;
3506 if (overlapping_certain_accesses_p (param_desc, NULL))
3508 if (dump_file && (dump_flags & TDF_DETAILS))
3509 fprintf (dump_file, " ...leading to overlap, "
3510 " disqualifying candidate parameter %u\n",
3511 idx);
3512 param_desc->split_candidate = false;
3514 else
3515 bump_reached_size (param_desc, pacc->unit_size, idx);
3517 ipf->aggregate_pass_through = false;
3518 res = true;
3520 else
3522 const char *pull_failure
3523 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3524 ipf->unit_offset,
3525 ipf->unit_size, &res);
3526 if (pull_failure)
3528 if (dump_file && (dump_flags & TDF_DETAILS))
3529 fprintf (dump_file, " %u->%u: arg access pull "
3530 "failed: %s.\n", idx, i, pull_failure);
3532 ipf->aggregate_pass_through = false;
3533 pacc->certain = true;
3535 if (overlapping_certain_accesses_p (param_desc, NULL))
3537 if (dump_file && (dump_flags & TDF_DETAILS))
3538 fprintf (dump_file, " ...leading to overlap, "
3539 " disqualifying candidate parameter %u\n",
3540 idx);
3541 param_desc->split_candidate = false;
3543 else
3544 bump_reached_size (param_desc, pacc->unit_size, idx);
3546 res = true;
3548 else
3550 if (dump_file && (dump_flags & TDF_DETAILS))
3551 fprintf (dump_file, " %u->%u: arg access pull "
3552 "succeeded.\n", idx, i);
3553 if (cross_scc)
3554 ipf->aggregate_pass_through = false;
3560 /* Handle argument-parameter count mismatches. */
3561 for (; (i < args_count); i++)
3563 isra_param_flow *ipf = &csum->m_arg_flow[i];
3565 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3567 int idx = get_single_param_flow_source (ipf);
3568 ipf->pointer_pass_through = false;
3569 ipf->aggregate_pass_through = false;
3570 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3571 if (!param_desc->split_candidate)
3572 continue;
3574 if (dump_file && (dump_flags & TDF_DETAILS))
3575 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3576 idx, i);
3577 param_desc->split_candidate = false;
3578 res = true;
3581 return res;
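/* Illustration (hypothetical functions, not from this file): if
   bar (int *p) only loads *p and foo (int *p) merely does
   "return bar (p);", the foo->bar summary has pointer_pass_through set
   for the argument.  Pulling bar's certain access into foo's descriptor
   then allows both functions to be rewritten to take the loaded integer
   instead of the pointer, provided safe_to_import_accesses was
   established for the call.  */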
3584 /* Worker for call_for_symbol_and_aliases, look at all callers and if all
3585    of them ignore the return value, or come from the same SCC and use the
3586 return value only to compute their return value, return false, otherwise
3587 return true. */
3589 static bool
3590 retval_used_p (cgraph_node *node, void *)
3592 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3594 isra_call_summary *csum = call_sums->get (cs);
3595 gcc_checking_assert (csum);
3596 if (csum->m_return_ignored)
3597 continue;
3598 if (!csum->m_return_returned)
3599 return true;
3601 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3602 if (!from_ifs || !from_ifs->m_candidate)
3603 return true;
3605 if (!ipa_edge_within_scc (cs)
3606 && !from_ifs->m_return_ignored)
3607 return true;
3610 return false;
3613 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3614    modify the parameter which originally had index BASE_INDEX, had index
3615    PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and was described by
3616 PREV_ADJUSTMENT. If the parent clone is the original function,
3617 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3620 static void
3621 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3622 unsigned prev_clone_index,
3623 ipa_adjusted_param *prev_adjustment,
3624 vec<ipa_adjusted_param, va_gc> **new_params)
3626 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3627 if (desc->locally_unused)
3629 if (dump_file)
3630 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3631 return;
3634 if (!desc->split_candidate)
3636 ipa_adjusted_param adj;
3637 if (prev_adjustment)
3639 adj = *prev_adjustment;
3640 adj.prev_clone_adjustment = true;
3641 adj.prev_clone_index = prev_clone_index;
3643 else
3645 memset (&adj, 0, sizeof (adj));
3646 adj.op = IPA_PARAM_OP_COPY;
3647 adj.base_index = base_index;
3648 adj.prev_clone_index = prev_clone_index;
3650 vec_safe_push ((*new_params), adj);
3651 return;
3654 if (dump_file)
3655 fprintf (dump_file, " Will split parameter %u\n", base_index);
3657 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3658 unsigned aclen = vec_safe_length (desc->accesses);
3659 for (unsigned j = 0; j < aclen; j++)
3661 param_access *pa = (*desc->accesses)[j];
3662 if (!pa->certain)
3663 continue;
3664 if (dump_file)
3665 fprintf (dump_file, " - component at byte offset %u, "
3666 "size %u\n", pa->unit_offset, pa->unit_size);
3668 ipa_adjusted_param adj;
3669 memset (&adj, 0, sizeof (adj));
3670 adj.op = IPA_PARAM_OP_SPLIT;
3671 adj.base_index = base_index;
3672 adj.prev_clone_index = prev_clone_index;
3673 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3674 adj.reverse = pa->reverse;
3675 adj.type = pa->type;
3676 adj.alias_ptr_type = pa->alias_ptr_type;
3677 adj.unit_offset = pa->unit_offset;
3678 vec_safe_push ((*new_params), adj);
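/* For instance (purely illustrative), a split candidate "struct S *p"
   with certain accesses at unit offsets 0 and 8 produces two
   IPA_PARAM_OP_SPLIT entries with those unit_offsets, a parameter that
   is neither unused nor split produces a single IPA_PARAM_OP_COPY or
   prev_clone_adjustment entry, and a locally unused one produces no
   entry at all.  */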
/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  */

static void
process_isra_node_results (cgraph_node *node,
                           hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }

  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
        isra_param_desc *desc = &(*ifs->m_parameters)[i];
        if ((desc->locally_unused || desc->split_candidate)
            /* Make sure we do not clone just to attempt to remove an already
               removed unused argument.  */
            && (!check_surviving
                || (i < surviving_params.length ()
                    && surviving_params[i])))
          {
            will_change_function = true;
            break;
          }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
               node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
        fprintf (dump_file, "  Will remove return value.\n");
    }

  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments = node->clone.param_adjustments)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
        {
          ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
          push_param_adjustments_for_index (ifs, old_adj->base_index, i,
                                            old_adj, &new_params);
        }
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
                              ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n  Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }
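  /* The counter is keyed by the assembler name so that every clone of a
     given function receives a distinct number in its "isra" suffix.  */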
  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
                               IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
                                 node->decl)));
  vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
                                  suffix_counter);
  suffix_counter++;
  if (node->same_comdat_group)
    new_node->add_to_same_comdat_group (node);
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, "  Created new node %s\n", new_node->dump_name ());
  callers.release ();
}
/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not be
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.  */
static bool
disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }
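  /* The ipa_sra_params debug counter below can be used to bisect
     IPA-SRA-related problems; for example, -fdbg-cnt=ipa_sra_params:N lets
     only the first N candidate parameters through.  */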
  bool dumped_first = false;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
        {
          desc->locally_unused = false;
          desc->split_candidate = false;
        }
      else if (check_surviving
               && (i >= surviving_params.length ()
                   || !surviving_params[i]))
        {
          /* Even if the parameter was removed by a previous IPA pass, we do
             not clear locally_unused because if it really is unused, this
             information might be useful in callers.  */
          desc->split_candidate = false;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              if (!dumped_first)
                {
                  fprintf (dump_file,
                           "The following parameters of %s are dead on "
                           "arrival:", node->dump_name ());
                  dumped_first = true;
                }
              fprintf (dump_file, " %u", i);
            }
        }
      else if (desc->locally_unused || desc->split_candidate)
        ret = false;
    }

  if (dumped_first)
    fprintf (dump_file, "\n");

  return ret;
}
/* Run the interprocedural part of IPA-SRA.  */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callees to callers for parameter removal and splitting.  */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      /* Preliminary IPA function level checks and first step of parameter
         removal.  */
      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        {
          isra_func_summary *ifs = func_sums->get (v);
          if (!ifs || !ifs->m_candidate)
            continue;
          if (!ipa_sra_ipa_function_checks (v)
              || check_all_callers_for_issues (v))
            {
              ifs->zap ();
              continue;
            }
          if (disable_unavailable_parameters (v, ifs))
            continue;
          for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
            process_edge_to_unknown_caller (cs);
          for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
            if (!ipa_edge_within_scc (cs))
              param_removal_cross_scc_edge (cs);
        }
      /* Look at edges within the current SCC and propagate used-ness across
         them, pushing onto the stack all nodes which might need to be
         revisited.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
                                               &stack, true);
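      /* Propagation is monotone, the used-ness flags only ever flip from
         unused to used, so the worklist below is guaranteed to terminate.  */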
      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
        {
          cgraph_node *v = stack.pop ();
          isra_func_summary *ifs = func_sums->get (v);
          gcc_checking_assert (ifs && ifs->m_queued);
          ifs->m_queued = false;

          v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
                                                 &stack, true);
        }
      /* Parameter splitting.  */
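      /* A split made across one edge can change the summaries involved,
         which may in turn enable or require more changes across other edges
         within the same SCC, hence the loop below repeats until a fixpoint
         is reached.  */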
      bool repeat_scc_access_propagation;
      do
        {
          repeat_scc_access_propagation = false;
          FOR_EACH_VEC_ELT (cycle_nodes, j, v)
            {
              isra_func_summary *ifs = func_sums->get (v);
              if (!ifs
                  || !ifs->m_candidate
                  || vec_safe_is_empty (ifs->m_parameters))
                continue;
              for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
                if (param_splitting_across_edge (cs))
                  repeat_scc_access_propagation = true;
            }
        }
      while (repeat_scc_access_propagation);
      if (flag_checking)
        FOR_EACH_VEC_ELT (cycle_nodes, j, v)
          verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }
  /* One sweep from callers to callees for return value removal.  */
  for (int i = node_scc_count - 1; i >= 0 ; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        {
          isra_func_summary *ifs = func_sums->get (v);
          if (!ifs || !ifs->m_candidate)
            continue;

          bool return_needed
            = (ifs->m_returns_value
               && (!dbg_cnt (ipa_sra_retvalues)
                   || v->call_for_symbol_and_aliases (retval_used_p,
                                                      NULL, true)));
          ifs->m_return_ignored = !return_needed;
          if (return_needed)
            isra_push_node_to_stack (v, ifs, &stack);
        }
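      /* Within the SCC, needing the return value of one node transitively
         forces the callees whose value it merely passes on to keep theirs
         too, so iterate until the set of needed return values stabilizes.  */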
      while (!stack.is_empty ())
        {
          cgraph_node *node = stack.pop ();
          isra_func_summary *ifs = func_sums->get (node);
          gcc_checking_assert (ifs && ifs->m_queued);
          ifs->m_queued = false;

          for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
            if (ipa_edge_within_scc (cs)
                && call_sums->get (cs)->m_return_returned)
              {
                enum availability av;
                cgraph_node *callee = cs->callee->function_symbol (&av);
                isra_func_summary *to_ifs = func_sums->get (callee);
                if (to_ifs && to_ifs->m_return_ignored)
                  {
                    to_ifs->m_return_ignored = false;
                    isra_push_node_to_stack (callee, to_ifs, &stack);
                  }
              }
        }
      cycle_nodes.release ();
    }
  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
        {
          fprintf (dump_file, "\n========== IPA-SRA propagation final state "
                   "==========\n");
          ipa_sra_dump_all_summaries (dump_file);
        }
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }
  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
             "==========\n\n");
  return 0;
}
const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
                      ipa_sra_generate_summary, /* generate_summary */
                      ipa_sra_write_summary, /* write_summary */
                      ipa_sra_read_summary, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      NULL, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* TODO: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_sra && optimize);
  }

  virtual unsigned int execute (function *) { return ipa_sra_analysis (); }

}; // class pass_ipa_sra
} // anon namespace
ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}
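/* The pass object is created by the pass manager; make_pass_ipa_sra is
   declared in tree-pass.h and the pass is positioned within the IPA pass
   list in passes.def.  */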
#include "gt-ipa-sra.h"