/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2024 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and each call are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments so that if two parameters are only
   used in a sum which is then passed to another function which then however
   does not use this parameter, all three parameters of the two functions can
   be eliminated.  Edge summaries also have flags indicating whether the
   return value is used or whether it is only returned by the caller in turn.
   In LTO mode these summaries are then streamed to the object file in the
   compilation phase and streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
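
/* To make the transformation concrete, here is a simplified illustration (a
   sketch for exposition only, not code taken from this pass): given

     struct pair { int a; int b; };
     static int get_a (struct pair *p) { return p->a; }

   IPA-SRA can, when all callers are visible and suitable, produce a clone
   equivalent to

     static int get_a (int p_a) { return p_a; }

   with each call site adjusted to load p->a and pass it by value.  */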
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"
#include "symtab-clones.h"
#include "attribs.h"
#include "ipa-prop.h"

static void ipa_sra_summarize_function (cgraph_node *);
/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
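
/* A brief note on the limits above: ISRA_ARG_SIZE_LIMIT evaluates to
   1 << 16 = 65536, so unit offsets and sizes are only tracked for aggregates
   smaller than 64KiB, and at most seven formal parameters may feed a single
   actual argument.  */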
/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that occur
   within a function that fall within another access form a tree.  The pass
   cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};
/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Cumulative count of all loads.  */
  profile_count load_count;
  /* Have there been writes to or reads from this exact location except for as
     arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};
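
/* As an illustration of the tree structure above (hypothetical offsets and
   sizes in bits): accesses (0, 128), (0, 32) and (64, 32) form a tree in
   which the latter two are children of the first; an additional access
   (96, 64) would only partially overlap (0, 128) and would disqualify the
   parameter.  */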
/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameters, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Minimum offset that is known to be safe to dereference because callers
     pass pointers to DECLs of at least this size or because of dereferences
     in callers.  */
  unsigned safe_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing stuff by reference?  */
  unsigned by_ref : 1;
  /* If set, this parameter can only be a candidate for removal if the function
     is going to lose its return value.  */
  unsigned remove_only_when_retval_removed : 1;
  /* If set, this parameter can only be a candidate for splitting if the
     function is going to lose its return value.  Can only be meaningfully set
     for by_ref parameters.  */
  unsigned split_only_when_retval_removed : 1;
  /* Parameter hint set during IPA analysis when there is a caller which does
     not construct the argument just to pass it to calls.  Only meaningful for
     by_ref parameters.  */
  unsigned not_specially_constructed : 1;
  /* Only meaningful for by_ref parameters.  If set, this parameter can only be
     a split candidate if all callers pass pointers that are known to point to
     a chunk of memory large enough to contain all accesses.  */
  unsigned conditionally_dereferenceable : 1;
  /* Set when safe_size has been updated from at least one caller.  */
  unsigned safe_size_set : 1;
};
/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If the below is non-zero, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a call.  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing stuff by reference (either a pointer or a
     source language reference type)?  */
  bool by_ref;
  /* If this parameter passes stuff by reference, can it be safely dereferenced
     without performing further checks (for example because it is a
     REFERENCE_TYPE)?  */
  bool safe_ref;
  /* If set, this parameter can only be a candidate for removal if the function
     is going to lose its return value.  */
  bool remove_only_when_retval_removed;
  /* If set, this parameter can only be a candidate for splitting if the
     function is going to lose its return value.  Can only be meaningfully set
     for by_ref parameters.  */
  bool split_only_when_retval_removed;
  /* Only meaningful for by_ref parameters.  If set, this parameter can only be
     a split candidate if all callers pass pointers that are known to point to
     a chunk of memory large enough to contain all accesses.  */
  bool conditionally_dereferenceable;

  /* The number of this parameter as it is ordered in the function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is the parameter index used
     to compute indices into bb_dereferences.  */
  int deref_index;
};
/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   allocated in GC memory, this is not necessary and we can consider removing
   the function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}
/* Class used to convey information about functions from the
   intra-procedural analysis stage to inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in IPA SRA stack during processing of
     call graph SCCs.  */

  unsigned m_queued : 1;
};
/* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
   data structure is allocated in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}
/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  /* TODO: see the destructor above.  */
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}
/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     the caller's formal parameters which are used to calculate the value of
     this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* When aggregate_pass_through is set, this is the size of the portion of an
     aggregate formal parameter that is being passed.  Otherwise, this is the
     size of pointed-to memory that is known to be valid to dereference.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through of
     an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
  /* True when the passed value is an address of a structure that has been
     constructed in the caller just to be passed by reference to functions
     (i.e. is never read).  */
  unsigned constructed_for_calls : 1;
};
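
/* An illustrative example of the above (not taken from real dumps): in

     void f (int a, int b, struct S *p)
     {
       g (a + b, p);
     }

   the isra_param_flow of the first argument of g has length 2 and inputs
   {0, 1}, while that of the second one has length 1, inputs {2} and
   pointer_pass_through set.  */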
/* Structure used to convey information about calls from the intra-procedural
   analysis stage to inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in the
     caller.  */
  unsigned m_before_any_store : 1;
};
/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  void duplicate (cgraph_node *, cgraph_node *,
		  isra_func_summary *old_sum,
		  isra_func_summary *new_sum) final override;
  void insert (cgraph_node *, isra_func_summary *) final override;
};
/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
				       isra_func_summary *old_sum,
				       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->safe_size = s->safe_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;
      d->remove_only_when_retval_removed = s->remove_only_when_retval_removed;
      d->split_only_when_retval_removed = s->split_only_when_retval_removed;
      d->not_specially_constructed = s->not_specially_constructed;
      d->conditionally_dereferenceable = s->conditionally_dereferenceable;
      d->safe_size_set = s->safe_size_set;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
	{
	  param_access *from = (*s->accesses)[j];
	  param_access *to = ggc_cleared_alloc<param_access> ();
	  to->type = from->type;
	  to->alias_ptr_type = from->alias_ptr_type;
	  to->unit_offset = from->unit_offset;
	  to->unit_size = from->unit_size;
	  to->certain = from->certain;
	  to->reverse = from->reverse;
	  d->accesses->quick_push (to);
	}
    }
}
/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;
/* Hook that is called by summary when a new node appears.  */

void
ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
{
  if (opt_for_fn (node->decl, flag_ipa_sra))
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      ipa_sra_summarize_function (node);
      pop_cfun ();
    }
  else
    func_sums->remove (node);
}
/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  void duplicate (cgraph_edge *, cgraph_edge *,
		  isra_call_summary *old_sum,
		  isra_call_summary *new_sum) final override;
};

static ipa_sra_call_summaries *call_sums;
/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}
/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, "    happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
	{
	  bool first = true;
	  fprintf (f, "      Scalar param sources: ");
	  for (int j = 0; j < ipf->length; j++)
	    {
	      if (!first)
		fprintf (f, ", ");
	      else
		first = false;
	      fprintf (f, "%i", (int) ipf->inputs[j]);
	    }
	  fprintf (f, "\n");
	}
      if (ipf->aggregate_pass_through)
	fprintf (f, "      Aggregate pass through from the param given above, "
		 "unit offset: %u , unit size: %u\n",
		 ipf->unit_offset, ipf->unit_size);
      else if (ipf->unit_size > 0)
	fprintf (f, "      Known dereferenceable size: %u\n", ipf->unit_size);
      if (ipf->pointer_pass_through)
	fprintf (f, "      Pointer pass through from the param given above, "
		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
      if (ipf->constructed_for_calls)
	fprintf (f, "      Variable constructed just to be passed to "
		 "calls.\n");
    }
}
/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
				   isra_call_summary *old_sum,
				   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
  new_sum->m_before_any_store = old_sum->m_before_any_store;
}
/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* All local DECLs that are ever loaded from and all those that have their
   address assigned to a variable.  */

hash_set <tree> *loaded_decls;

/* Countdown of allowed Alias Analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int unsafe_by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;
/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
		 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
	fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Always inline function will be inlined "
		 "anyway.\n");
      return false;
    }

  return true;
}
/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "* Access to offset: " HOST_WIDE_INT_PRINT_DEC,
	   access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", load_count: ");
  access->load_count.dump (f);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}
/* Print access tree starting at ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access tree starting at ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}
/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses%s\n", desc->call_uses,
	     desc->remove_only_when_retval_removed ?
	     " remove_only_when_retval_removed" : "");
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    %s%s%s by_ref with %u pass throughs\n",
	     desc->safe_ref ? "safe" : "unsafe",
	     desc->conditionally_dereferenceable
	     ? " conditionally_dereferenceable" : "",
	     desc->split_only_when_retval_removed
	     ? " split_only_when_retval_removed" : "",
	     desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}
/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
			       vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}
/* Dump DESC to F.  If HINTS is true, also dump IPA-analysis computed
   hints.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc, bool hints)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting");
      if (hints && desc->by_ref && desc->safe_size_set)
	fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
      fprintf (f, "\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s",
	   desc->param_size_limit, desc->size_reached,
	   desc->by_ref ? ", by_ref" : "");
  if (desc->remove_only_when_retval_removed)
    fprintf (f, ", remove_only_when_retval_removed");
  if (desc->split_only_when_retval_removed)
    fprintf (f, ", split_only_when_retval_removed");
  if (desc->by_ref && desc->conditionally_dereferenceable)
    fprintf (f, ", conditionally_dereferenceable");
  if (hints)
    {
      if (desc->by_ref && !desc->not_specially_constructed)
	fprintf (f, ", args_specially_constructed");
      if (desc->by_ref && desc->safe_size_set)
	fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
    }
  fprintf (f, "\n");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}
/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
   If HINTS is true, also dump IPA-analysis computed hints.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl, isra_func_summary *ifs,
			     bool hints)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "    parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
    }
}
/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage.  If the
   function fails return false, otherwise return true.  SRC must fit into an
   unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}
/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}
/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}
/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for something else than either an actual call argument, simple return,
   simple arithmetic operation or debug statement.  If there are no such uses,
   return the number of actual arguments that this parameter eventually feeds
   to (or zero if there is none).  If there are any simple return uses, set
   DESC->remove_only_when_retval_removed.  For any such parameter, mark
   PARM_NUM as one of its sources.  ANALYZED is a bitmap that tracks which SSA
   names we have already started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
			      int parm_num, bitmap analyzed,
			      gensum_param_desc *desc)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
	 operations below.  */
      if (is_gimple_call (stmt))
	{
	  int all_uses = 0;
	  use_operand_p use_p;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    all_uses++;

	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      res = -1;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  int simple_uses = 0;
	  for (unsigned i = 0; i < arg_count; i++)
	    if (gimple_call_arg (call, i) == name)
	      {
		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
		  {
		    simple_uses = -1;
		    break;
		  }
		simple_uses++;
	      }

	  if (simple_uses < 0
	      || all_uses != simple_uses)
	    {
	      res = -1;
	      break;
	    }
	  res += all_uses;
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = -1;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
	    {
	      int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
						      analyzed, desc);
	      if (tmp < 0)
		{
		  res = tmp;
		  break;
		}
	      res += tmp;
	    }
	}
      else if (greturn *gr = dyn_cast<greturn *>(stmt))
	{
	  tree rv = gimple_return_retval (gr);
	  if (rv != name)
	    {
	      res = -1;
	      break;
	    }
	  desc->remove_only_when_retval_removed = true;
	}
      else
	{
	  res = -1;
	  break;
	}
    }
  return res;
}
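
/* As a sketch of what the function above computes, in

     int f (int a)
     {
       int t = a + 1;
       g (t, a);
       return 0;
     }

   it returns 2 for parameter A, which feeds one argument of g directly and
   another one through the temporary T.  */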
/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for something else than either an
   actual call argument, simple return, simple arithmetic operation or debug
   statement.  If there are no such uses, return false and store the number of
   actual arguments that this parameter eventually feeds to (or zero if there
   is none) to DESC->call_uses and set DESC->remove_only_when_retval_removed if
   there are any uses in return statements.  For any such parameter, mark
   PARM_NUM as one of its sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
				    int parm_num, gensum_param_desc *desc)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      desc->call_uses = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
						analyzed, desc);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  desc->call_uses = call_uses;
  return false;
}
/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any nonarg uses that are not actual arguments or
   otherwise infeasible uses.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as call arguments
   and if returning false, store their number into DESC->ptr_pt_count.  If
   removal of the return value would still allow splitting, return false but
   set DESC->split_only_when_retval_removed.  NODE and FUN must represent the
   function that is currently analyzed, PARM_NUM must be the index of the
   analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
			  int parm_num, gensum_param_desc *desc)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	continue;

      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (!TREE_THIS_VOLATILE (rhs))
	    {
	      while (handled_component_p (rhs))
		rhs = TREE_OPERAND (rhs, 0);
	      if (TREE_CODE (rhs) == MEM_REF
		  && TREE_OPERAND (rhs, 0) == name
		  && integer_zerop (TREE_OPERAND (rhs, 1))
		  && types_compatible_p (TREE_TYPE (rhs),
					 TREE_TYPE (TREE_TYPE (name))))
		uses_ok++;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      ret = true;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  for (unsigned i = 0; i < arg_count; ++i)
	    {
	      tree arg = gimple_call_arg (stmt, i);

	      if (arg == name)
		{
		  /* TODO: Allow &MEM_REF[name + offset] here,
		     ipa_param_body_adjustments::modify_call_stmt has to be
		     adjusted too.  */
		  csum->m_arg_flow[i].pointer_pass_through = true;
		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
		  pt_count++;
		  uses_ok++;
		  continue;
		}

	      if (!TREE_THIS_VOLATILE (arg))
		{
		  while (handled_component_p (arg))
		    arg = TREE_OPERAND (arg, 0);
		  if (TREE_CODE (arg) == MEM_REF
		      && TREE_OPERAND (arg, 0) == name
		      && integer_zerop (TREE_OPERAND (arg, 1))
		      && types_compatible_p (TREE_TYPE (arg),
					     TREE_TYPE (TREE_TYPE (name))))
		    uses_ok++;
		}
	    }
	}
      else if (greturn *gr = dyn_cast<greturn *>(stmt))
	{
	  tree rv = gimple_return_retval (gr);
	  if (rv == name)
	    {
	      uses_ok++;
	      /* Analysis for feasibility of removal must have already reached
		 the conclusion that the flag must be set if it completed.  */
	      gcc_assert (!desc->locally_unused
			  || desc->remove_only_when_retval_removed);
	      desc->split_only_when_retval_removed = true;
	    }
	}

      /* If the number of valid uses does not match the number of
	 uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
	all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
	{
	  ret = true;
	  break;
	}
    }

  desc->ptr_pt_count = pt_count;
  return ret;
}
/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false if there are candidates for removal of register
   parameters).  */

static bool
create_parameter_descriptors (cgraph_node *node,
			      vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_generic_expr (dump_file, parm, TDF_UID);

      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is volatile\n");
	  continue;
	}
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is a va_list type\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is addressable\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, type cannot be split\n");
	  continue;
	}

      if (is_gimple_reg (parm)
	  && !isra_track_scalar_param_local_uses (fun, node, parm, num, desc))
	{
	  desc->locally_unused = true;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " is a scalar with only %i call uses%s\n",
		     desc->call_uses,
		     desc->remove_only_when_retval_removed
		     ? " and return uses": "");
	}

      if (POINTER_TYPE_P (type))
	{
	  desc->by_ref = true;
	  if (TREE_CODE (type) == REFERENCE_TYPE
	      || (num == 0
		  && TREE_CODE (TREE_TYPE (node->decl)) == METHOD_TYPE))
	    desc->safe_ref = true;
	  else
	    desc->safe_ref = false;
	  type = TREE_TYPE (type);

	  if (TREE_CODE (type) == FUNCTION_TYPE
	      || TREE_CODE (type) == METHOD_TYPE)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a function\n");
	      continue;
	    }
	  if (TYPE_VOLATILE (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a volatile type\n");
	      continue;
	    }
	  if (TREE_CODE (type) == ARRAY_TYPE
	      && TYPE_NONALIASED_COMPONENT (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a nonaliased component array\n");
	      continue;
	    }
	  if (!is_gimple_reg (parm))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, a reference which is "
			 "not a gimple register (probably addressable)\n");
	      continue;
	    }
	  if (is_va_list_type (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a va list\n");
	      continue;
	    }
	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num, desc))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference has "
			 "nonarg uses\n");
	      continue;
	    }
	}
      else if (!AGGREGATE_TYPE_P (type))
	{
	  /* This is in an else branch because scalars passed by reference are
	     still candidates to be passed by value.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not an aggregate\n");
	  continue;
	}

      if (!COMPLETE_TYPE_P (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not a complete type\n");
	  continue;
	}
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, size not representable\n");
	  continue;
	}
      unsigned HOST_WIDE_INT type_size
	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
	  || type_size >= ISRA_ARG_SIZE_LIMIT)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
	  continue;
	}
      if (type_internals_preclude_sra_p (type, &msg))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, %s\n", msg);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref && !desc->safe_ref)
	desc->deref_index = unsafe_by_ref_count++;
    }
  return ret;
}
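
/* For illustration, in a function declared as

     int f (int x, va_list ap, struct big *p)

   X may end up locally unused, AP is rejected as a va_list and P becomes a
   split candidate provided struct big is complete, smaller than
   ISRA_ARG_SIZE_LIMIT and P has no disqualifying uses.  */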
/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  if (!decl2desc)
    return NULL;
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}
/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
	     desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}
/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
		 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
					     sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  access->load_count = profile_count::zero ();
  return access;
}
/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put it into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
	return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
	 it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
	 && access->next_sibling
	 && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
	 something below access.  But we only allow nesting in call
	 arguments.  */
      if (access->nonarg)
	return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
	 what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
	return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->first_child = access;

      while (access->next_sibling
	     && access->next_sibling->offset < offset + size)
	access = access->next_sibling;
      if (access->offset + access->size > offset + size)
	{
	  /* This must be a different access, which are sorted, so the
	     following must be true and this signals a partial overlap.  */
	  gcc_assert (access->offset > offset);
	  return NULL;
	}

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
	 super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, and mark it as used in context CTX.
   Attempt to create it if it does not exist, but fail and return NULL if there
   are already too many accesses, if it would create a partially overlapping
   access or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
	    isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
					      size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
				  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}
/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
		      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
	{
	  if (access->offset < parent_offset)
	    {
	      error ("Access offset before parent offset");
	      return true;
	    }
	  if (access->size >= parent_size)
	    {
	      error ("Access size greater or equal to its parent size");
	      return true;
	    }
	  if (access->offset + access->size > parent_offset + parent_size)
	    {
	      error ("Access terminates outside of its parent");
	      return true;
	    }
	}

      if (verify_access_tree_1 (access->first_child, access->offset,
				access->size))
	return true;

      if (access->next_sibling
	  && (access->next_sibling->offset < access->offset + access->size))
	{
	  error ("Access overlaps with its sibling");
	  return true;
	}

      access = access->next_sibling;
    }
  return false;
}
/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
	dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}
/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}
/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
			basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (desc->safe_ref
      || bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * unsafe_by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
/* Return true, if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
	  || VECTOR_TYPE_P (new_type)))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || VECTOR_TYPE_P (old_type))
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
	  != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
	  != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
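
/* Under the rules above, for example, an int prevails over a same-sized
   single-member struct, a complex type prevails over a plain float, and of
   two integral types the one with greater TYPE_PRECISION wins.  */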
/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};
/* Record a use of the portion given by UNIT_OFFSET and UNIT_SIZE of a
   parameter described by DESC in a call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
			     scan_call_info *call_info,
			     unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}
/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}
1789 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1790 specifies whether EXPR is used in a load, a store or as a call argument. BB
1791 must be the basic block in which EXPR resides. If CTX specifies call
1792 argument context, CALL_INFO must describe that call and argument position,
1793 otherwise it is ignored. */
1795 static void
1796 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1797 basic_block bb, scan_call_info *call_info = NULL)
1799 poly_int64 poffset, psize, pmax_size;
1800 HOST_WIDE_INT offset, size, max_size;
1801 tree base;
1802 bool deref = false;
1803 bool reverse;
1805 if (TREE_CODE (expr) == ADDR_EXPR)
1807 if (ctx == ISRA_CTX_ARG)
1808 return;
1809 tree t = get_base_address (TREE_OPERAND (expr, 0));
1810 if (VAR_P (t) && !TREE_STATIC (t))
1811 loaded_decls->add (t);
1812 return;
1814 if (TREE_CODE (expr) == SSA_NAME
1815 || CONSTANT_CLASS_P (expr))
1816 return;
1818 if (TREE_CODE (expr) == BIT_FIELD_REF
1819 || TREE_CODE (expr) == IMAGPART_EXPR
1820 || TREE_CODE (expr) == REALPART_EXPR)
1821 expr = TREE_OPERAND (expr, 0);
1823 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1825 if (TREE_CODE (base) == MEM_REF)
1827 tree op = TREE_OPERAND (base, 0);
1828 if (TREE_CODE (op) != SSA_NAME
1829 || !SSA_NAME_IS_DEFAULT_DEF (op))
1830 return;
1831 base = SSA_NAME_VAR (op);
1832 if (!base)
1833 return;
1834 deref = true;
1836 else if (VAR_P (base)
1837 && !TREE_STATIC (base)
1838 && (ctx == ISRA_CTX_ARG
1839 || ctx == ISRA_CTX_LOAD))
1840 loaded_decls->add (base);
1842 if (TREE_CODE (base) != PARM_DECL)
1843 return;
1845 gensum_param_desc *desc = get_gensum_param_desc (base);
1846 if (!desc || !desc->split_candidate)
1847 return;
1849 if (!poffset.is_constant (&offset)
1850 || !psize.is_constant (&size)
1851 || !pmax_size.is_constant (&max_size))
1853 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1854 "access.");
1855 return;
1857 if (size < 0 || size != max_size)
1859 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1860 return;
1862 if (TREE_CODE (expr) == COMPONENT_REF
1863 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1865 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1866 return;
1868 if (offset < 0)
1870 disqualify_split_candidate (desc, "Encountered an access at a "
1871 "negative offset.");
1872 return;
1874 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1875 gcc_assert ((size % BITS_PER_UNIT) == 0);
1876 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1877 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1879 disqualify_split_candidate (desc, "Encountered an access with too big "
1880 "offset or size");
1881 return;
1884 tree type = TREE_TYPE (expr);
1885 unsigned int exp_align = get_object_alignment (expr);
1887 if (exp_align < TYPE_ALIGN (type))
1889 disqualify_split_candidate (desc, "Underaligned access.");
1890 return;
1893 if (deref)
1895 if (!desc->by_ref)
1897 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1898 return;
1900 else if (ctx == ISRA_CTX_STORE)
1902 disqualify_split_candidate (desc, "Storing to data passed by "
1903 "reference.");
1904 return;
1907 if (!aa_walking_limit)
1909 disqualify_split_candidate (desc, "Out of alias analysis step "
1910 "limit.");
1911 return;
1914 gcc_checking_assert (gimple_vuse (stmt));
1915 bool maybe_modified = false;
1916 ao_ref ar;
1918 ao_ref_init (&ar, expr);
1919 bitmap visited = BITMAP_ALLOC (NULL);
1920 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1921 mark_maybe_modified, &maybe_modified,
1922 &visited, NULL, aa_walking_limit);
1923 BITMAP_FREE (visited);
1924 if (walked > 0)
1926 gcc_assert (aa_walking_limit > walked);
1927 aa_walking_limit = aa_walking_limit - walked;
1929 if (walked < 0)
1930 aa_walking_limit = 0;
1931 if (maybe_modified || walked < 0)
1933 disqualify_split_candidate (desc, "Data passed by reference possibly "
1934 "modified through an alias.");
1935 return;
1937 else
1938 mark_param_dereference (desc, offset + size, bb);
1940 else
1941 /* Pointer parameters with direct uses should have been ruled out by
1942 analyzing SSA default def when looking at the parameters. */
1943 gcc_assert (!desc->by_ref);
1945 gensum_param_access *access = get_access (desc, offset, size, ctx);
1946 if (!access)
1947 return;
1949 if (ctx == ISRA_CTX_ARG)
1951 gcc_checking_assert (call_info);
1953 if (!deref)
1954 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1955 size / BITS_PER_UNIT);
1956 else
1957 /* This is not a pass-through of a pointer; this is a use like any
1958 other. */
1959 access->nonarg = true;
1961 else if (ctx == ISRA_CTX_LOAD && bb->count.initialized_p ())
1962 access->load_count += bb->count;
1964 if (!access->type)
1966 access->type = type;
1967 access->alias_ptr_type = reference_alias_ptr_type (expr);
1968 access->reverse = reverse;
1970 else
1972 if (exp_align < TYPE_ALIGN (access->type))
1974 disqualify_split_candidate (desc, "Reference has lower alignment "
1975 "than a previous one.");
1976 return;
1978 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1980 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1981 return;
1983 if (access->reverse != reverse)
1985 disqualify_split_candidate (desc, "Both normal and reverse "
1986 "scalar storage order.");
1987 return;
1989 if (!deref
1990 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1991 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1993 /* We need the same aggregate type on all accesses to be able to
1994 distinguish transformation spots from pass-through arguments in
1995 the transformation phase. */
1996 disqualify_split_candidate (desc, "We do not support aggregate "
1997 "type punning.");
1998 return;
2001 if (type_prevails_p (access->type, type))
2002 access->type = type;
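/* Hypothetical examples of accesses that the checks above reject:
   s.arr[i] (its size and max_size differ, i.e. a variable sized access),
   s.bitfld (a bit-field access), or loads of *p under two incompatible
   aggregate types (aggregate type punning).  */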
2006 /* Scan the body of the function described by NODE and FUN and create access
2007 trees for its parameters. */
2009 static void
2010 scan_function (cgraph_node *node, struct function *fun)
2012 basic_block bb;
2014 FOR_EACH_BB_FN (bb, fun)
2016 gimple_stmt_iterator gsi;
2017 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2019 gimple *stmt = gsi_stmt (gsi);
2021 if (final_bbs && stmt_can_throw_external (fun, stmt))
2022 bitmap_set_bit (final_bbs, bb->index);
2023 switch (gimple_code (stmt))
2025 case GIMPLE_RETURN:
2027 tree t = gimple_return_retval (as_a <greturn *> (stmt));
2028 if (t != NULL_TREE)
2029 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
2030 if (final_bbs)
2031 bitmap_set_bit (final_bbs, bb->index);
2033 break;
2035 case GIMPLE_ASSIGN:
2036 if (gimple_assign_single_p (stmt)
2037 && !gimple_clobber_p (stmt))
2039 tree rhs = gimple_assign_rhs1 (stmt);
2040 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
2041 tree lhs = gimple_assign_lhs (stmt);
2042 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
2044 break;
2046 case GIMPLE_CALL:
2048 unsigned argument_count = gimple_call_num_args (stmt);
2049 isra_scan_context ctx = ISRA_CTX_ARG;
2050 scan_call_info call_info, *call_info_p = &call_info;
2051 if (gimple_call_internal_p (stmt))
2053 call_info_p = NULL;
2054 ctx = ISRA_CTX_LOAD;
2055 internal_fn ifn = gimple_call_internal_fn (stmt);
2056 if (internal_store_fn_p (ifn))
2057 ctx = ISRA_CTX_STORE;
2059 else
2061 call_info.cs = node->get_edge (stmt);
2062 call_info.argument_count = argument_count;
2065 for (unsigned i = 0; i < argument_count; i++)
2067 call_info.arg_idx = i;
2068 scan_expr_access (gimple_call_arg (stmt, i), stmt,
2069 ctx, bb, call_info_p);
2072 tree lhs = gimple_call_lhs (stmt);
2073 if (lhs)
2074 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
2075 int flags = gimple_call_flags (stmt);
2076 if (final_bbs
2077 && (((flags & (ECF_CONST | ECF_PURE)) == 0)
2078 || (flags & ECF_LOOPING_CONST_OR_PURE)))
2079 bitmap_set_bit (final_bbs, bb->index);
2081 break;
2083 case GIMPLE_ASM:
2085 gasm *asm_stmt = as_a <gasm *> (stmt);
2086 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
2087 asm_visit_addr);
2088 if (final_bbs)
2089 bitmap_set_bit (final_bbs, bb->index);
2091 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
2093 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
2094 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
2096 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
2098 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
2099 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
2102 break;
2104 default:
2105 break;
2111 /* Return true if SSA_NAME NAME of function described by FUN is only used in
2112 return statements, or if results of any operations it is involved in are
2113 only used in return statements. ANALYZED is a bitmap that tracks which SSA
2114 names we have already started investigating. */
2116 static bool
2117 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
2119 bool res = true;
2120 imm_use_iterator imm_iter;
2121 gimple *stmt;
2123 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
2125 if (is_gimple_debug (stmt))
2126 continue;
2128 if (gimple_code (stmt) == GIMPLE_RETURN)
2130 tree t = gimple_return_retval (as_a <greturn *> (stmt));
2131 if (t != name)
2133 res = false;
2134 break;
2137 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
2138 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
2139 || gimple_code (stmt) == GIMPLE_PHI))
2141 /* TODO: And perhaps for const function calls too? */
2142 tree lhs;
2143 if (gimple_code (stmt) == GIMPLE_PHI)
2144 lhs = gimple_phi_result (stmt);
2145 else
2146 lhs = gimple_assign_lhs (stmt);
2148 if (TREE_CODE (lhs) != SSA_NAME)
2150 res = false;
2151 break;
2153 gcc_assert (!gimple_vdef (stmt));
2154 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2155 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2157 res = false;
2158 break;
2161 else
2163 res = false;
2164 break;
2167 return res;
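/* For instance (a hypothetical caller):

     int caller (void)
     {
       int t = callee ();
       int u = t + 1;    // arithmetic on the value is acceptable...
       return u;         // ...as long as the result is only returned
     }

   the predicate holds for the SSA name holding the call result, so the
   edge summary can set m_return_returned.  */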
2170 /* Inspect the uses of the return value of the call associated with CS, and if
2171 it is not used or if it is only used to construct the return value of the
2172 caller, mark it as such in call or caller summary. Also check for
2173 misaligned arguments. */
2175 static void
2176 isra_analyze_call (cgraph_edge *cs)
2178 gcall *call_stmt = cs->call_stmt;
2179 unsigned count = gimple_call_num_args (call_stmt);
2180 isra_call_summary *csum = call_sums->get_create (cs);
2182 for (unsigned i = 0; i < count; i++)
2184 tree arg = gimple_call_arg (call_stmt, i);
2185 if (TREE_CODE (arg) == ADDR_EXPR)
2187 poly_int64 poffset, psize, pmax_size;
2188 bool reverse;
2189 tree base = get_ref_base_and_extent (TREE_OPERAND (arg, 0), &poffset,
2190 &psize, &pmax_size, &reverse);
2191 HOST_WIDE_INT offset;
2192 unsigned HOST_WIDE_INT ds;
2193 if (DECL_P (base)
2194 && (poffset.is_constant (&offset))
2195 && tree_fits_uhwi_p (DECL_SIZE (base))
2196 && ((ds = tree_to_uhwi (DECL_SIZE (base)) - offset)
2197 < ISRA_ARG_SIZE_LIMIT * BITS_PER_UNIT))
2199 csum->init_inputs (count);
2200 gcc_assert (!csum->m_arg_flow[i].aggregate_pass_through);
2201 csum->m_arg_flow[i].unit_size = ds / BITS_PER_UNIT;
2204 if (TREE_CODE (base) == VAR_DECL
2205 && !TREE_STATIC (base)
2206 && !loaded_decls->contains (base))
2208 csum->init_inputs (count);
2209 csum->m_arg_flow[i].constructed_for_calls = true;
2213 if (is_gimple_reg (arg))
2214 continue;
2216 tree offset;
2217 poly_int64 bitsize, bitpos;
2218 machine_mode mode;
2219 int unsignedp, reversep, volatilep = 0;
2220 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2221 &unsignedp, &reversep, &volatilep);
2222 if (!multiple_p (bitpos, BITS_PER_UNIT))
2224 csum->m_bit_aligned_arg = true;
2225 break;
2229 tree lhs = gimple_call_lhs (call_stmt);
2230 if (lhs)
2232 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2233 from this function (without being read anywhere). */
2234 if (TREE_CODE (lhs) == SSA_NAME)
2236 bitmap analyzed = BITMAP_ALLOC (NULL);
2237 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2238 lhs, analyzed))
2239 csum->m_return_returned = true;
2240 BITMAP_FREE (analyzed);
2243 else
2244 csum->m_return_ignored = true;
2247 /* Look at all calls going out of NODE and perform all analyses necessary for
2248 IPA-SRA that are not done at body scan time or that must be done even when
2249 the body is not scanned because the function is not a candidate. */
2251 static void
2252 isra_analyze_all_outgoing_calls (cgraph_node *node)
2254 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2255 isra_analyze_call (cs);
2256 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2257 isra_analyze_call (cs);
2260 /* Dump a dereferences table with heading STR to file F. */
2262 static void
2263 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2265 basic_block bb;
2267 fprintf (f, "%s", str);
2268 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2269 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2271 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2272 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2274 int i;
2275 for (i = 0; i < unsafe_by_ref_count; i++)
2277 int idx = bb->index * unsafe_by_ref_count + i;
2278 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2281 fprintf (f, "\n");
2283 fprintf (f, "\n");
2286 /* Propagate distances in bb_dereferences in the opposite direction than the
2287 control flow edges, in each step storing the maximum of the current value
2288 and the minimum of all successors. These steps are repeated until the table
2289 stabilizes. Note that BBs which might terminate the function (according to
2290 the final_bbs bitmap) are never updated in this way. */
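/* A small hypothetical example of the propagation: if, before
   propagation, BB2 has recorded distance 0 and branches to BB3 and BB4
   with distances 64 and 32 (in bits), BB2 inherits the minimum of its
   successors, 32, because at least that much is dereferenced on every
   path leaving it.  BBs in final_bbs are skipped because execution may
   terminate in them before reaching any successor.  */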
2292 static void
2293 propagate_dereference_distances (struct function *fun)
2295 basic_block bb;
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2298 dump_dereferences_table (dump_file, fun,
2299 "Dereference table before propagation:\n");
2301 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2302 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2303 FOR_EACH_BB_FN (bb, fun)
2305 queue.quick_push (bb);
2306 bb->aux = bb;
2309 while (!queue.is_empty ())
2311 edge_iterator ei;
2312 edge e;
2313 bool change = false;
2314 int i;
2316 bb = queue.pop ();
2317 bb->aux = NULL;
2319 if (bitmap_bit_p (final_bbs, bb->index))
2320 continue;
2322 for (i = 0; i < unsafe_by_ref_count; i++)
2324 int idx = bb->index * unsafe_by_ref_count + i;
2325 bool first = true;
2326 HOST_WIDE_INT inh = 0;
2328 FOR_EACH_EDGE (e, ei, bb->succs)
2330 int succ_idx = e->dest->index * unsafe_by_ref_count + i;
2332 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2333 continue;
2335 if (first)
2337 first = false;
2338 inh = bb_dereferences [succ_idx];
2340 else if (bb_dereferences [succ_idx] < inh)
2341 inh = bb_dereferences [succ_idx];
2344 if (!first && bb_dereferences[idx] < inh)
2346 bb_dereferences[idx] = inh;
2347 change = true;
2351 if (change)
2352 FOR_EACH_EDGE (e, ei, bb->preds)
2354 if (e->src->aux)
2355 continue;
2357 e->src->aux = e->src;
2358 queue.quick_push (e->src);
2362 if (dump_file && (dump_flags & TDF_DETAILS))
2363 dump_dereferences_table (dump_file, fun,
2364 "Dereference table after propagation:\n");
2367 /* Return true if the ACCESS loads happen frequently enough in FUN to risk
2368 moving them to the caller and only passing the result. */
2370 static bool
2371 dereference_probable_p (struct function *fun, gensum_param_access *access)
2373 int threshold = opt_for_fn (fun->decl, param_ipa_sra_deref_prob_threshold);
2374 return access->load_count
2375 >= ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (threshold, 100);
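/* A hypothetical numeric example: with a threshold of 50, loads whose
   combined profile count is at least half of the entry block count
   (say 600 versus an entry count of 1000) are considered probable
   enough to be moved to the callers.  */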
2378 /* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
2379 its children, return true if the parameter cannot be split, otherwise return
2380 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2381 the index of the entry BB in the function of PARM. */
2383 static bool
2384 check_gensum_access (struct function *fun, tree parm, gensum_param_desc *desc,
2385 gensum_param_access *access,
2386 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2387 int entry_bb_index)
2389 if (access->nonarg)
2391 *only_calls = false;
2392 *nonarg_acc_size += access->size;
2394 if (access->first_child)
2396 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2397 return true;
2400 /* Do not decompose a non-BLKmode param in a way that would create
2401 BLKmode params. Especially for by-reference passing (thus,
2402 pointer-type param) this is hardly worthwhile. */
2403 if (DECL_MODE (parm) != BLKmode
2404 && TYPE_MODE (access->type) == BLKmode)
2406 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2407 return true;
2410 if (desc->by_ref)
2412 if (desc->safe_ref)
2414 if (!dereference_probable_p (fun, access))
2416 disqualify_split_candidate (desc, "Dereferences in callers "
2417 "would happen much more frequently.");
2418 return true;
2421 else
2423 int idx = (entry_bb_index * unsafe_by_ref_count + desc->deref_index);
2424 if ((access->offset + access->size) > bb_dereferences[idx])
2426 if (!dereference_probable_p (fun, access))
2428 disqualify_split_candidate (desc, "Would create a possibly "
2429 "illegal dereference in a "
2430 "caller.");
2431 return true;
2433 desc->conditionally_dereferenceable = true;
2438 for (gensum_param_access *ch = access->first_child;
2440 ch = ch->next_sibling)
2441 if (check_gensum_access (fun, parm, desc, ch, nonarg_acc_size, only_calls,
2442 entry_bb_index))
2443 return true;
2445 return false;
2448 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2449 descriptor DESC. */
2451 static void
2452 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2454 param_access *to = ggc_cleared_alloc<param_access> ();
2455 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2456 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2457 to->unit_offset = from->offset / BITS_PER_UNIT;
2458 to->unit_size = from->size / BITS_PER_UNIT;
2459 to->type = from->type;
2460 to->alias_ptr_type = from->alias_ptr_type;
2461 to->certain = from->nonarg;
2462 to->reverse = from->reverse;
2463 vec_safe_push (desc->accesses, to);
2465 for (gensum_param_access *ch = from->first_child;
2467 ch = ch->next_sibling)
2468 copy_accesses_to_ipa_desc (ch, desc);
2471 /* Analyze the function body scan results stored in PARAM_DESCRIPTIONS,
2472 detect possible transformations and store information about those in the
2473 function summary. NODE, FUN and IFS are all various structures
2474 describing the currently analyzed function. */
2476 static void
2477 process_scan_results (cgraph_node *node, struct function *fun,
2478 isra_func_summary *ifs,
2479 vec<gensum_param_desc> *param_descriptions)
2481 bool check_pass_throughs = false;
2482 bool dereferences_propagated = false;
2483 tree parm = DECL_ARGUMENTS (node->decl);
2484 unsigned param_count = param_descriptions->length();
2486 for (unsigned desc_index = 0;
2487 desc_index < param_count;
2488 desc_index++, parm = DECL_CHAIN (parm))
2490 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2491 if (!desc->split_candidate)
2492 continue;
2494 if (flag_checking)
2495 isra_verify_access_tree (desc->accesses);
2497 if (!dereferences_propagated
2498 && desc->by_ref
2499 && !desc->safe_ref
2500 && desc->accesses)
2502 propagate_dereference_distances (fun);
2503 dereferences_propagated = true;
2506 HOST_WIDE_INT nonarg_acc_size = 0;
2507 bool only_calls = true;
2508 bool check_failed = false;
2510 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2511 for (gensum_param_access *acc = desc->accesses;
2512 acc;
2513 acc = acc->next_sibling)
2514 if (check_gensum_access (fun, parm, desc, acc, &nonarg_acc_size,
2515 &only_calls, entry_bb_index))
2517 check_failed = true;
2518 break;
2520 if (check_failed)
2521 continue;
2523 if (only_calls)
2524 desc->locally_unused = true;
2526 HOST_WIDE_INT cur_param_size
2527 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2528 HOST_WIDE_INT param_size_limit, optimistic_limit;
2529 if (!desc->by_ref || optimize_function_for_size_p (fun))
2531 param_size_limit = cur_param_size;
2532 optimistic_limit = cur_param_size;
2534 else
2536 param_size_limit
2537 = opt_for_fn (node->decl,
2538 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2539 optimistic_limit
2540 = (opt_for_fn (node->decl, param_ipa_sra_ptrwrap_growth_factor)
2541 * param_size_limit);
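/* E.g. (hypothetical numbers): for a 64-bit pointer parameter with
   param_ipa_sra_ptr_growth_factor 2 and
   param_ipa_sra_ptrwrap_growth_factor 2, param_size_limit would be
   128 bits and optimistic_limit 256 bits.  */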
2544 if (nonarg_acc_size > optimistic_limit
2545 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2547 disqualify_split_candidate (desc, "Would result into a too big set "
2548 "of replacements even in best "
2549 "scenarios.");
2551 else
2553 /* create_parameter_descriptors makes sure unit sizes of all
2554 candidate parameters fit unsigned integers restricted to
2555 ISRA_ARG_SIZE_LIMIT. */
2556 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2557 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2558 if (desc->split_candidate && desc->ptr_pt_count)
2560 gcc_assert (desc->by_ref);
2561 check_pass_throughs = true;
2566 /* When a pointer parameter is passed-through to a callee, in which it is
2567 only used to read only one or a few items, we can attempt to transform it
2568 to obtaining and passing through the items instead of the pointer. But we
2569 must take extra care that 1) we do not introduce any segfault by moving
2570 dereferences above control flow and that 2) the data is not modified
2571 through an alias in this function. The IPA analysis must not introduce
2572 any access candidates unless it can prove both.
2574 The current solution is very crude as it consists of ensuring that the
2575 call postdominates entry BB and that the definition of VUSE of the call is
2576 default definition. TODO: For non-recursive callees in the same
2577 compilation unit we could do better by doing analysis in topological order
2578 and looking into access candidates of callees, using their alias_ptr_types
2579 to attempt real AA. We could also use the maximum known dereferenced
2580 offset in this function at IPA level.
2582 TODO: Measure the overhead and the effect of just being pessimistic.
2583 Maybe this is only -O3 material? */
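/* A hypothetical situation in which both conditions provably hold:

     void caller (int *p)
     {
       callee (p);   // first statement, its VUSE is the default def
     }

   The call postdominates the entry BB and nothing in caller can have
   stored into *p before it, so accesses that callee performs on *p may
   be pulled into caller.  */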
2585 hash_map<gimple *, bool> analyzed_stmts;
2586 bitmap always_executed_bbs = NULL;
2587 if (check_pass_throughs)
2588 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2590 gcall *call_stmt = cs->call_stmt;
2591 tree vuse = gimple_vuse (call_stmt);
2593 /* If the callee is a const function, we don't get a VUSE. In such
2594 case there will be no memory accesses in the called function (or the
2595 const attribute is wrong) and then we just don't care. */
2596 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2598 unsigned count = gimple_call_num_args (call_stmt);
2599 isra_call_summary *csum = call_sums->get_create (cs);
2600 csum->init_inputs (count);
2601 csum->m_before_any_store = uses_memory_as_obtained;
2602 for (unsigned argidx = 0; argidx < count; argidx++)
2604 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2605 continue;
2606 unsigned pidx
2607 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2608 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2609 if (!desc->split_candidate)
2611 csum->m_arg_flow[argidx].pointer_pass_through = false;
2612 continue;
2614 if (!uses_memory_as_obtained)
2615 continue;
2617 if (desc->safe_ref)
2619 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2620 continue;
2623 /* Walk the basic block and see if its execution can terminate earlier.
2624 Keep the info for later re-use to avoid quadratic behaviour here. */
2625 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
2626 bool safe = true;
2627 int n = 0;
2628 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2630 bool *b = analyzed_stmts.get (gsi_stmt (gsi));
2631 if (b)
2633 safe = *b;
2634 gsi_next (&gsi);
2635 break;
2637 n++;
2638 if (stmt_may_terminate_function_p (fun, gsi_stmt (gsi), false))
2640 safe = false;
2641 break;
2644 if (n)
2646 if (gsi_end_p (gsi))
2647 gsi = gsi_start_bb (gimple_bb (call_stmt));
2648 for (; gsi_stmt (gsi) != call_stmt; gsi_next (&gsi))
2649 analyzed_stmts.get_or_insert (gsi_stmt (gsi)) = safe;
2652 if (safe && !always_executed_bbs)
2654 mark_dfs_back_edges ();
2655 always_executed_bbs = find_always_executed_bbs (fun, false);
2657 if (safe && bitmap_bit_p (always_executed_bbs, gimple_bb (call_stmt)->index))
2658 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2662 BITMAP_FREE (always_executed_bbs);
2664 /* TODO: Add early exit if we disqualified everything. This also requires
2665 that we either relax the restriction that
2666 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2667 or store the number of parameters to IPA-SRA function summary and use that
2668 when just removing params. */
2670 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2671 ifs->m_parameters->quick_grow_cleared (param_count);
2672 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2674 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2675 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2677 d->param_size_limit = s->param_size_limit;
2678 d->size_reached = s->nonarg_acc_size;
2679 d->locally_unused = s->locally_unused;
2680 d->split_candidate = s->split_candidate;
2681 d->by_ref = s->by_ref;
2682 d->remove_only_when_retval_removed = s->remove_only_when_retval_removed;
2683 d->split_only_when_retval_removed = s->split_only_when_retval_removed;
2684 d->conditionally_dereferenceable = s->conditionally_dereferenceable;
2686 for (gensum_param_access *acc = s->accesses;
2687 acc;
2688 acc = acc->next_sibling)
2689 copy_accesses_to_ipa_desc (acc, d);
2692 if (dump_file)
2693 dump_isra_param_descriptors (dump_file, node->decl, ifs, false);
2696 /* Return true if there are any overlaps among certain accesses of DESC. If
2697 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2698 too. DESC is assumed to be a split candidate that is not locally
2699 unused. */
2701 static bool
2702 overlapping_certain_accesses_p (isra_param_desc *desc,
2703 bool *certain_access_present_p)
2705 unsigned pclen = vec_safe_length (desc->accesses);
2706 for (unsigned i = 0; i < pclen; i++)
2708 param_access *a1 = (*desc->accesses)[i];
2710 if (!a1->certain)
2711 continue;
2712 if (certain_access_present_p)
2713 *certain_access_present_p = true;
2714 for (unsigned j = i + 1; j < pclen; j++)
2716 param_access *a2 = (*desc->accesses)[j];
2717 if (a2->certain
2718 && a1->unit_offset < a2->unit_offset + a2->unit_size
2719 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2720 return true;
2723 return false;
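/* Illustration of the interval test above (hypothetical unit offsets and
   sizes): accesses (0, 4) and (4, 4) touch but do not overlap, whereas
   (0, 8) and (4, 4) do overlap and make this function return true.  */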
2726 /* Check for any overlaps of certain param accesses among splitting candidates
2727 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2728 check that used splitting candidates have at least one certain access. */
2730 static void
2731 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2733 isra_func_summary *ifs = func_sums->get (node);
2734 if (!ifs || !ifs->m_candidate)
2735 return;
2736 unsigned param_count = vec_safe_length (ifs->m_parameters);
2737 for (unsigned pidx = 0; pidx < param_count; pidx++)
2739 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2740 if (!desc->split_candidate || desc->locally_unused)
2741 continue;
2743 bool certain_access_present = !certain_must_exist;
2744 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2745 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2746 "which overlap", node->dump_name (), pidx);
2747 if (!certain_access_present)
2748 internal_error ("function %qs, parameter %u, is used but does not "
2749 "have any certain IPA-SRA access",
2750 node->dump_name (), pidx);
2754 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2755 this compilation unit and create summary structures describing IPA-SRA
2756 opportunities and constraints in them. */
2758 static void
2759 ipa_sra_generate_summary (void)
2761 struct cgraph_node *node;
2763 gcc_checking_assert (!func_sums);
2764 gcc_checking_assert (!call_sums);
2765 func_sums
2766 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2767 ipa_sra_function_summaries (symtab, true));
2768 call_sums = new ipa_sra_call_summaries (symtab);
2770 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2771 ipa_sra_summarize_function (node);
2772 return;
2775 /* Write intraprocedural analysis information about E and all of its outgoing
2776 edges into a stream for LTO WPA. */
2778 static void
2779 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2781 isra_call_summary *csum = call_sums->get (e);
2782 unsigned input_count = csum->m_arg_flow.length ();
2783 streamer_write_uhwi (ob, input_count);
2784 for (unsigned i = 0; i < input_count; i++)
2786 isra_param_flow *ipf = &csum->m_arg_flow[i];
2787 streamer_write_hwi (ob, ipf->length);
2788 bitpack_d bp = bitpack_create (ob->main_stream);
2789 for (int j = 0; j < ipf->length; j++)
2790 bp_pack_value (&bp, ipf->inputs[j], 8);
2791 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2792 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2793 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2794 bp_pack_value (&bp, ipf->constructed_for_calls, 1);
2795 streamer_write_bitpack (&bp);
2796 streamer_write_uhwi (ob, ipf->unit_offset);
2797 streamer_write_uhwi (ob, ipf->unit_size);
2799 bitpack_d bp = bitpack_create (ob->main_stream);
2800 bp_pack_value (&bp, csum->m_return_ignored, 1);
2801 bp_pack_value (&bp, csum->m_return_returned, 1);
2802 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2803 bp_pack_value (&bp, csum->m_before_any_store, 1);
2804 streamer_write_bitpack (&bp);
2807 /* Write intraprocedural analysis information about NODE and all of its outgoing
2808 edges into a stream for LTO WPA. */
2810 static void
2811 isra_write_node_summary (output_block *ob, cgraph_node *node)
2813 isra_func_summary *ifs = func_sums->get (node);
2814 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2815 int node_ref = lto_symtab_encoder_encode (encoder, node);
2816 streamer_write_uhwi (ob, node_ref);
2818 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2819 streamer_write_uhwi (ob, param_desc_count);
2820 for (unsigned i = 0; i < param_desc_count; i++)
2822 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2823 unsigned access_count = vec_safe_length (desc->accesses);
2824 streamer_write_uhwi (ob, access_count);
2825 for (unsigned j = 0; j < access_count; j++)
2827 param_access *acc = (*desc->accesses)[j];
2828 stream_write_tree (ob, acc->type, true);
2829 stream_write_tree (ob, acc->alias_ptr_type, true);
2830 streamer_write_uhwi (ob, acc->unit_offset);
2831 streamer_write_uhwi (ob, acc->unit_size);
2832 bitpack_d bp = bitpack_create (ob->main_stream);
2833 bp_pack_value (&bp, acc->certain, 1);
2834 bp_pack_value (&bp, acc->reverse, 1);
2835 streamer_write_bitpack (&bp);
2837 streamer_write_uhwi (ob, desc->param_size_limit);
2838 streamer_write_uhwi (ob, desc->size_reached);
2839 gcc_assert (desc->safe_size == 0);
2840 bitpack_d bp = bitpack_create (ob->main_stream);
2841 bp_pack_value (&bp, desc->locally_unused, 1);
2842 bp_pack_value (&bp, desc->split_candidate, 1);
2843 bp_pack_value (&bp, desc->by_ref, 1);
2844 gcc_assert (!desc->not_specially_constructed);
2845 bp_pack_value (&bp, desc->remove_only_when_retval_removed, 1);
2846 bp_pack_value (&bp, desc->split_only_when_retval_removed, 1);
2847 bp_pack_value (&bp, desc->conditionally_dereferenceable, 1);
2848 gcc_assert (!desc->safe_size_set);
2849 streamer_write_bitpack (&bp);
2851 bitpack_d bp = bitpack_create (ob->main_stream);
2852 bp_pack_value (&bp, ifs->m_candidate, 1);
2853 bp_pack_value (&bp, ifs->m_returns_value, 1);
2854 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2855 gcc_assert (!ifs->m_queued);
2856 streamer_write_bitpack (&bp);
2858 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2859 isra_write_edge_summary (ob, e);
2860 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2861 isra_write_edge_summary (ob, e);
2864 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2866 static void
2867 ipa_sra_write_summary (void)
2869 if (!func_sums || !call_sums)
2870 return;
2872 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2873 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2874 ob->symbol = NULL;
2876 unsigned int count = 0;
2877 lto_symtab_encoder_iterator lsei;
2878 for (lsei = lsei_start_function_in_partition (encoder);
2879 !lsei_end_p (lsei);
2880 lsei_next_function_in_partition (&lsei))
2882 cgraph_node *node = lsei_cgraph_node (lsei);
2883 if (node->has_gimple_body_p ()
2884 && func_sums->get (node) != NULL)
2885 count++;
2887 streamer_write_uhwi (ob, count);
2889 /* Process all of the functions. */
2890 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2891 lsei_next_function_in_partition (&lsei))
2893 cgraph_node *node = lsei_cgraph_node (lsei);
2894 if (node->has_gimple_body_p ()
2895 && func_sums->get (node) != NULL)
2896 isra_write_node_summary (ob, node);
2898 streamer_write_char_stream (ob->main_stream, 0);
2899 produce_asm (ob, NULL);
2900 destroy_output_block (ob);
2903 /* Read intraprocedural analysis information about the call represented by CS
2904 from a stream for LTO WPA. */
2906 static void
2907 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2909 isra_call_summary *csum = call_sums->get_create (cs);
2910 unsigned input_count = streamer_read_uhwi (ib);
2911 csum->init_inputs (input_count);
2912 for (unsigned i = 0; i < input_count; i++)
2914 isra_param_flow *ipf = &csum->m_arg_flow[i];
2915 ipf->length = streamer_read_hwi (ib);
2916 bitpack_d bp = streamer_read_bitpack (ib);
2917 for (int j = 0; j < ipf->length; j++)
2918 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2919 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2920 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2921 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2922 ipf->constructed_for_calls = bp_unpack_value (&bp, 1);
2923 ipf->unit_offset = streamer_read_uhwi (ib);
2924 ipf->unit_size = streamer_read_uhwi (ib);
2926 bitpack_d bp = streamer_read_bitpack (ib);
2927 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2928 csum->m_return_returned = bp_unpack_value (&bp, 1);
2929 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2930 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2933 /* Read intraprocedural analysis information about NODE and all of its
2934 outgoing edges from a stream for LTO WPA. */
2936 static void
2937 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2938 struct data_in *data_in)
2940 isra_func_summary *ifs = func_sums->get_create (node);
2941 unsigned param_desc_count = streamer_read_uhwi (ib);
2942 if (param_desc_count > 0)
2944 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2945 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2947 for (unsigned i = 0; i < param_desc_count; i++)
2949 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2950 unsigned access_count = streamer_read_uhwi (ib);
2951 for (unsigned j = 0; j < access_count; j++)
2953 param_access *acc = ggc_cleared_alloc<param_access> ();
2954 acc->type = stream_read_tree (ib, data_in);
2955 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2956 acc->unit_offset = streamer_read_uhwi (ib);
2957 acc->unit_size = streamer_read_uhwi (ib);
2958 bitpack_d bp = streamer_read_bitpack (ib);
2959 acc->certain = bp_unpack_value (&bp, 1);
2960 acc->reverse = bp_unpack_value (&bp, 1);
2961 vec_safe_push (desc->accesses, acc);
2963 desc->param_size_limit = streamer_read_uhwi (ib);
2964 desc->size_reached = streamer_read_uhwi (ib);
2965 desc->safe_size = 0;
2966 bitpack_d bp = streamer_read_bitpack (ib);
2967 desc->locally_unused = bp_unpack_value (&bp, 1);
2968 desc->split_candidate = bp_unpack_value (&bp, 1);
2969 desc->by_ref = bp_unpack_value (&bp, 1);
2970 desc->not_specially_constructed = 0;
2971 desc->remove_only_when_retval_removed = bp_unpack_value (&bp, 1);
2972 desc->split_only_when_retval_removed = bp_unpack_value (&bp, 1);
2973 desc->conditionally_dereferenceable = bp_unpack_value (&bp, 1);
2974 desc->safe_size_set = 0;
2976 bitpack_d bp = streamer_read_bitpack (ib);
2977 ifs->m_candidate = bp_unpack_value (&bp, 1);
2978 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2979 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2980 ifs->m_queued = 0;
2982 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2983 isra_read_edge_summary (ib, e);
2984 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2985 isra_read_edge_summary (ib, e);
2988 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2989 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc;
2990 it should be possible to unify them somehow. */
2992 static void
2993 isra_read_summary_section (struct lto_file_decl_data *file_data,
2994 const char *data, size_t len)
2996 const struct lto_function_header *header =
2997 (const struct lto_function_header *) data;
2998 const int cfg_offset = sizeof (struct lto_function_header);
2999 const int main_offset = cfg_offset + header->cfg_size;
3000 const int string_offset = main_offset + header->main_size;
3001 struct data_in *data_in;
3002 unsigned int i;
3003 unsigned int count;
3005 lto_input_block ib_main ((const char *) data + main_offset,
3006 header->main_size, file_data);
3008 data_in =
3009 lto_data_in_create (file_data, (const char *) data + string_offset,
3010 header->string_size, vNULL);
3011 count = streamer_read_uhwi (&ib_main);
3013 for (i = 0; i < count; i++)
3015 unsigned int index;
3016 struct cgraph_node *node;
3017 lto_symtab_encoder_t encoder;
3019 index = streamer_read_uhwi (&ib_main);
3020 encoder = file_data->symtab_node_encoder;
3021 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3022 index));
3023 gcc_assert (node->definition);
3024 isra_read_node_info (&ib_main, node, data_in);
3026 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
3027 len);
3028 lto_data_in_delete (data_in);
3031 /* Read intraprocedural analysis information from a stream for LTO WPA. */
3033 static void
3034 ipa_sra_read_summary (void)
3036 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3037 struct lto_file_decl_data *file_data;
3038 unsigned int j = 0;
3040 gcc_checking_assert (!func_sums);
3041 gcc_checking_assert (!call_sums);
3042 func_sums
3043 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
3044 ipa_sra_function_summaries (symtab, true));
3045 call_sums = new ipa_sra_call_summaries (symtab);
3047 while ((file_data = file_data_vec[j++]))
3049 size_t len;
3050 const char *data
3051 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
3052 if (data)
3053 isra_read_summary_section (file_data, data, len);
3057 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. If
3058 HINTS is true, also dump IPA-analysis computed hints. */
3060 static void
3061 ipa_sra_dump_all_summaries (FILE *f, bool hints)
3063 cgraph_node *node;
3064 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
3066 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
3068 isra_func_summary *ifs = func_sums->get (node);
3069 if (!ifs)
3070 fprintf (f, " Function does not have any associated IPA-SRA "
3071 "summary\n");
3072 else if (!ifs->m_candidate)
3073 fprintf (f, " Not a candidate function\n");
3074 else
3076 if (ifs->m_returns_value)
3077 fprintf (f, " Returns value\n");
3078 if (vec_safe_is_empty (ifs->m_parameters))
3079 fprintf (f, " No parameter information. \n");
3080 else
3081 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
3083 fprintf (f, " Descriptor for parameter %i:\n", i);
3084 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
3086 fprintf (f, "\n");
3089 struct cgraph_edge *cs;
3090 for (cs = node->callees; cs; cs = cs->next_callee)
3092 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
3093 cs->callee->dump_name ());
3094 isra_call_summary *csum = call_sums->get (cs);
3095 if (csum)
3096 csum->dump (f);
3097 else
3098 fprintf (f, " Call summary is MISSING!\n");
3102 fprintf (f, "\n\n");
3105 /* Perform function-scope viability tests that can be only made at IPA level
3106 and return false if the function is deemed unsuitable for IPA-SRA. */
3108 static bool
3109 ipa_sra_ipa_function_checks (cgraph_node *node)
3111 if (!node->can_be_local_p ())
3113 if (dump_file)
3114 fprintf (dump_file, "Function %s disqualified because it cannot be "
3115 "made local.\n", node->dump_name ());
3116 return false;
3118 if (!node->can_change_signature)
3120 if (dump_file)
3121 fprintf (dump_file, "Function can not change signature.\n");
3122 return false;
3125 return true;
3128 /* Issues found out by check_callers_for_issues. */
3130 struct caller_issues
3132 /* The candidate being considered. */
3133 cgraph_node *candidate;
3134 /* There is a thunk among callers. */
3135 bool thunk;
3136 /* Set if there is at least one caller that is OK. */
3137 bool there_is_one;
3138 /* Call site with no available information. */
3139 bool unknown_callsite;
3140 /* Call from outside the candidate's comdat group. */
3141 bool call_from_outside_comdat;
3142 /* There is a bit-aligned load into one of the non-gimple-typed arguments. */
3143 bool bit_aligned_aggregate_argument;
3146 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
3147 that apply. */
3149 static bool
3150 check_for_caller_issues (struct cgraph_node *node, void *data)
3152 struct caller_issues *issues = (struct caller_issues *) data;
3154 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3156 if (cs->caller->thunk)
3158 issues->thunk = true;
3159 /* TODO: We should be able to process at least some types of
3160 thunks. */
3161 return true;
3163 if (issues->candidate->calls_comdat_local
3164 && issues->candidate->same_comdat_group
3165 && !issues->candidate->in_same_comdat_group_p (cs->caller))
3167 issues->call_from_outside_comdat = true;
3168 return true;
3171 isra_call_summary *csum = call_sums->get (cs);
3172 if (!csum)
3174 issues->unknown_callsite = true;
3175 return true;
3178 if (csum->m_bit_aligned_arg)
3179 issues->bit_aligned_aggregate_argument = true;
3181 issues->there_is_one = true;
3183 return false;
3186 /* Look at all incoming edges to NODE, including aliases and thunks, and look
3187 for problems. Return true if NODE's type should not be modified at all. */
3189 static bool
3190 check_all_callers_for_issues (cgraph_node *node)
3192 struct caller_issues issues;
3193 memset (&issues, 0, sizeof (issues));
3194 issues.candidate = node;
3196 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
3197 if (issues.unknown_callsite)
3199 if (dump_file && (dump_flags & TDF_DETAILS))
3200 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
3201 "all modifications.\n", node->dump_name ());
3202 return true;
3204 /* TODO: We should be able to process at least some types of thunks. */
3205 if (issues.thunk)
3207 if (dump_file && (dump_flags & TDF_DETAILS))
3208 fprintf (dump_file, "A call of %s is through thunk, which are not"
3209 " handled yet. Disabling all modifications.\n",
3210 node->dump_name ());
3211 return true;
3213 if (issues.call_from_outside_comdat)
3215 if (dump_file)
3216 fprintf (dump_file, "Function would become private comdat called "
3217 "outside of its comdat group.\n");
3218 return true;
3221 if (issues.bit_aligned_aggregate_argument)
3223 /* Let's only remove parameters/return values from such functions.
3224 TODO: We could only prevent splitting the problematic parameters if
3225 anybody thinks it is worth it. */
3226 if (dump_file && (dump_flags & TDF_DETAILS))
3227 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
3228 " disabling parameter splitting.\n", node->dump_name ());
3230 isra_func_summary *ifs = func_sums->get (node);
3231 gcc_checking_assert (ifs);
3232 unsigned param_count = vec_safe_length (ifs->m_parameters);
3233 for (unsigned i = 0; i < param_count; i++)
3234 (*ifs->m_parameters)[i].split_candidate = false;
3236 if (!issues.there_is_one)
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 fprintf (dump_file, "There is no call to %s that we can modify. "
3240 "Disabling all modifications.\n", node->dump_name ());
3241 return true;
3243 return false;
3246 /* Find the access with corresponding OFFSET and SIZE among accesses in
3247 PARAM_DESC and return it or NULL if such an access is not there. */
3249 static param_access *
3250 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3252 unsigned pclen = vec_safe_length (param_desc->accesses);
3254 /* The search is linear but the number of stored accesses is bound by
3255 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3257 for (unsigned i = 0; i < pclen; i++)
3258 if ((*param_desc->accesses)[i]->unit_offset == offset
3259 && (*param_desc->accesses)[i]->unit_size == size)
3260 return (*param_desc->accesses)[i];
3262 return NULL;
3265 /* Return true iff the total size SIZE of definite replacements would violate
3266 the limit set for it in DESC. */
3268 static bool
3269 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3271 unsigned limit = desc->param_size_limit;
3272 if (size > limit
3273 || (!desc->by_ref && size == limit))
3274 return true;
3275 return false;
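/* For example (hypothetical): with a limit of 16 units, a by-reference
   parameter may accumulate replacements of exactly 16 units, because
   passing the pieces costs no more than passing the pointer, whereas for
   a by-value parameter reaching the full 16 units would amount to
   replacing the aggregate with itself, so equality also violates the
   limit.  */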
3278 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3279 the set limit. IDX is the parameter number which is dumped when
3280 disqualifying. */
3282 static void
3283 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3285 unsigned after = desc->size_reached + size;
3286 if (size_would_violate_limit_p (desc, after))
3288 if (dump_file && (dump_flags & TDF_DETAILS))
3289 fprintf (dump_file, " ...size limit reached, disqualifying "
3290 "candidate parameter %u\n", idx);
3291 desc->split_candidate = false;
3292 return;
3294 desc->size_reached = after;
3297 /* Take all actions required to deal with an edge CS that represents a call to
3298 an unknown or un-analyzed function, for both parameter removal and
3299 splitting. */
3301 static void
3302 process_edge_to_unknown_caller (cgraph_edge *cs)
3304 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3305 gcc_checking_assert (from_ifs);
3306 isra_call_summary *csum = call_sums->get (cs);
3308 if (dump_file && (dump_flags & TDF_DETAILS))
3309 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3310 cs->caller->dump_name ());
3312 unsigned args_count = csum->m_arg_flow.length ();
3313 for (unsigned i = 0; i < args_count; i++)
3315 isra_param_flow *ipf = &csum->m_arg_flow[i];
3317 if (ipf->pointer_pass_through)
3319 isra_param_desc *param_desc
3320 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3321 param_desc->locally_unused = false;
3322 param_desc->split_candidate = false;
3323 continue;
3325 if (ipf->aggregate_pass_through)
3327 unsigned idx = get_single_param_flow_source (ipf);
3328 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3330 param_desc->locally_unused = false;
3331 if (!param_desc->split_candidate)
3332 continue;
3333 gcc_assert (!param_desc->by_ref);
3334 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3335 ipf->unit_size);
3336 gcc_checking_assert (pacc);
3337 pacc->certain = true;
3338 if (overlapping_certain_accesses_p (param_desc, NULL))
3340 if (dump_file && (dump_flags & TDF_DETAILS))
3341 fprintf (dump_file, " ...leading to overlap, "
3342 " disqualifying candidate parameter %u\n",
3343 idx);
3344 param_desc->split_candidate = false;
3346 else
3347 bump_reached_size (param_desc, pacc->unit_size, idx);
3348 ipf->aggregate_pass_through = false;
3349 continue;
3352 for (int j = 0; j < ipf->length; j++)
3354 int input_idx = ipf->inputs[j];
3355 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3360 /* Propagate parameter removal information through cross-SCC edge CS,
3361 i.e. decrease the use count in the caller parameter descriptor for each use
3362 in this call. */
3364 static void
3365 param_removal_cross_scc_edge (cgraph_edge *cs)
3367 enum availability availability;
3368 cgraph_node *callee = cs->callee->function_symbol (&availability);
3369 isra_func_summary *to_ifs = func_sums->get (callee);
3370 if (!to_ifs || !to_ifs->m_candidate
3371 || (availability < AVAIL_AVAILABLE)
3372 || vec_safe_is_empty (to_ifs->m_parameters))
3374 process_edge_to_unknown_caller (cs);
3375 return;
3377 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3378 gcc_checking_assert (from_ifs);
3380 isra_call_summary *csum = call_sums->get (cs);
3381 unsigned args_count = csum->m_arg_flow.length ();
3382 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3384 for (unsigned i = 0; i < args_count; i++)
3386 bool unused_in_callee;
3387 if (i < param_count)
3388 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3389 else
3390 unused_in_callee = false;
3392 if (!unused_in_callee)
3394 isra_param_flow *ipf = &csum->m_arg_flow[i];
3395 for (int j = 0; j < ipf->length; j++)
3397 int input_idx = ipf->inputs[j];
3398 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3404 /* Unless it is already there, push NODE which is also described by IFS to
3405 STACK. */
3407 static void
3408 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3409 vec<cgraph_node *> *stack)
3411 if (!ifs->m_queued)
3413 ifs->m_queued = true;
3414 stack->safe_push (node);
3418 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3419 used and push CALLER on STACK. */
3421 static void
3422 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3423 cgraph_node *caller, vec<cgraph_node *> *stack)
3425 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3427 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3428 isra_push_node_to_stack (caller, from_ifs, stack);
3432 /* Combine safe_size of DESC with SIZE and return true if it has changed. */
3434 static bool
3435 update_safe_size (isra_param_desc *desc, unsigned size)
3437 if (!desc->safe_size_set)
3439 desc->safe_size_set = 1;
3440 desc->safe_size = size;
3441 return true;
3443 if (desc->safe_size <= size)
3444 return false;
3445 desc->safe_size = size;
3446 return true;
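/* In effect, safe_size is combined by minimum (hypothetical sizes): a
   first call with size 32 sets it and returns true, a later call with 16
   lowers it and returns true, and a subsequent call with 64 changes
   nothing and returns false.  */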
3449 /* Set all param hints in DESC to the pessimistic values. Return true if any
3450 hints that need to potentially trigger further propagation have changed. */
3452 static bool
3453 flip_all_hints_pessimistic (isra_param_desc *desc)
3455 desc->not_specially_constructed = true;
3456 return update_safe_size (desc, 0);
3459 /* Because of an unanalyzed or otherwise problematic caller, go over all
3460 parameter hint flags of IFS describing a call graph node of a callee and
3461 turn them pessimistic. Return true if any hints that need to potentially
3462 trigger further propagation have changed. */
3464 static bool
3465 flip_all_param_hints_pessimistic (isra_func_summary *ifs)
3467 if (!ifs || !ifs->m_candidate)
3468 return false;
3470 bool ret = false;
3471 unsigned param_count = vec_safe_length (ifs->m_parameters);
3473 for (unsigned i = 0; i < param_count; i++)
3474 ret |= flip_all_hints_pessimistic (&(*ifs->m_parameters)[i]);
3476 return ret;
3479 /* Propagate hints across edge CS which ultimately leads to a node described
3480 by TO_IFS. Return true if any hints of the callee which should potentially
3481 trigger further propagation have changed. */
3483 static bool
3484 propagate_param_hints_accross_call (cgraph_edge *cs, isra_func_summary *to_ifs)
3486 if (!to_ifs || !to_ifs->m_candidate)
3487 return false;
3489 isra_call_summary *csum = call_sums->get (cs);
3490 bool ret = false;
3491 unsigned args_count = csum->m_arg_flow.length ();
3492 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3494 for (unsigned i = 0; i < param_count; i++)
3496 isra_param_desc *desc = &(*to_ifs->m_parameters)[i];
3497 if (i >= args_count)
3499 ret |= flip_all_hints_pessimistic (desc);
3500 continue;
3503 if (desc->by_ref)
3505 isra_param_flow *ipf = &csum->m_arg_flow[i];
3507 if (!ipf->constructed_for_calls)
3508 desc->not_specially_constructed = true;
3510 if (ipf->pointer_pass_through)
3512 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3513 int srcidx = get_single_param_flow_source (ipf);
3514 if (vec_safe_length (from_ifs->m_parameters) > (unsigned) srcidx)
3516 isra_param_desc *src_d = &(*from_ifs->m_parameters)[srcidx];
3517 if (src_d->safe_size_set)
3518 ret |= update_safe_size (desc, src_d->safe_size);
3520 else
3521 ret |= update_safe_size (desc, 0);
3523 else if (!ipf->aggregate_pass_through)
3524 ret |= update_safe_size (desc, ipf->unit_size);
3525 else
3526 /* LTOing type-mismatched programs can end up here. */
3527 ret |= update_safe_size (desc, 0);
3530 return ret;
3533 /* Propagate hints from NODE described by FROM_IFS to all its (direct) callees,
3534 push those that may need re-visiting onto STACK. */
3536 static void
3537 propagate_hints_to_all_callees (cgraph_node *node, isra_func_summary *from_ifs,
3538 vec<cgraph_node *> *stack)
3540 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3542 enum availability availability;
3543 cgraph_node *callee = cs->callee->function_symbol (&availability);
3544 isra_func_summary *to_ifs = func_sums->get (callee);
3545 if (!from_ifs)
3547 if (flip_all_param_hints_pessimistic (to_ifs)
3548 && ipa_edge_within_scc (cs))
3549 isra_push_node_to_stack (callee, to_ifs, stack);
3551 else if (propagate_param_hints_accross_call (cs, to_ifs)
3552 && ipa_edge_within_scc (cs))
3553 isra_push_node_to_stack (callee, to_ifs, stack);
3557 /* Propagate information that any parameter is not used only locally within a
3558 SCC across CS to the caller, which must be in the same SCC as the
3559 callee. Push any callers that need to be re-processed to STACK. */
3561 static void
3562 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3564 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3565 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3566 return;
3568 isra_call_summary *csum = call_sums->get (cs);
3569 gcc_checking_assert (csum);
3570 unsigned args_count = csum->m_arg_flow.length ();
3571 enum availability availability;
3572 cgraph_node *callee = cs->callee->function_symbol (&availability);
3573 isra_func_summary *to_ifs = func_sums->get (callee);
3575 unsigned param_count
3576 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3577 ? vec_safe_length (to_ifs->m_parameters) : 0;
3578 for (unsigned i = 0; i < args_count; i++)
3580 if (i < param_count
3581 && (*to_ifs->m_parameters)[i].locally_unused)
3582 continue;
3584 /* The argument is needed in the callee, so we must mark the parameter as
3585 used also in the caller and its callers within this SCC. */
3586 isra_param_flow *ipf = &csum->m_arg_flow[i];
3587 for (int j = 0; j < ipf->length; j++)
3589 int input_idx = ipf->inputs[j];
3590 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3595 /* Propagate information that any parameter is not used only locally within a
3596 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3597 same SCC. Push any callers that need to be re-processed to STACK. */
3599 static bool
3600 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3602 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3603 cgraph_edge *cs;
3604 for (cs = node->callers; cs; cs = cs->next_caller)
3605 if (ipa_edge_within_scc (cs))
3606 propagate_used_across_scc_edge (cs, stack);
3607 return false;
3610 /* Return true iff all certain accesses in ARG_DESC are also present as
3611 certain accesses in PARAM_DESC. */
3613 static bool
3614 all_callee_accesses_present_p (isra_param_desc *param_desc,
3615 isra_param_desc *arg_desc)
3617 unsigned aclen = vec_safe_length (arg_desc->accesses);
3618 for (unsigned j = 0; j < aclen; j++)
3620 param_access *argacc = (*arg_desc->accesses)[j];
3621 if (!argacc->certain)
3622 continue;
3623 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3624 argacc->unit_size);
3625 if (!pacc
3626 || !pacc->certain
3627 || !types_compatible_p (argacc->type, pacc->type))
3628 return false;
3630 return true;
3633 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3634 does not allow instantiating an auto_vec with a type defined within a
3635 function, so it has to be a global type. */
3636 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3639 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
3640 (which belongs to CALLER) if they would not violate some constraint there.
3641 If successful, return NULL, otherwise return the string reason for failure
3642 (which can be written to the dump file). DELTA_OFFSET is the known offset
3643 of the actual argument within the formal parameter (so of ARG_DESC within
3644 PARAM_DESC), ARG_SIZE is the size of the actual argument, or zero if not
3645 known. In case of success, set *CHANGE_P to true if propagation actually
3646 changed anything. */
3648 static const char *
3649 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3650 isra_param_desc *arg_desc,
3651 unsigned delta_offset, unsigned arg_size,
3652 bool *change_p)
3654 unsigned pclen = vec_safe_length (param_desc->accesses);
3655 unsigned aclen = vec_safe_length (arg_desc->accesses);
3656 unsigned prop_count = 0;
3657 unsigned prop_size = 0;
3658 bool change = false;
3660 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3661 for (unsigned j = 0; j < aclen; j++)
3663 param_access *argacc = (*arg_desc->accesses)[j];
3664 prop_kinds.safe_push (ACC_PROP_DONT);
3666 if (arg_size > 0
3667 && argacc->unit_offset + argacc->unit_size > arg_size)
3668 return "callee access outsize size boundary";
3670 if (!argacc->certain)
3671 continue;
3673 unsigned offset = argacc->unit_offset + delta_offset;
3674 /* Given that accesses are initially stored according to increasing
3675 offset and decreasing size in case of equal offsets, the following
3676 searches could be written more efficiently if we kept the ordering
3677 when copying. But the number of accesses is capped at
3678 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3679 messy quickly, so let's improve on that only if necessary. */
3681 bool exact_match = false;
3682 for (unsigned i = 0; i < pclen; i++)
3684 /* Check for overlaps. */
3685 param_access *pacc = (*param_desc->accesses)[i];
3686 if (pacc->unit_offset == offset
3687 && pacc->unit_size == argacc->unit_size)
3689 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3690 || !types_compatible_p (argacc->type, pacc->type)
3691 || argacc->reverse != pacc->reverse)
3692 return "propagated access types would not match existing ones";
3694 exact_match = true;
3695 if (!pacc->certain)
3697 prop_kinds[j] = ACC_PROP_CERTAIN;
3698 prop_size += argacc->unit_size;
3699 change = true;
3701 continue;
3704 if (offset < pacc->unit_offset + pacc->unit_size
3705 && offset + argacc->unit_size > pacc->unit_offset)
3707 /* Overlapping a certain access is never permissible; a propagated
3708 access may only fit entirely within an uncertain one. */
3709 if (pacc->certain
3710 || offset < pacc->unit_offset
3711 || (offset + argacc->unit_size
3712 > pacc->unit_offset + pacc->unit_size))
3713 return "a propagated access would conflict in caller";
3717 if (!exact_match)
3719 prop_kinds[j] = ACC_PROP_COPY;
3720 prop_count++;
3721 prop_size += argacc->unit_size;
3722 change = true;
3726 if (!change)
3727 return NULL;
3729 if ((prop_count + pclen
3730 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3731 || size_would_violate_limit_p (param_desc,
3732 param_desc->size_reached + prop_size))
3733 return "propagating accesses would violate the count or size limit";
3735 *change_p = true;
3736 for (unsigned j = 0; j < aclen; j++)
3738 if (prop_kinds[j] == ACC_PROP_COPY)
3740 param_access *argacc = (*arg_desc->accesses)[j];
3742 param_access *copy = ggc_cleared_alloc<param_access> ();
3743 copy->unit_offset = argacc->unit_offset + delta_offset;
3744 copy->unit_size = argacc->unit_size;
3745 copy->type = argacc->type;
3746 copy->alias_ptr_type = argacc->alias_ptr_type;
3747 copy->certain = true;
3748 copy->reverse = argacc->reverse;
3749 vec_safe_push (param_desc->accesses, copy);
3751 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3753 param_access *argacc = (*arg_desc->accesses)[j];
3754 param_access *csp
3755 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3756 argacc->unit_size);
3757 csp->certain = true;
3761 param_desc->size_reached += prop_size;
3763 return NULL;
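/* Worked example (illustrative, hypothetical types): with

     struct inner { int x; int y; };
     struct outer { int tag; struct inner in; };
     static int callee (struct inner *q) { return q->y; }
     static int caller (struct outer *p) { return callee (&p->in); }

   DELTA_OFFSET is offsetof (struct outer, in), so the callee's certain
   access to q->y at unit offset 4 is recorded in the caller's
   descriptor of P at unit offset DELTA_OFFSET + 4, provided the count
   and size limits checked above are not exceeded.  */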
3766 /* Propagate parameter splitting information through call graph edge CS.
3767 Return true if any changes that might need to be propagated within SCCs have
3768 been made. The function also clears the aggregate_pass_through and
3769 pointer_pass_through flags in call summaries which do not need to be
3770 processed again if this CS is revisited when iterating while changes are
3771 propagated within an SCC. */
3773 static bool
3774 param_splitting_across_edge (cgraph_edge *cs)
3776 bool res = false;
3777 bool cross_scc = !ipa_edge_within_scc (cs);
3778 enum availability availability;
3779 cgraph_node *callee = cs->callee->function_symbol (&availability);
3780 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3781 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3783 isra_call_summary *csum = call_sums->get (cs);
3784 gcc_checking_assert (csum);
3785 unsigned args_count = csum->m_arg_flow.length ();
3786 isra_func_summary *to_ifs = func_sums->get (callee);
3787 unsigned param_count
3788 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3789 ? vec_safe_length (to_ifs->m_parameters)
3790 : 0);
3792 if (dump_file && (dump_flags & TDF_DETAILS))
3793 fprintf (dump_file, "Splitting across %s->%s:\n",
3794 cs->caller->dump_name (), callee->dump_name ());
3796 unsigned i;
3797 for (i = 0; (i < args_count) && (i < param_count); i++)
3799 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3800 isra_param_flow *ipf = &csum->m_arg_flow[i];
3802 if (arg_desc->locally_unused)
3804 if (dump_file && (dump_flags & TDF_DETAILS))
3805 fprintf (dump_file, " ->%u: unused in callee\n", i);
3806 ipf->pointer_pass_through = false;
3807 continue;
3810 if (ipf->pointer_pass_through)
3812 int idx = get_single_param_flow_source (ipf);
3813 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3814 if (!param_desc->split_candidate)
3815 continue;
3816 gcc_assert (param_desc->by_ref);
3818 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3820 if (dump_file && (dump_flags & TDF_DETAILS))
3821 fprintf (dump_file, " %u->%u: not candidate or not by "
3822 "reference in callee\n", idx, i);
3823 param_desc->split_candidate = false;
3824 ipf->pointer_pass_through = false;
3825 res = true;
3827 else if (!ipf->safe_to_import_accesses)
3829 if (!csum->m_before_any_store
3830 || !all_callee_accesses_present_p (param_desc, arg_desc))
3832 if (dump_file && (dump_flags & TDF_DETAILS))
3833 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3834 idx, i);
3835 param_desc->split_candidate = false;
3836 ipf->pointer_pass_through = false;
3837 res = true;
3840 else
3842 if (dump_file && (dump_flags & TDF_DETAILS))
3843 fprintf (dump_file, " %u->%u: verified callee accesses "
3844 "present.\n", idx, i);
3845 if (cross_scc)
3846 ipf->pointer_pass_through = false;
3849 else
3851 const char *pull_failure
3852 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3853 0, 0, &res);
3854 if (pull_failure)
3856 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, " %u->%u: by_ref access pull "
3858 "failed: %s.\n", idx, i, pull_failure);
3859 param_desc->split_candidate = false;
3860 ipf->pointer_pass_through = false;
3861 res = true;
3863 else
3865 if (dump_file && (dump_flags & TDF_DETAILS))
3866 fprintf (dump_file, " %u->%u: by_ref access pull "
3867 "succeeded.\n", idx, i);
3868 if (cross_scc)
3869 ipf->pointer_pass_through = false;
3873 else if (ipf->aggregate_pass_through)
3875 int idx = get_single_param_flow_source (ipf);
3876 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3877 if (!param_desc->split_candidate)
3878 continue;
3879 gcc_assert (!param_desc->by_ref);
3880 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3881 ipf->unit_size);
3882 gcc_checking_assert (pacc);
3884 if (pacc->certain)
3886 if (dump_file && (dump_flags & TDF_DETAILS))
3887 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3888 ipf->aggregate_pass_through = false;
3890 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3892 if (dump_file && (dump_flags & TDF_DETAILS))
3893 fprintf (dump_file, " %u->%u: not candidate or by "
3894 "reference in callee\n", idx, i);
3896 pacc->certain = true;
3897 if (overlapping_certain_accesses_p (param_desc, NULL))
3899 if (dump_file && (dump_flags & TDF_DETAILS))
3900 fprintf (dump_file, " ...leading to overlap, "
3901 " disqualifying candidate parameter %u\n",
3902 idx);
3903 param_desc->split_candidate = false;
3905 else
3906 bump_reached_size (param_desc, pacc->unit_size, idx);
3908 ipf->aggregate_pass_through = false;
3909 res = true;
3911 else
3913 const char *pull_failure
3914 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3915 ipf->unit_offset,
3916 ipf->unit_size, &res);
3917 if (pull_failure)
3919 if (dump_file && (dump_flags & TDF_DETAILS))
3920 fprintf (dump_file, " %u->%u: arg access pull "
3921 "failed: %s.\n", idx, i, pull_failure);
3923 ipf->aggregate_pass_through = false;
3924 pacc->certain = true;
3926 if (overlapping_certain_accesses_p (param_desc, NULL))
3928 if (dump_file && (dump_flags & TDF_DETAILS))
3929 fprintf (dump_file, " ...leading to overlap, "
3930 " disqualifying candidate parameter %u\n",
3931 idx);
3932 param_desc->split_candidate = false;
3934 else
3935 bump_reached_size (param_desc, pacc->unit_size, idx);
3937 res = true;
3939 else
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3942 fprintf (dump_file, " %u->%u: arg access pull "
3943 "succeeded.\n", idx, i);
3944 if (cross_scc)
3945 ipf->aggregate_pass_through = false;
3951 /* Handle argument-parameter count mismatches. */
3952 for (; (i < args_count); i++)
3954 isra_param_flow *ipf = &csum->m_arg_flow[i];
3956 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3958 int idx = get_single_param_flow_source (ipf);
3959 ipf->pointer_pass_through = false;
3960 ipf->aggregate_pass_through = false;
3961 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3962 if (!param_desc->split_candidate)
3963 continue;
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3966 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3967 idx, i);
3968 param_desc->split_candidate = false;
3969 res = true;
3972 return res;
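/* Source-level sketch of the two cases handled above (hypothetical
   code): assuming

     static int callee (struct S *q);
     static int callee2 (struct S s);
     struct big { int tag; struct S s; };

   then in

     static int caller1 (struct S *p) { return callee (p); }
     static int caller2 (struct big b) { return callee2 (b.s); }

   the call in caller1 is a pointer pass-through of P, while the call in
   caller2 is an aggregate pass-through whose unit_offset/unit_size
   describe where B.S lies within B.  */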
3975 /* Worker for call_for_symbol_and_aliases. Look at all callers and, if all of
3976 them ignore the return value, or come from the same SCC and use the
3977 return value only to compute their own return value, return false; otherwise
3978 return true. */
3980 static bool
3981 retval_used_p (cgraph_node *node, void *)
3983 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3985 isra_call_summary *csum = call_sums->get (cs);
3986 gcc_checking_assert (csum);
3987 if (csum->m_return_ignored)
3988 continue;
3989 if (!csum->m_return_returned)
3990 return true;
3992 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3993 if (!from_ifs || !from_ifs->m_candidate)
3994 return true;
3996 if (!ipa_edge_within_scc (cs)
3997 && !from_ifs->m_return_ignored)
3998 return true;
4001 return false;
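/* Illustrative example: if

     static int compute (int x) { return x + 1; }

   is only ever called as "compute (x);" with the result discarded,
   every call summary has m_return_ignored set, this predicate returns
   false, and compute can be turned into a void function.  A single
   caller doing "int v = compute (x); use (v);" makes it return true
   instead.  */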
4004 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
4005 modify the parameter which originally had index BASE_INDEX, had index
4006 PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any), and
4007 was described by PREV_ADJUSTMENT. If IPA-CP has created a transformation
4008 summary for the original node, it needs to be passed in IPCP_TS, otherwise it
4009 should be NULL. If the parent clone is the original function, PREV_ADJUSTMENT
4010 is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
4012 static void
4013 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
4014 unsigned prev_clone_index,
4015 ipa_adjusted_param *prev_adjustment,
4016 ipcp_transformation *ipcp_ts,
4017 vec<ipa_adjusted_param, va_gc> **new_params)
4019 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
4020 if (desc->locally_unused)
4022 if (dump_file)
4023 fprintf (dump_file, " Will remove parameter %u\n", base_index);
4024 return;
4027 if (!desc->split_candidate)
4029 ipa_adjusted_param adj;
4030 if (prev_adjustment)
4032 adj = *prev_adjustment;
4033 adj.prev_clone_adjustment = true;
4034 adj.prev_clone_index = prev_clone_index;
4036 else
4038 memset (&adj, 0, sizeof (adj));
4039 adj.op = IPA_PARAM_OP_COPY;
4040 adj.base_index = base_index;
4041 adj.prev_clone_index = prev_clone_index;
4043 vec_safe_push ((*new_params), adj);
4044 return;
4047 if (dump_file)
4048 fprintf (dump_file, " Will split parameter %u\n", base_index);
4050 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
4051 unsigned aclen = vec_safe_length (desc->accesses);
4052 for (unsigned j = 0; j < aclen; j++)
4054 param_access *pa = (*desc->accesses)[j];
4055 if (!pa->certain)
4056 continue;
4058 if (ipcp_ts)
4060 ipa_argagg_value_list avl (ipcp_ts);
4061 tree value = avl.get_value (base_index, pa->unit_offset);
4062 if (value && !AGGREGATE_TYPE_P (pa->type))
4064 if (dump_file)
4065 fprintf (dump_file, " - omitting component at byte "
4066 "offset %u which is known to have a constant value\n ",
4067 pa->unit_offset);
4068 continue;
4072 if (dump_file)
4073 fprintf (dump_file, " - component at byte offset %u, "
4074 "size %u\n", pa->unit_offset, pa->unit_size);
4076 ipa_adjusted_param adj;
4077 memset (&adj, 0, sizeof (adj));
4078 adj.op = IPA_PARAM_OP_SPLIT;
4079 adj.base_index = base_index;
4080 adj.prev_clone_index = prev_clone_index;
4081 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
4082 adj.reverse = pa->reverse;
4083 adj.type = pa->type;
4084 adj.alias_ptr_type = pa->alias_ptr_type;
4085 adj.unit_offset = pa->unit_offset;
4086 vec_safe_push ((*new_params), adj);
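/* Illustrative outcome (hypothetical function): for

     static int f (int dead, struct S *p)

   where DEAD is locally unused and P has certain accesses at unit
   offsets/sizes {0, 4} and {8, 4}, no entry is pushed for DEAD and two
   IPA_PARAM_OP_SPLIT entries are pushed for P, one per certain access,
   each carrying the access's type, unit_offset and alias_ptr_type.  In
   dumps the synthesized replacements then appear with the "ISRA"
   prefix.  */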
4090 /* Worker for call_for_symbol_thunks_and_aliases. Set the calls_comdat_local
4091 flag of all callers of NODE. */
4093 static bool
4094 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
4096 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4097 cs->caller->calls_comdat_local = true;
4098 return false;
4101 /* Remove any IPA-CP results stored in TS that are associated with removed
4102 parameters as marked in IFS. */
4104 static void
4105 zap_useless_ipcp_results (const isra_func_summary *ifs, ipcp_transformation *ts)
4107 ts->remove_argaggs_if ([ifs](const ipa_argagg_value &v)
4109 return (*ifs->m_parameters)[v.index].locally_unused;
4112 bool useful_vr = false;
4113 unsigned count = vec_safe_length (ts->m_vr);
4114 for (unsigned i = 0; i < count; i++)
4115 if ((*ts->m_vr)[i].known_p ())
4117 const isra_param_desc *desc = &(*ifs->m_parameters)[i];
4118 if (desc->locally_unused)
4119 (*ts->m_vr)[i].set_unknown ();
4120 else
4121 useful_vr = true;
4123 if (!useful_vr)
4124 ts->m_vr = NULL;
4127 /* Do final processing of results of IPA propagation regarding NODE, clone it
4128 if appropriate. */
4130 static void
4131 process_isra_node_results (cgraph_node *node,
4132 hash_map<const char *, unsigned> *clone_num_suffixes)
4134 isra_func_summary *ifs = func_sums->get (node);
4135 if (!ifs || !ifs->m_candidate)
4136 return;
4138 auto_vec<bool, 16> surviving_params;
4139 bool check_surviving = false;
4140 clone_info *cinfo = clone_info::get (node);
4141 if (cinfo && cinfo->param_adjustments)
4143 check_surviving = true;
4144 cinfo->param_adjustments->get_surviving_params (&surviving_params);
4147 unsigned param_count = vec_safe_length (ifs->m_parameters);
4148 bool will_change_function = false;
4149 if (ifs->m_returns_value && ifs->m_return_ignored)
4150 will_change_function = true;
4151 else
4152 for (unsigned i = 0; i < param_count; i++)
4154 isra_param_desc *desc = &(*ifs->m_parameters)[i];
4155 if ((desc->locally_unused || desc->split_candidate)
4156 /* Make sure we do not clone just to attempt to remove an already
4157 removed unused argument. */
4158 && (!check_surviving
4159 || (i < surviving_params.length ()
4160 && surviving_params[i])))
4162 will_change_function = true;
4163 break;
4166 if (!will_change_function)
4167 return;
4169 if (dump_file)
4171 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
4172 node->dump_name ());
4173 if (ifs->m_returns_value && ifs->m_return_ignored)
4174 fprintf (dump_file, " Will remove return value.\n");
4177 ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
4178 if (ipcp_ts)
4179 zap_useless_ipcp_results (ifs, ipcp_ts);
4180 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
4181 if (ipa_param_adjustments *old_adjustments
4182 = cinfo ? cinfo->param_adjustments : NULL)
4184 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
4185 for (unsigned i = 0; i < old_adj_len; i++)
4187 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
4188 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
4189 old_adj, ipcp_ts, &new_params);
4192 else
4193 for (unsigned i = 0; i < param_count; i++)
4194 push_param_adjustments_for_index (ifs, i, i, NULL, ipcp_ts, &new_params);
4196 ipa_param_adjustments *new_adjustments
4197 = (new (ggc_alloc <ipa_param_adjustments> ())
4198 ipa_param_adjustments (new_params, param_count,
4199 ifs->m_returns_value && ifs->m_return_ignored));
4201 if (dump_file && (dump_flags & TDF_DETAILS))
4203 fprintf (dump_file, "\n Created adjustments:\n");
4204 new_adjustments->dump (dump_file);
4207 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
4208 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4209 node->decl)));
4210 auto_vec<cgraph_edge *> callers = node->collect_callers ();
4211 cgraph_node *new_node
4212 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
4213 suffix_counter);
4214 suffix_counter++;
4215 if (node->calls_comdat_local && node->same_comdat_group)
4217 new_node->add_to_same_comdat_group (node);
4218 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
4219 NULL, true);
4221 new_node->calls_comdat_local = node->calls_comdat_local;
4223 if (dump_file)
4224 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
4225 callers.release ();
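/* For example (hypothetical source), a node

     static int foo (int unused, struct S *p) { return p->a; }

   whose first parameter is removable and whose second is split into the
   single scalar it loads may get a clone along the lines of

     static int foo.isra.0 (int ISRA.2) { return ISRA.2; }

   with the shared CLONE_NUM_SUFFIXES counter keyed by the assembler
   name ensuring that further clones become foo.isra.1, foo.isra.2 and
   so on.  */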
4228 /* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
4229 followed by the list of numbers in INDICES. */
4231 static void
4232 dump_list_of_param_indices (const cgraph_node *node, const char* msg,
4233 const vec<unsigned> &indices)
4235 if (indices.is_empty ())
4236 return;
4237 fprintf (dump_file, "The following parameters of %s %s:", node->dump_name (),
4238 msg);
4239 for (unsigned i : indices)
4240 fprintf (dump_file, " %u", i);
4241 fprintf (dump_file, "\n");
4244 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
4245 and disable transformations for those which have not, or which should not be
4246 transformed because the associated debug counter reached its limit. Return
4247 true if none survived or if there were no candidates to begin with.
4248 Additionally, also adjust parameter descriptions based on debug counters and
4249 hints propagated earlier. */
4251 static bool
4252 adjust_parameter_descriptions (cgraph_node *node, isra_func_summary *ifs)
4254 bool ret = true;
4255 unsigned len = vec_safe_length (ifs->m_parameters);
4256 if (!len)
4257 return true;
4259 auto_vec<bool, 16> surviving_params;
4260 bool check_surviving = false;
4261 clone_info *cinfo = clone_info::get (node);
4262 if (cinfo && cinfo->param_adjustments)
4264 check_surviving = true;
4265 cinfo->param_adjustments->get_surviving_params (&surviving_params);
4267 ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
4268 auto_vec <unsigned> dump_dead_indices;
4269 auto_vec <unsigned> dump_bad_cond_indices;
4270 for (unsigned i = 0; i < len; i++)
4272 isra_param_desc *desc = &(*ifs->m_parameters)[i];
4273 if (!dbg_cnt (ipa_sra_params))
4275 desc->locally_unused = false;
4276 desc->split_candidate = false;
4277 continue;
4280 if (desc->split_only_when_retval_removed
4281 && !ifs->m_return_ignored)
4283 if (dump_file && (dump_flags & TDF_DETAILS)
4284 && (desc->locally_unused || desc->split_candidate))
4285 dump_bad_cond_indices.safe_push (i);
4287 gcc_checking_assert (!desc->locally_unused
4288 || desc->remove_only_when_retval_removed);
4289 desc->locally_unused = false;
4290 desc->split_candidate = false;
4291 continue;
4293 if (desc->remove_only_when_retval_removed
4294 && !ifs->m_return_ignored)
4296 if (dump_file && (dump_flags & TDF_DETAILS)
4297 && (desc->locally_unused || desc->split_candidate))
4298 dump_bad_cond_indices.safe_push (i);
4300 desc->locally_unused = false;
4302 if (check_surviving
4303 && (i >= surviving_params.length ()
4304 || !surviving_params[i]))
4306 /* Even if the parameter was removed by a previous IPA pass, we do
4307 not clear locally_unused because if it really is unused, this
4308 information might be useful in callers. */
4309 desc->split_candidate = false;
4311 if (dump_file && (dump_flags & TDF_DETAILS))
4312 dump_dead_indices.safe_push (i);
4315 if (desc->split_candidate && desc->conditionally_dereferenceable)
4317 gcc_assert (desc->safe_size_set);
4318 for (param_access *pa : *desc->accesses)
4319 if ((pa->unit_offset + pa->unit_size) > desc->safe_size)
4321 if (dump_file && (dump_flags & TDF_DETAILS))
4322 dump_bad_cond_indices.safe_push (i);
4323 desc->split_candidate = false;
4324 break;
4328 if (desc->split_candidate)
4330 if (desc->by_ref && !desc->not_specially_constructed)
4332 int extra_factor
4333 = opt_for_fn (node->decl,
4334 param_ipa_sra_ptrwrap_growth_factor);
4335 desc->param_size_limit = extra_factor * desc->param_size_limit;
4337 if (size_would_violate_limit_p (desc, desc->size_reached))
4338 desc->split_candidate = false;
4341 /* Avoid ICEs on size-mismatched VIEW_CONVERT_EXPRs when callers and
4342 callees don't agree on types in aggregates and we try to do both
4343 IPA-CP and IPA-SRA. */
4344 if (ipcp_ts && desc->split_candidate)
4346 ipa_argagg_value_list avl (ipcp_ts);
4347 for (const param_access *pa : desc->accesses)
4349 if (!pa->certain)
4350 continue;
4351 tree value = avl.get_value (i, pa->unit_offset);
4352 if (value
4353 && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value)))
4354 / BITS_PER_UNIT)
4355 != pa->unit_size))
4357 desc->split_candidate = false;
4358 if (dump_file && (dump_flags & TDF_DETAILS))
4359 dump_dead_indices.safe_push (i);
4360 break;
4365 if (desc->locally_unused || desc->split_candidate)
4366 ret = false;
4369 dump_list_of_param_indices (node, "are dead on arrival or have a type "
4370 "mismatch with IPA-CP", dump_dead_indices);
4371 dump_list_of_param_indices (node, "fail additional requirements",
4372 dump_bad_cond_indices);
4374 return ret;
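/* Illustrative example (hypothetical function): a parameter that is
   dereferenced only under a guard, such as P in

     static int f (struct S *p, int have_p)
     { return have_p ? p->a : 0; }

   is conditionally dereferenceable.  The check above only keeps it a
   split candidate when every certain access fits within the safe size
   that propagation proved all callers actually pass.  */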
4378 /* Run the interprocedural part of IPA-SRA. */
4380 static unsigned int
4381 ipa_sra_analysis (void)
4383 if (dump_file)
4385 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
4386 ipa_sra_dump_all_summaries (dump_file, false);
4389 gcc_checking_assert (func_sums);
4390 gcc_checking_assert (call_sums);
4391 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
4392 auto_vec <cgraph_node *, 16> stack;
4393 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
4395 /* One sweep from callers to callees for return value removal. */
4396 for (int i = node_scc_count - 1; i >= 0 ; i--)
4398 cgraph_node *scc_rep = order[i];
4399 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
4401 /* Preliminary IPA function level checks. */
4402 for (cgraph_node *v : cycle_nodes)
4404 isra_func_summary *ifs = func_sums->get (v);
4405 if (!ifs || !ifs->m_candidate)
4406 continue;
4407 if (!ipa_sra_ipa_function_checks (v)
4408 || check_all_callers_for_issues (v))
4409 ifs->zap ();
4412 for (cgraph_node *v : cycle_nodes)
4414 isra_func_summary *ifs = func_sums->get (v);
4415 if (!ifs || !ifs->m_candidate)
4416 continue;
4417 bool return_needed
4418 = (ifs->m_returns_value
4419 && (!dbg_cnt (ipa_sra_retvalues)
4420 || v->call_for_symbol_and_aliases (retval_used_p,
4421 NULL, true)));
4422 ifs->m_return_ignored = !return_needed;
4423 if (return_needed)
4424 isra_push_node_to_stack (v, ifs, &stack);
4427 while (!stack.is_empty ())
4429 cgraph_node *node = stack.pop ();
4430 isra_func_summary *ifs = func_sums->get (node);
4431 gcc_checking_assert (ifs && ifs->m_queued);
4432 ifs->m_queued = false;
4434 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4435 if (ipa_edge_within_scc (cs)
4436 && call_sums->get (cs)->m_return_returned)
4438 enum availability av;
4439 cgraph_node *callee = cs->callee->function_symbol (&av);
4440 isra_func_summary *to_ifs = func_sums->get (callee);
4441 if (to_ifs && to_ifs->m_return_ignored)
4443 to_ifs->m_return_ignored = false;
4444 isra_push_node_to_stack (callee, to_ifs, &stack);
4449 /* Parameter hint propagation. */
4450 for (cgraph_node *v : cycle_nodes)
4452 isra_func_summary *ifs = func_sums->get (v);
4453 propagate_hints_to_all_callees (v, ifs, &stack);
4456 while (!stack.is_empty ())
4458 cgraph_node *node = stack.pop ();
4459 isra_func_summary *ifs = func_sums->get (node);
4460 gcc_checking_assert (ifs && ifs->m_queued);
4461 ifs->m_queued = false;
4462 propagate_hints_to_all_callees (node, ifs, &stack);
4465 cycle_nodes.release ();
4468 /* One sweep from callees to callers for parameter removal and splitting. */
4469 for (int i = 0; i < node_scc_count; i++)
4471 cgraph_node *scc_rep = order[i];
4472 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
4474 /* First step of parameter removal. */
4475 for (cgraph_node *v : cycle_nodes)
4477 isra_func_summary *ifs = func_sums->get (v);
4478 if (!ifs || !ifs->m_candidate)
4479 continue;
4480 if (adjust_parameter_descriptions (v, ifs))
4481 continue;
4482 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
4483 process_edge_to_unknown_caller (cs);
4484 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4485 if (!ipa_edge_within_scc (cs))
4486 param_removal_cross_scc_edge (cs);
4489 /* Look at edges within the current SCC and propagate used-ness across
4490 them, pushing onto the stack all nodes which might need to be
4491 revisited. */
4492 for (cgraph_node *v : cycle_nodes)
4493 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
4494 &stack, true);
4496 /* Keep revisiting and pushing until nothing changes. */
4497 while (!stack.is_empty ())
4499 cgraph_node *v = stack.pop ();
4500 isra_func_summary *ifs = func_sums->get (v);
4501 gcc_checking_assert (ifs && ifs->m_queued);
4502 ifs->m_queued = false;
4504 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
4505 &stack, true);
4508 /* Parameter splitting. */
4509 bool repeat_scc_access_propagation;
4512 repeat_scc_access_propagation = false;
4513 for (cgraph_node *v : cycle_nodes)
4515 isra_func_summary *ifs = func_sums->get (v);
4516 if (!ifs
4517 || !ifs->m_candidate
4518 || vec_safe_is_empty (ifs->m_parameters))
4519 continue;
4520 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4521 if (param_splitting_across_edge (cs))
4522 repeat_scc_access_propagation = true;
4525 while (repeat_scc_access_propagation);
4527 if (flag_checking)
4528 for (cgraph_node *v : cycle_nodes)
4529 verify_splitting_accesses (v, true);
4531 cycle_nodes.release ();
4534 ipa_free_postorder_info ();
4535 free (order);
4537 if (dump_file)
4539 if (dump_flags & TDF_DETAILS)
4541 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
4542 " ==========\n");
4543 ipa_sra_dump_all_summaries (dump_file, true);
4545 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4548 hash_map<const char *, unsigned> *clone_num_suffixes
4549 = new hash_map<const char *, unsigned>;
4551 cgraph_node *node;
4552 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4553 process_isra_node_results (node, clone_num_suffixes);
4555 delete clone_num_suffixes;
4556 ggc_delete (func_sums);
4557 func_sums = NULL;
4558 delete call_sums;
4559 call_sums = NULL;
4561 if (dump_file)
4562 fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
4563 "==========\n\n");
4564 return 0;
4568 const pass_data pass_data_ipa_sra =
4570 IPA_PASS, /* type */
4571 "sra", /* name */
4572 OPTGROUP_NONE, /* optinfo_flags */
4573 TV_IPA_SRA, /* tv_id */
4574 0, /* properties_required */
4575 0, /* properties_provided */
4576 0, /* properties_destroyed */
4577 0, /* todo_flags_start */
4578 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4581 class pass_ipa_sra : public ipa_opt_pass_d
4583 public:
4584 pass_ipa_sra (gcc::context *ctxt)
4585 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4586 ipa_sra_generate_summary, /* generate_summary */
4587 ipa_sra_write_summary, /* write_summary */
4588 ipa_sra_read_summary, /* read_summary */
4589 NULL , /* write_optimization_summary */
4590 NULL, /* read_optimization_summary */
4591 NULL, /* stmt_fixup */
4592 0, /* function_transform_todo_flags_start */
4593 NULL, /* function_transform */
4594 NULL) /* variable_transform */
4597 /* opt_pass methods: */
4598 bool gate (function *) final override
4600 /* TODO: We should remove the optimize check after we ensure we never run
4601 IPA passes when not optimizing. */
4602 return (flag_ipa_sra && optimize);
4605 unsigned int execute (function *) final override
4607 return ipa_sra_analysis ();
4610 }; // class pass_ipa_sra
4612 } // anon namespace
4614 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4615 create a summary structure describing IPA-SRA opportunities and constraints
4616 in it. */
4618 static void
4619 ipa_sra_summarize_function (cgraph_node *node)
4621 if (dump_file)
4622 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4623 node->order);
4624 gcc_obstack_init (&gensum_obstack);
4625 loaded_decls = new hash_set<tree>;
4627 isra_func_summary *ifs = NULL;
4628 unsigned count = 0;
4629 if (ipa_sra_preliminary_function_checks (node))
4631 ifs = func_sums->get_create (node);
4632 ifs->m_candidate = true;
4633 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4634 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4635 for (tree parm = DECL_ARGUMENTS (node->decl);
4636 parm;
4637 parm = DECL_CHAIN (parm))
4638 count++;
4640 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4642 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4643 bool cfun_pushed = false;
4644 if (count > 0)
4646 decl2desc = new hash_map<tree, gensum_param_desc *>;
4647 param_descriptions.reserve_exact (count);
4648 param_descriptions.quick_grow_cleared (count);
4650 if (create_parameter_descriptors (node, &param_descriptions))
4652 push_cfun (fun);
4653 cfun_pushed = true;
4654 final_bbs = BITMAP_ALLOC (NULL);
4655 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4656 unsafe_by_ref_count
4657 * last_basic_block_for_fn (fun));
4658 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4661 /* scan_function is run even when there are no removal or splitting
4662 candidates so that we can calculate hints on call edges which can be
4663 useful in callees. */
4664 scan_function (node, fun);
4666 if (count > 0)
4668 if (dump_file)
4670 dump_gensum_param_descriptors (dump_file, node->decl,
4671 &param_descriptions);
4672 fprintf (dump_file, "----------------------------------------\n");
4675 process_scan_results (node, fun, ifs, &param_descriptions);
4677 if (cfun_pushed)
4678 pop_cfun ();
4679 if (bb_dereferences)
4681 free (bb_dereferences);
4682 bb_dereferences = NULL;
4683 BITMAP_FREE (final_bbs);
4684 final_bbs = NULL;
4687 isra_analyze_all_outgoing_calls (node);
4689 delete loaded_decls;
4690 loaded_decls = NULL;
4691 if (decl2desc)
4693 delete decl2desc;
4694 decl2desc = NULL;
4696 obstack_free (&gensum_obstack, NULL);
4697 if (dump_file)
4698 fprintf (dump_file, "\n\n");
4699 if (flag_checking)
4700 verify_splitting_accesses (node, false);
4701 return;
4704 ipa_opt_pass_d *
4705 make_pass_ipa_sra (gcc::context *ctxt)
4707 return new pass_ipa_sra (ctxt);
4710 /* Reset all state within ipa-sra.cc so that we can rerun the compiler
4711 within the same process. For use by toplev::finalize. */
4713 void
4714 ipa_sra_cc_finalize (void)
4716 if (func_sums)
4717 ggc_delete (func_sums);
4718 func_sums = NULL;
4719 delete call_sums;
4720 call_sums = NULL;
4723 #include "gt-ipa-sra.h"