gcc/tree-profile.cc
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
6 Converted to use trees by Dale Johannesen, Apple Computer.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 /* Generate basic block profile instrumentation and auxiliary files.
25 Tree-based version. See profile.cc for overview. */
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "memmodel.h"
31 #include "backend.h"
32 #include "target.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "tree-pass.h"
37 #include "ssa.h"
38 #include "cgraph.h"
39 #include "coverage.h"
40 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "varasm.h"
43 #include "tree-nested.h"
44 #include "gimplify.h"
45 #include "gimple-iterator.h"
46 #include "gimplify-me.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "value-prof.h"
50 #include "profile.h"
51 #include "tree-cfgcleanup.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "tree-pretty-print.h"
55 #include "langhooks.h"
56 #include "stor-layout.h"
57 #include "xregex.h"
58 #include "alloc-pool.h"
59 #include "symbol-summary.h"
60 #include "symtab-thunks.h"
61 #include "cfganal.h"
63 static GTY(()) tree gcov_type_node;
64 static GTY(()) tree tree_interval_profiler_fn;
65 static GTY(()) tree tree_pow2_profiler_fn;
66 static GTY(()) tree tree_topn_values_profiler_fn;
67 static GTY(()) tree tree_indirect_call_profiler_fn;
68 static GTY(()) tree tree_average_profiler_fn;
69 static GTY(()) tree tree_ior_profiler_fn;
70 static GTY(()) tree tree_time_profiler_counter;
73 static GTY(()) tree ic_tuple_var;
74 static GTY(()) tree ic_tuple_counters_field;
75 static GTY(()) tree ic_tuple_callee_field;
77 /* Types of counter update methods.
79 By default, the counter updates are done for a single threaded system
80 (COUNTER_UPDATE_SINGLE_THREAD).
82 If the user selected atomic profile counter updates
83 (-fprofile-update=atomic), then the counter updates will be done atomically
84 on a best-effort basis. One of three methods to do the counter updates is
85 selected according to the target capabilities.
87 Ideally, the counter updates are done through atomic operations in hardware
88 (COUNTER_UPDATE_ATOMIC_BUILTIN).
90 If the target supports only 32-bit atomic increments and gcov_type_node is a
91 64-bit integer type, then for the profile edge counters the increment is
92 performed through two separate 32-bit atomic increments
93 (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the
94 target supports libatomic (targetm.have_libatomic), then other counter
95 updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT).
96 If the target does not support libatomic, then the other counter updates are
97 not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is
98 issued.
If the target does not support atomic operations in hardware but does
   support libatomic, then all updates are carried out by libatomic calls
   (COUNTER_UPDATE_ATOMIC_BUILTIN). */
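/* As an illustrative sketch (not the emitted GIMPLE itself), the split
   update used for 64-bit counters roughly corresponds to:

     low = __atomic_add_fetch ((uint32_t *) &counter, 1, __ATOMIC_RELAXED);
     carry = (low == 0);
     __atomic_add_fetch ((uint32_t *) &counter + 1, carry, __ATOMIC_RELAXED);

   assuming a little-endian word order; see gen_counter_update below for the
   actual code generation, including the big-endian and non-atomic cases.  */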
103 enum counter_update_method {
104 COUNTER_UPDATE_SINGLE_THREAD,
105 COUNTER_UPDATE_ATOMIC_BUILTIN,
106 COUNTER_UPDATE_ATOMIC_SPLIT,
107 COUNTER_UPDATE_ATOMIC_PARTIAL
110 static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD;
/* These functions support measuring modified condition/decision coverage
113 (MC/DC). MC/DC requires all of the below during testing:
115 - Each entry and exit point is invoked
116 - Each decision takes every possible outcome
117 - Each condition in a decision takes every possible outcome
118 - Each condition in a decision is shown to independently affect the outcome
119 of the decision
121 Independence of a condition is shown by recording it being evaluated to a
122 value (true/false) and not being made irrelevant ("masked") by a later term.
123 This feature adds some instrumentation code, a few bitwise operators, that
124 records the branches taken in conditions and applies a filter for the
125 masking effect. Masking is essentially short-circuiting in reverse: a
condition does not contribute to the outcome if it would short-circuit the
   (sub)expression were it evaluated right-to-left, as in (_ && false) and
   (_ || true).
130 The program is essentially rewritten this way:
132 - if (a || b) { fn () }
133 + if (a) { _t |= 0x1; goto _then; }
134 + else { _f |= 0x1;
135 + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; }
136 + else { _f |= 0x2; goto _else; }
137 + _then:
138 + _gcov_t |= (_t & _mask);
139 + _gcov_f |= (_f & _mask);
140 + fn (); goto _end;
141 + _else:
142 + _gcov_t |= (_t & _mask);
143 + _gcov_f |= (_f & _mask);
144 + fn ();
145 + _end:
It is assumed the front end will provide discriminators so that conditional
   basic blocks (basic blocks with a conditional jump and outgoing true/false
149 edges) that belong to the same Boolean expression have the same
150 discriminator. Masking is determined by analyzing these expressions as a
151 reduced order binary decision diagram. */
152 namespace
154 /* Some context and reused instances between function calls. Large embedded
155 buffers are used to up-front request enough memory for most programs and
156 merge them into a single allocation at the cost of using more memory in the
157 average case. Some numbers from linux v5.13 which is assumed to be a
158 reasonably diverse code base: 75% of the functions in linux have less than
159 16 nodes in the CFG and approx 2.5% have more than 64 nodes. The functions
160 that go beyond a few dozen nodes tend to be very large (>100) and so 64
161 seems like a good balance.
163 This is really just a performance balance of the cost of allocation and
164 wasted memory. */
165 struct conds_ctx
/* This is a reusable shared allocation that is also used to return
   single expressions, which means that for most code it should only hold
   a couple of elements. */
170 auto_vec<basic_block, 64> blocks;
/* Maps basic_block->index to a topological ordering so that for the
   expression (a || b && c) it holds that top_index[a] < top_index[b]
   < top_index[c]. */
175 auto_vec<int, 256> top_index;
177 /* Pre-allocate bitmaps and vectors for per-function book keeping. This is
178 pure instance reuse and the bitmaps carry no data between function
179 calls. */
180 auto_vec<basic_block, 64> B1;
181 auto_vec<basic_block, 64> B2;
182 auto_sbitmap G1;
183 auto_sbitmap G2;
184 auto_sbitmap G3;
186 explicit conds_ctx (unsigned size) noexcept (true) : G1 (size), G2 (size),
187 G3 (size)
/* Only instrument terms with fewer than the number of bits in a (wide) gcov
193 integer, which is probably 64. The algorithm itself does not impose this
194 limitation, but it makes for a simpler implementation.
196 * Allocating the output data structure (coverage_counter_alloc ()) can
197 assume pairs of gcov_type_unsigned and not use a separate length field.
* A pair of gcov_type_unsigned can be used as accumulators.
   * Updating accumulators can use the bitwise operations |= and &= rather
     than custom operators that work for arbitrary-sized bit-sets.
202 Most real-world code should be unaffected by this, but it is possible
203 (especially for generated code) to exceed this limit. */
204 #define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node))
205 #define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
207 /* Compare two basic blocks by their order in the expression i.e. for (a || b)
208 then topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS
209 belong to different expressions. The TOP_INDEX argument should be the
210 top_index vector from ctx. */
212 topological_cmp (const void *lhs, const void *rhs, void *top_index)
214 const_basic_block l = *(const basic_block*) lhs;
215 const_basic_block r = *(const basic_block*) rhs;
216 const vec<int>* im = (const vec<int>*) top_index;
217 return (*im)[l->index] - (*im)[r->index];
220 /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two
221 uses, sometimes for the index and sometimes for set member checks. Sets are
222 typically very small (number of conditions, >8 is uncommon) so linear search
223 should be very fast. */
225 index_of (const basic_block needle, array_slice<basic_block> blocks)
227 for (size_t i = 0; i < blocks.size (); i++)
228 if (blocks[i] == needle)
229 return int (i);
230 return -1;
233 /* Special cases of the single_*_p and single_*_edge functions in basic-block.h
234 that don't consider exception handling or other complex edges. This helps
235 create a view of the CFG with only normal edges - if a basic block has both
an outgoing fallthrough and an exceptional edge, it should be considered a
   single-successor block. */
238 bool
239 single_p (const vec<edge, va_gc> *edges)
241 int n = EDGE_COUNT (edges);
242 if (n == 0)
243 return false;
245 for (edge e : edges)
246 if (e->flags & EDGE_COMPLEX)
247 n -= 1;
249 return n == 1;
/* Get the single, non-complex edge.  Behavior is undefined if EDGES has
   more than one non-complex edge. */
254 edge
255 single_edge (const vec<edge, va_gc> *edges)
257 gcc_checking_assert (single_p (edges));
258 for (edge e : edges)
260 if (e->flags & EDGE_COMPLEX)
261 continue;
262 return e;
264 return NULL;
/* Sometimes, for example with function calls, goto labels, and C++
   destructors, the CFG gets extra nodes that are essentially single-entry
   single-exit in the middle of boolean expressions.  For example:

     x || can_throw (y)

   [CFG diagram of the expression with the extra thrower node omitted.]

   Without the extra node inserted by the function + exception it becomes a
   proper 2-term graph, not 2 single-term graphs.

   [CFG diagram of the plain 2-term graph omitted.]

   This function finds the source edge of these paths.  This is often the
   identity function. */
294 edge
295 contract_edge_up (edge e)
297 while (true)
299 basic_block src = e->src;
300 if (!single_p (src->preds))
301 return e;
302 if (!single_p (src->succs))
303 return e;
304 e = single_edge (src->preds);
308 /* A simple struct for storing/returning outcome block pairs. Either both
309 blocks are set or both are NULL. */
310 struct outcomes
312 basic_block t = NULL;
313 basic_block f = NULL;
315 operator bool () const noexcept (true)
317 return t && f;
321 /* Get the true/false successors of a basic block. If b is not a conditional
322 block both edges are NULL. */
323 outcomes
324 conditional_succs (const basic_block b)
326 outcomes c;
327 for (edge e : b->succs)
329 if (e->flags & EDGE_TRUE_VALUE)
330 c.t = e->dest;
331 if (e->flags & EDGE_FALSE_VALUE)
332 c.f = e->dest;
335 gcc_assert ((c.t && c.f) || (!c.t && !c.f));
336 return c;
339 /* Get the index or offset of a conditional flag, 0 for true and 1 for false.
340 These indices carry no semantics but must be consistent as they are used to
341 index into data structures in code generation and gcov. */
342 unsigned
343 condition_index (unsigned flag)
345 return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1;
348 /* Returns the condition identifier for the basic block if set, otherwise 0.
349 This is only meaningful in GIMPLE and is used for condition coverage.
351 There may be conditions created that did not get an uid, such as those
352 implicitly created by destructors. We could include them in the condition
353 coverage for completeness (i.e. condition coverage implies (implicit) branch
354 coverage), but they have no natural buckets and should all be single-term.
355 For now these are ignored and given uid = 0, and branch coverage is left to
356 -fprofile-arcs.
358 Under optimization, COND_EXPRs may be folded, replaced with switches,
359 min-max, etc., which leaves ghost identifiers in basic blocks that do not
360 end with a conditional jump. They are not really meaningful for condition
361 coverage anymore, but since coverage is unreliable under optimization anyway
362 this is not a big problem. */
363 unsigned
364 condition_uid (struct function *fn, basic_block b)
366 gimple *stmt = gsi_stmt (gsi_last_bb (b));
367 if (!safe_is_a<gcond *> (stmt))
368 return 0;
370 unsigned *v = fn->cond_uids->get (as_a <gcond*> (stmt));
371 return v ? *v : 0;
374 /* Compute the masking table.
376 Masking and short circuiting are deeply connected - masking occurs when
377 control flow reaches a state that is also reachable with short circuiting.
378 In fact, masking corresponds to short circuiting for the reversed
expression. This means we can find the limits, the last term in preceding
380 subexpressions, by following the edges that short circuit to the same
381 outcome. The algorithm treats the CFG as a reduced order binary decision
382 diagram (see Randall E. Bryant's Graph Based Algorithms for Boolean
383 Function Manipulation (1987)).
In the simplest case a || b:

   [CFG diagram omitted.]

   T has multiple incoming edges and is the outcome of a short circuit,
394 with top = a, bot = b. The top node (a) is masked when the edge (b, T) is
395 taken.
397 The names "top" and "bot" refer to a pair of nodes with a shared
398 successor. The top is always the node corresponding to the left-most
399 operand of the two, and it holds that top < bot in a topological ordering.
Now consider (a && b) || (c && d) and its masking table:

   [CFG diagram omitted.]
413 a[0] = {}
414 a[1] = {}
415 b[0] = {a}
416 b[1] = {}
417 c[0] = {}
418 c[1] = {}
419 d[0] = {c}
420 d[1] = {a,b}
422 Note that 0 and 1 are indices and not boolean values - a[0] is the index in
423 the masking vector when a takes the true edge.
425 b[0] and d[0] are identical to the a || b example, and d[1] is the bot in
426 the triangle [d, b] -> T. b is the top node in the [d, b] relationship and
427 last term in (a && b). To find the other terms masked we use the fact that
428 all paths in an expression go through either of the outcomes, found by
429 collecting all non-complex edges that go out of the expression (the
neighborhood). In some cases the outgoing edges go through intermediate (or
431 bypass) nodes, and we collect these paths too (see contract_edge_up).
We find the terms by marking the outcomes (in this case c, T), walking the
   predecessors starting at top (in this case b), and masking nodes when both
   successors are marked.
437 The masking table is represented as two bitfields per term in the expression
438 with the index corresponding to the term in the Boolean expression.
439 a || b && c becomes the term vector [a b c] and the masking table [a[0]
a[1] b[0] ...]. The kth bit of a masking vector is set if the kth term
441 is masked by taking the edge.
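   As a small additional worked example in the same notation (not part of the
   algorithm description above): for a || b && c the term vector is [a b c]
   and only the last term can mask anything, so the table is a[0] = a[1] =
   b[0] = b[1] = {}, c[0] = {a}, c[1] = {b} - taking the true edge out of c
   makes a irrelevant (_ || true), and taking the false edge out of c makes b
   irrelevant (_ && false).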
443 The out masks are in uint64_t (the practical maximum for gcov_type_node for
444 any target) as it has to be big enough to store the target size gcov types
445 independent of the host. */
446 void
447 masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks,
448 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
450 gcc_assert (blocks.is_valid ());
451 gcc_assert (!blocks.empty ());
452 gcc_assert (maps.is_valid ());
453 gcc_assert (masks.is_valid ());
454 gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS);
456 if (bitmap_count_bits (maps[0]) == 1)
457 return;
459 sbitmap marks = ctx.G1;
460 const sbitmap core = maps[0];
461 const sbitmap allg = maps[1];
462 vec<basic_block>& queue = ctx.B1;
463 vec<basic_block>& body = ctx.B2;
464 const vec<int>& top_index = ctx.top_index;
466 /* Set up for the iteration - include the outcome nodes in the traversal.
467 The algorithm compares pairs of nodes and is not really sensitive to
traversal order, but needs to maintain topological order because the
469 index of masking nodes maps to the index in the accumulators. We must
470 also check the incoming-to-outcome pairs. These edges may in turn be
471 split (this happens with labels on top of then/else blocks) so we must
follow any single-in single-out path.  The non-condition blocks do not
      have to be in order, as they will not be considered for the set-bit
      index. */
475 body.truncate (0);
476 body.reserve (blocks.size () + 2);
477 for (const basic_block b : blocks)
478 if (bitmap_bit_p (core, b->index))
479 body.quick_push (b);
481 for (basic_block b : blocks)
483 if (!bitmap_bit_p (core, b->index))
484 continue;
486 for (edge e : b->succs)
488 if (e->flags & EDGE_COMPLEX)
489 continue;
490 if (bitmap_bit_p (allg, e->dest->index))
491 continue;
492 body.safe_push (e->dest);
494 /* There may be multiple nodes between the condition edge and the
495 actual outcome, and we need to know when these paths join to
496 determine if there is short circuit/masking. This is
497 effectively creating a virtual edge from the condition node to
498 the real outcome. */
499 while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
501 e = single_edge (e->dest->succs);
502 body.safe_push (e->dest);
507 /* Find the masking. The leftmost element cannot mask anything, so
508 start at 1. */
509 for (size_t i = 1; i != body.length (); i++)
511 const basic_block b = body[i];
512 for (edge e1 : b->preds)
513 for (edge e2 : b->preds)
515 if (e1 == e2)
516 continue;
517 if ((e1->flags | e2->flags) & EDGE_COMPLEX)
518 continue;
520 edge etop = contract_edge_up (e1);
521 edge ebot = contract_edge_up (e2);
522 gcc_assert (etop != ebot);
524 const basic_block top = etop->src;
525 const basic_block bot = ebot->src;
526 const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION;
527 if (!cond)
528 continue;
529 if (top_index[top->index] > top_index[bot->index])
530 continue;
531 if (!bitmap_bit_p (core, top->index))
532 continue;
533 if (!bitmap_bit_p (core, bot->index))
534 continue;
536 outcomes out = conditional_succs (top);
537 gcc_assert (out);
538 bitmap_clear (marks);
539 bitmap_set_bit (marks, out.t->index);
540 bitmap_set_bit (marks, out.f->index);
541 queue.truncate (0);
542 queue.safe_push (top);
544 // The edge bot -> outcome triggers the masking
545 const int m = 2*index_of (bot, body) + condition_index (cond);
546 gcc_assert (m >= 0);
547 while (!queue.is_empty ())
549 basic_block q = queue.pop ();
550 /* q may have been processed & completed by being added to the
551 queue multiple times, so check that there is still work to
552 do before continuing. */
553 if (bitmap_bit_p (marks, q->index))
554 continue;
556 outcomes succs = conditional_succs (q);
557 if (!bitmap_bit_p (marks, succs.t->index))
558 continue;
559 if (!bitmap_bit_p (marks, succs.f->index))
560 continue;
562 const int index = index_of (q, body);
563 gcc_assert (index != -1);
564 masks[m] |= uint64_t (1) << index;
565 bitmap_set_bit (marks, q->index);
567 for (edge e : q->preds)
569 e = contract_edge_up (e);
570 if (e->flags & EDGE_DFS_BACK)
571 continue;
572 if (bitmap_bit_p (marks, e->src->index))
573 continue;
574 if (!bitmap_bit_p (core, e->src->index))
575 continue;
576 queue.safe_push (e->src);
/* Emit LHS = RHS on edges.  This is just a shorthand that automates building
   the assignment and immediately putting it on the edge, which otherwise gets
   noisy. */
586 tree
587 emit_assign (edge e, tree lhs, tree rhs)
589 gassign *w = gimple_build_assign (lhs, rhs);
590 gsi_insert_on_edge (e, w);
591 return lhs;
594 /* Emit lhs = RHS on edges. The lhs is created. */
595 tree
596 emit_assign (edge e, tree rhs)
598 return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
601 /* Emit LHS = OP1 <OP> OP2 on edges. */
602 tree
603 emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE)
605 tree lhs = make_ssa_name (gcov_type_node);
606 gassign *w = gimple_build_assign (lhs, op, op1, op2);
607 gsi_insert_on_edge (e, w);
608 return lhs;
611 /* Visitor for make_top_index. */
612 void
613 make_top_index_visit (basic_block b, vec<basic_block>& L, vec<int>& marks)
615 if (marks[b->index])
616 return;
618 /* Follow the false edge first, if it exists, so that true paths are given
619 the lower index in the ordering. Any iteration order
620 would yield a valid and useful topological ordering, but making sure the
621 true branch has the lower index first makes reporting work better for
622 expressions with ternaries. Walk the false branch first because the
623 array will be reversed to finalize the topological order.
625 With the wrong ordering (a ? b : c) && d could become [a c b d], but the
626 (expected) order is really [a b c d]. */
628 const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE;
629 for (edge e : b->succs)
630 if ((e->flags & false_fwd) == EDGE_FALSE_VALUE)
631 make_top_index_visit (e->dest, L, marks);
633 for (edge e : b->succs)
634 if (!(e->flags & false_fwd))
635 make_top_index_visit (e->dest, L, marks);
637 marks[b->index] = 1;
638 L.quick_push (b);
641 /* Find a topological sorting of the blocks in a function so that left operands
642 are before right operands including subexpressions. Sorting on block index
643 does not guarantee this property and the syntactical order of terms is very
644 important to the condition coverage. The sorting algorithm is from Cormen
645 et al (2001) but with back-edges ignored and thus there is no need for
646 temporary marks (for cycle detection). The L argument is a buffer/working
647 memory, and the output will be written to TOP_INDEX.
649 For the expression (a || (b && c) || d) the blocks should be [a b c d]. */
650 void
651 make_top_index (array_slice<basic_block> blocks, vec<basic_block>& L,
652 vec<int>& top_index)
654 L.truncate (0);
655 L.reserve (blocks.size ());
657 /* Use of the output map as a temporary for tracking visited status. */
658 top_index.truncate (0);
659 top_index.safe_grow_cleared (blocks.size ());
660 for (const basic_block b : blocks)
661 make_top_index_visit (b, L, top_index);
663 /* Insert canaries - if there are unreachable nodes (for example infinite
664 loops) then the unreachable nodes should never be needed for comparison,
665 and L.length () < max_index. An index mapping should also never be
666 recorded twice. */
667 for (unsigned i = 0; i != top_index.length (); i++)
668 top_index[i] = -1;
670 gcc_assert (blocks.size () == L.length ());
671 L.reverse ();
672 const unsigned nblocks = L.length ();
673 for (unsigned i = 0; i != nblocks; i++)
675 gcc_assert (L[i]->index != -1);
676 top_index[L[i]->index] = int (i);
680 /* Find all nodes including non-conditions in a Boolean expression. We need to
681 know the paths through the expression so that the masking and
682 instrumentation phases can limit searches and know what subgraphs must be
683 threaded through, but not counted, such as the (b || c) in
684 a && fn (b || c) && d.
686 It is essentially the intersection of downwards paths from the expression
687 nodes EXPR to the post-dominator and upwards from the post-dominator.
688 Finding the dominator is slightly more involved than picking the first/last,
689 particularly under optimization, because both incoming and outgoing paths
690 may have multiple entries/exits.
692 It is assumed GRAPH is an array_slice of the basic blocks of this function
693 sorted by the basic block index. */
694 vec<basic_block>&
695 paths_between (conds_ctx &ctx, array_slice<basic_block> graph,
696 const vec<basic_block>& expr)
698 if (expr.length () == 1)
700 ctx.blocks.truncate (0);
701 ctx.blocks.safe_push (expr[0]);
702 return ctx.blocks;
705 basic_block dom;
706 sbitmap up = ctx.G1;
707 sbitmap down = ctx.G2;
708 sbitmap paths = ctx.G3;
709 vec<basic_block>& queue = ctx.B1;
711 queue.truncate (0);
712 bitmap_clear (down);
713 dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]);
714 for (basic_block b : expr)
715 if (dom != b)
716 dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b);
717 queue.safe_splice (expr);
718 while (!queue.is_empty ())
720 basic_block b = queue.pop ();
721 if (!bitmap_set_bit (down, b->index))
722 continue;
723 if (b == dom)
724 continue;
725 for (edge e : b->succs)
726 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
727 queue.safe_push (e->dest);
730 queue.truncate (0);
731 bitmap_clear (up);
732 dom = expr[0];
733 for (basic_block b : expr)
734 if (dom != b)
735 dom = nearest_common_dominator (CDI_DOMINATORS, dom, b);
736 queue.safe_splice (expr);
737 while (!queue.is_empty ())
739 basic_block b = queue.pop ();
740 if (!bitmap_set_bit (up, b->index))
741 continue;
742 if (b == dom)
743 continue;
744 for (edge e : b->preds)
745 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
746 queue.safe_push (e->src);
749 bitmap_and (paths, up, down);
750 vec<basic_block>& blocks = ctx.blocks;
751 blocks.truncate (0);
752 blocks.reserve (graph.size ());
753 sbitmap_iterator itr;
754 unsigned index;
755 EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr)
756 blocks.quick_push (graph[index]);
757 return blocks;
762 /* Context object for the condition coverage. This stores conds_ctx (the
763 buffers reused when analyzing the cfg) and the output arrays. This is
764 designed to be heap allocated and aggressively preallocates large buffers to
765 avoid having to reallocate for most programs. */
766 struct condcov
768 explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks),
769 m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks))
771 bitmap_vector_clear (m_maps, 2 * nblocks);
773 auto_vec<size_t, 128> m_index;
774 auto_vec<basic_block, 256> m_blocks;
775 auto_vec<uint64_t, 512> m_masks;
776 conds_ctx ctx;
777 sbitmap *m_maps;
/* Get the length, that is, the number of Boolean expressions found.
   cov_length is the one-past index for cov_{blocks,masks,maps}. */
782 size_t
783 cov_length (const struct condcov* cov)
785 if (cov->m_index.is_empty ())
786 return 0;
787 return cov->m_index.length () - 1;
/* The subgraph, excluding intermediates, for the nth Boolean expression. */
791 array_slice<basic_block>
792 cov_blocks (struct condcov* cov, size_t n)
794 if (n >= cov->m_index.length ())
795 return array_slice<basic_block>::invalid ();
797 basic_block *begin = cov->m_blocks.begin () + cov->m_index[n];
798 basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1];
799 return array_slice<basic_block> (begin, end - begin);
802 /* The masks for the nth Boolean expression. */
803 array_slice<uint64_t>
804 cov_masks (struct condcov* cov, size_t n)
806 if (n >= cov->m_index.length ())
807 return array_slice<uint64_t>::invalid ();
809 uint64_t *begin = cov->m_masks.begin () + 2*cov->m_index[n];
810 uint64_t *end = cov->m_masks.begin () + 2*cov->m_index[n + 1];
811 return array_slice<uint64_t> (begin, end - begin);
814 /* The maps for the nth Boolean expression. */
815 array_slice<sbitmap>
816 cov_maps (struct condcov* cov, size_t n)
818 if (n >= cov->m_index.length ())
819 return array_slice<sbitmap>::invalid ();
821 sbitmap *begin = cov->m_maps + 2*n;
822 sbitmap *end = begin + 2;
823 return array_slice<sbitmap> (begin, end - begin);
826 /* Deleter for condcov. */
827 void
828 cov_free (struct condcov* cov)
830 sbitmap_vector_free (cov->m_maps);
831 delete cov;
834 /* Condition coverage (MC/DC)
836 Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for
837 MC/DC" describe an algorithm for modified condition/decision coverage based
on AST analysis.  This algorithm instead analyzes the control flow graph
   (interpreted as a binary decision diagram) to determine the masking vectors.
840 The individual phases are described in more detail closer to the
841 implementation.
843 The coverage only considers the positions, not the symbols, in a
844 conditional, e.g. !A || (!B && A) is a 3-term conditional even though A
845 appears twice. Subexpressions have no effect on term ordering:
846 (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose
847 arguments are Boolean expressions are treated as separate expressions, that
848 is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d].
850 The output for gcov is a vector of pairs of unsigned integers, interpreted
851 as bit-sets, where the bit index corresponds to the index of the condition
852 in the expression.
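   As a purely illustrative example of that encoding: for a || b, a test run
   with a = false, b = true contributes 0b10 to the true bit-set and nothing
   to the false bit-set, because taking the true edge out of b masks a.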
854 The returned condcov should be released by the caller with cov_free. */
855 struct condcov*
856 find_conditions (struct function *fn)
858 mark_dfs_back_edges (fn);
859 const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS);
860 const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS);
861 if (!have_dom)
862 calculate_dominance_info (CDI_DOMINATORS);
863 if (!have_post_dom)
864 calculate_dominance_info (CDI_POST_DOMINATORS);
866 const unsigned nblocks = n_basic_blocks_for_fn (fn);
867 basic_block *fnblocksp = basic_block_info_for_fn (fn)->address ();
868 condcov *cov = new condcov (nblocks);
869 conds_ctx& ctx = cov->ctx;
870 array_slice<basic_block> fnblocks (fnblocksp, nblocks);
871 make_top_index (fnblocks, ctx.B1, ctx.top_index);
873 /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...]. */
874 hash_map<int_hash<unsigned, 0>, vec<basic_block>> exprs;
875 for (basic_block b : fnblocks)
877 const unsigned uid = condition_uid (fn, b);
878 if (uid == 0)
879 continue;
880 exprs.get_or_insert (uid).safe_push (b);
883 /* Visit all reachable nodes and collect conditions. Topological order is
884 important so the first node of a boolean expression is visited first
885 (it will mark subsequent terms). */
886 cov->m_index.safe_push (0);
887 for (auto expr : exprs)
889 vec<basic_block>& conds = expr.second;
890 if (conds.length () > CONDITIONS_MAX_TERMS)
892 location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
893 warning_at (loc, OPT_Wcoverage_too_many_conditions,
894 "Too many conditions (found %u); giving up coverage",
895 conds.length ());
896 continue;
898 conds.sort (topological_cmp, &ctx.top_index);
899 vec<basic_block>& subgraph = paths_between (ctx, fnblocks, conds);
900 subgraph.sort (topological_cmp, &ctx.top_index);
901 const unsigned index = cov->m_index.length () - 1;
902 sbitmap condm = cov->m_maps[0 + 2*index];
903 sbitmap subgm = cov->m_maps[1 + 2*index];
904 for (basic_block b : conds)
905 bitmap_set_bit (condm, b->index);
906 for (basic_block b : subgraph)
907 bitmap_set_bit (subgm, b->index);
908 cov->m_blocks.safe_splice (subgraph);
909 cov->m_index.safe_push (cov->m_blocks.length ());
912 if (!have_dom)
913 free_dominance_info (fn, CDI_DOMINATORS);
914 if (!have_post_dom)
915 free_dominance_info (fn, CDI_POST_DOMINATORS);
917 cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
918 const size_t length = cov_length (cov);
919 for (size_t i = 0; i != length; i++)
920 masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
921 cov_masks (cov, i));
923 return cov;
926 namespace
/* Stores the incoming edge and previous counters (in SSA form) on that edge
   for the node e->dest.  The counters record the seen-true (0), seen-false
   (1), and current-mask (2).  They are stored in
932 an array rather than proper members for access-by-index as the code paths
933 tend to be identical for the different counters. */
934 struct counters
936 edge e;
937 tree counter[3];
938 tree& operator [] (size_t i) { return counter[i]; }
941 /* Find the counters for the incoming edge e, or NULL if the edge has not been
942 recorded (could be for complex incoming edges). */
943 counters*
944 find_counters (vec<counters>& candidates, edge e)
946 for (counters& candidate : candidates)
947 if (candidate.e == e)
948 return &candidate;
949 return NULL;
952 /* Resolve the SSA for a specific counter KIND. If it is not modified by any
953 incoming edges, simply forward it, otherwise create a phi node of all the
954 candidate counters and return it. */
955 tree
956 resolve_counter (vec<counters>& cands, size_t kind)
958 gcc_assert (!cands.is_empty ());
959 gcc_assert (kind < 3);
961 counters& fst = cands[0];
963 if (!fst.e || fst.e->dest->preds->length () == 1)
965 gcc_assert (cands.length () == 1);
966 return fst[kind];
969 tree zero0 = build_int_cst (gcov_type_node, 0);
970 tree ssa = make_ssa_name (gcov_type_node);
971 gphi *phi = create_phi_node (ssa, fst.e->dest);
972 for (edge e : fst.e->dest->preds)
974 counters *prev = find_counters (cands, e);
975 if (prev)
976 add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION);
977 else
979 tree zero = make_ssa_name (gcov_type_node);
980 gimple_stmt_iterator gsi = gsi_after_labels (e->src);
981 gassign *set = gimple_build_assign (zero, zero0);
982 gsi_insert_before (&gsi, set, GSI_NEW_STMT);
983 add_phi_arg (phi, zero, e, UNKNOWN_LOCATION);
986 return ssa;
/* Resolve all the counters for a node.  Note that the edge is left undefined,
   as the counters are intended to form the base to push to the successors,
   and because the edge is only meaningful for nodes with a single
   predecessor. */
992 counters
993 resolve_counters (vec<counters>& cands)
995 counters next;
996 next[0] = resolve_counter (cands, 0);
997 next[1] = resolve_counter (cands, 1);
998 next[2] = resolve_counter (cands, 2);
999 return next;
1004 /* Add instrumentation to a decision subgraph. EXPR should be the
1005 (topologically sorted) block of nodes returned by cov_blocks, MAPS the
1006 bitmaps returned by cov_maps, and MASKS the block of bitsets returned by
1007 cov_masks. CONDNO should be the index of this condition in the function,
1008 i.e. the same argument given to cov_{masks,graphs}. EXPR may contain nodes
1009 in-between the conditions, e.g. when an operand contains a function call,
1010 or there is a setjmp and the cfg is filled with complex edges.
1012 Every node is annotated with three counters; the true, false, and mask
value.  First, walk the graph and determine if there are multiple
1014 possible values for either accumulator depending on the path taken, in which
1015 case a phi node is created and registered as the accumulator. Then, those
1016 values are pushed as accumulators to the immediate successors. For some
1017 very particular programs there may be multiple paths into the expression
1018 (e.g. when prior terms are determined by a surrounding conditional) in which
1019 case the default zero-counter is pushed, otherwise all predecessors will
1020 have been considered before the successor because of topologically ordered
1021 traversal. Finally, expr is traversed again to look for edges to the
1022 outcomes, that is, edges with a destination outside of expr, and the local
1023 accumulators are flushed to the global gcov counters on these edges. In
1024 some cases there are edge splits that cause 3+ edges to the two outcome
1025 nodes.
If a complex edge is taken (e.g. on a longjmp), an attempt is made to poison
   the accumulators so that they do not change the global counters,
1029 but this has proven unreliable in the presence of undefined behavior, see
1030 the setjmp003 test.
1032 It is important that the flushes happen on the basic condition outgoing
1033 edge, otherwise flushes could be lost to exception handling or other
1034 abnormal control flow. */
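/* Illustrative-only sketch of the per-run bookkeeping emitted below; the
   generated code works on SSA names and edge insertions rather than on
   straight-line statements like these:

     t = f = mask = 0;
     // on taking the true (index 0) or false (index 1) edge out of term k:
     t |= (gcov_type_unsigned) 1 << k;    // or f |= ... for the false edge
     mask |= masks[2*k + condition_index (edge_flags)];
     // on every edge that leaves the expression:
     gcov_t |= t & ~mask;
     gcov_f |= f & ~mask;  */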
1035 size_t
1036 instrument_decisions (array_slice<basic_block> expr, size_t condno,
1037 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
1039 tree zero = build_int_cst (gcov_type_node, 0);
1040 tree poison = build_int_cst (gcov_type_node, ~0ULL);
1041 const sbitmap core = maps[0];
1042 const sbitmap allg = maps[1];
1044 hash_map<basic_block, vec<counters>> table;
1045 counters zerocounter;
1046 zerocounter.e = NULL;
1047 zerocounter[0] = zero;
1048 zerocounter[1] = zero;
1049 zerocounter[2] = zero;
1051 unsigned xi = 0;
1052 tree rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1053 for (basic_block current : expr)
1055 vec<counters>& candidates = table.get_or_insert (current);
1056 if (candidates.is_empty ())
1057 candidates.safe_push (zerocounter);
1058 counters prev = resolve_counters (candidates);
1060 int increment = 0;
1061 for (edge e : current->succs)
1063 counters next = prev;
1064 next.e = e;
1066 if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
1068 const int k = condition_index (e->flags);
1069 next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
1070 if (masks[2*xi + k])
1072 tree m = build_int_cst (gcov_type_node, masks[2*xi + k]);
1073 next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
1075 increment = 1;
1077 else if (e->flags & EDGE_COMPLEX)
1079 /* A complex edge has been taken - wipe the accumulators and
1080 poison the mask so that this path does not contribute to
1081 coverage. */
1082 next[0] = poison;
1083 next[1] = poison;
1084 next[2] = poison;
1086 table.get_or_insert (e->dest).safe_push (next);
1088 xi += increment;
1089 if (increment)
1090 rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1093 gcc_assert (xi == bitmap_count_bits (core));
1095 const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1096 const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC;
1097 const tree atomic_ior = builtin_decl_explicit
1098 (TYPE_PRECISION (gcov_type_node) > 32
1099 ? BUILT_IN_ATOMIC_FETCH_OR_8
1100 : BUILT_IN_ATOMIC_FETCH_OR_4);
1102 /* Flush to the gcov accumulators. */
1103 for (const basic_block b : expr)
1105 if (!bitmap_bit_p (core, b->index))
1106 continue;
1108 for (edge e : b->succs)
1110 /* Flush the accumulators on leaving the Boolean function. The
1111 destination may be inside the function only when it returns to
1112 the loop header, such as do { ... } while (x); */
1113 if (bitmap_bit_p (allg, e->dest->index)) {
1114 if (!(e->flags & EDGE_DFS_BACK))
1115 continue;
1116 if (e->dest != expr[0])
1117 continue;
1120 vec<counters> *cands = table.get (e->dest);
1121 gcc_assert (cands);
1122 counters *prevp = find_counters (*cands, e);
1123 gcc_assert (prevp);
1124 counters prev = *prevp;
1126 /* _true &= ~mask, _false &= ~mask */
1127 counters next;
1128 next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
1129 next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
1130 next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);
1132 /* _global_true |= _true, _global_false |= _false */
1133 for (size_t k = 0; k != 2; ++k)
1135 tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS,
1136 2*condno + k);
1137 if (atomic)
1139 ref = unshare_expr (ref);
1140 gcall *flush = gimple_build_call (atomic_ior, 3,
1141 build_addr (ref),
1142 next[k], relaxed);
1143 gsi_insert_on_edge (e, flush);
1145 else
1147 tree get = emit_assign (e, ref);
1148 tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
1149 emit_assign (e, unshare_expr (ref), put);
1155 return xi;
1158 #undef CONDITIONS_MAX_TERMS
1159 #undef EDGE_CONDITION
1161 /* Do initialization work for the edge profiler. */
1163 /* Add code:
1164 __thread gcov* __gcov_indirect_call.counters; // pointer to actual counter
1165 __thread void* __gcov_indirect_call.callee; // actual callee address
1166 __thread int __gcov_function_counter; // time profiler function counter
1168 static void
1169 init_ic_make_global_vars (void)
1171 tree gcov_type_ptr;
1173 gcov_type_ptr = build_pointer_type (get_gcov_type ());
1175 tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
1177 /* callee */
1178 ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
1179 ptr_type_node);
1181 /* counters */
1182 ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
1183 NULL_TREE, gcov_type_ptr);
1184 DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field;
1186 finish_builtin_struct (tuple_type, "indirect_call_tuple",
1187 ic_tuple_counters_field, NULL_TREE);
1189 ic_tuple_var
1190 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1191 get_identifier ("__gcov_indirect_call"), tuple_type);
1192 TREE_PUBLIC (ic_tuple_var) = 1;
1193 DECL_ARTIFICIAL (ic_tuple_var) = 1;
1194 DECL_INITIAL (ic_tuple_var) = NULL;
1195 DECL_EXTERNAL (ic_tuple_var) = 1;
1196 if (targetm.have_tls)
1197 set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var));
1200 /* Create the type and function decls for the interface with gcov. */
1202 void
1203 gimple_init_gcov_profiler (void)
1205 tree interval_profiler_fn_type;
1206 tree pow2_profiler_fn_type;
1207 tree topn_values_profiler_fn_type;
1208 tree gcov_type_ptr;
1209 tree ic_profiler_fn_type;
1210 tree average_profiler_fn_type;
1211 const char *fn_name;
1213 if (!gcov_type_node)
1215 const char *fn_suffix
1216 = flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1218 gcov_type_node = get_gcov_type ();
1219 gcov_type_ptr = build_pointer_type (gcov_type_node);
1221 /* void (*) (gcov_type *, gcov_type, int, unsigned) */
1222 interval_profiler_fn_type
1223 = build_function_type_list (void_type_node,
1224 gcov_type_ptr, gcov_type_node,
1225 integer_type_node,
1226 unsigned_type_node, NULL_TREE);
1227 fn_name = concat ("__gcov_interval_profiler", fn_suffix, NULL);
1228 tree_interval_profiler_fn = build_fn_decl (fn_name,
1229 interval_profiler_fn_type);
1230 free (CONST_CAST (char *, fn_name));
1231 TREE_NOTHROW (tree_interval_profiler_fn) = 1;
1232 DECL_ATTRIBUTES (tree_interval_profiler_fn)
1233 = tree_cons (get_identifier ("leaf"), NULL,
1234 DECL_ATTRIBUTES (tree_interval_profiler_fn));
1236 /* void (*) (gcov_type *, gcov_type) */
1237 pow2_profiler_fn_type
1238 = build_function_type_list (void_type_node,
1239 gcov_type_ptr, gcov_type_node,
1240 NULL_TREE);
1241 fn_name = concat ("__gcov_pow2_profiler", fn_suffix, NULL);
1242 tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type);
1243 free (CONST_CAST (char *, fn_name));
1244 TREE_NOTHROW (tree_pow2_profiler_fn) = 1;
1245 DECL_ATTRIBUTES (tree_pow2_profiler_fn)
1246 = tree_cons (get_identifier ("leaf"), NULL,
1247 DECL_ATTRIBUTES (tree_pow2_profiler_fn));
1249 /* void (*) (gcov_type *, gcov_type) */
1250 topn_values_profiler_fn_type
1251 = build_function_type_list (void_type_node,
1252 gcov_type_ptr, gcov_type_node,
1253 NULL_TREE);
1254 fn_name = concat ("__gcov_topn_values_profiler", fn_suffix, NULL);
1255 tree_topn_values_profiler_fn
1256 = build_fn_decl (fn_name, topn_values_profiler_fn_type);
1257 free (CONST_CAST (char *, fn_name));
1259 TREE_NOTHROW (tree_topn_values_profiler_fn) = 1;
1260 DECL_ATTRIBUTES (tree_topn_values_profiler_fn)
1261 = tree_cons (get_identifier ("leaf"), NULL,
1262 DECL_ATTRIBUTES (tree_topn_values_profiler_fn));
1264 init_ic_make_global_vars ();
1266 /* void (*) (gcov_type, void *) */
1267 ic_profiler_fn_type
1268 = build_function_type_list (void_type_node,
1269 gcov_type_node,
1270 ptr_type_node,
1271 NULL_TREE);
1272 fn_name = concat ("__gcov_indirect_call_profiler_v4", fn_suffix, NULL);
1273 tree_indirect_call_profiler_fn
1274 = build_fn_decl (fn_name, ic_profiler_fn_type);
1275 free (CONST_CAST (char *, fn_name));
1277 TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
1278 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
1279 = tree_cons (get_identifier ("leaf"), NULL,
1280 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
1282 tree_time_profiler_counter
1283 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1284 get_identifier ("__gcov_time_profiler_counter"),
1285 get_gcov_type ());
1286 TREE_PUBLIC (tree_time_profiler_counter) = 1;
1287 DECL_EXTERNAL (tree_time_profiler_counter) = 1;
1288 TREE_STATIC (tree_time_profiler_counter) = 1;
1289 DECL_ARTIFICIAL (tree_time_profiler_counter) = 1;
1290 DECL_INITIAL (tree_time_profiler_counter) = NULL;
1292 /* void (*) (gcov_type *, gcov_type) */
1293 average_profiler_fn_type
1294 = build_function_type_list (void_type_node,
1295 gcov_type_ptr, gcov_type_node, NULL_TREE);
1296 fn_name = concat ("__gcov_average_profiler", fn_suffix, NULL);
1297 tree_average_profiler_fn = build_fn_decl (fn_name,
1298 average_profiler_fn_type);
1299 free (CONST_CAST (char *, fn_name));
1300 TREE_NOTHROW (tree_average_profiler_fn) = 1;
1301 DECL_ATTRIBUTES (tree_average_profiler_fn)
1302 = tree_cons (get_identifier ("leaf"), NULL,
1303 DECL_ATTRIBUTES (tree_average_profiler_fn));
1304 fn_name = concat ("__gcov_ior_profiler", fn_suffix, NULL);
1305 tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type);
1306 free (CONST_CAST (char *, fn_name));
1307 TREE_NOTHROW (tree_ior_profiler_fn) = 1;
1308 DECL_ATTRIBUTES (tree_ior_profiler_fn)
1309 = tree_cons (get_identifier ("leaf"), NULL,
1310 DECL_ATTRIBUTES (tree_ior_profiler_fn));
1312 /* LTO streamer needs assembler names. Because we create these decls
1313 late, we need to initialize them by hand. */
1314 DECL_ASSEMBLER_NAME (tree_interval_profiler_fn);
1315 DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
1316 DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn);
1317 DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
1318 DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
1319 DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
1323 /* If RESULT is not null, then output instructions as GIMPLE trees to assign
1324 the updated counter from CALL of FUNC to RESULT. Insert the CALL and the
1325 optional assignment instructions to GSI. Use NAME for temporary values. */
1327 static inline void
1328 gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func,
1329 tree result, const char *name)
1331 if (result)
1333 tree result_type = TREE_TYPE (TREE_TYPE (func));
1334 tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1335 gimple_set_lhs (call, tmp1);
1336 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1337 tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name);
1338 gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1);
1339 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1340 assign = gimple_build_assign (result, tmp2);
1341 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1343 else
1344 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1347 /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is
1348 not null, then assign the updated counter value to RESULT. Insert the
1349 instructions to GSI. Use NAME for temporary values. */
1351 static inline void
1352 gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result,
1353 const char *name)
1355 tree type = gcov_type_node;
1356 tree addr = build_fold_addr_expr (counter);
1357 tree one = build_int_cst (type, 1);
1358 tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1360 if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN
1361 || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT))
/* __atomic_add_fetch (&counter, 1, MEMMODEL_RELAXED); */
1364 tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32
1365 ? BUILT_IN_ATOMIC_ADD_FETCH_8
1366 : BUILT_IN_ATOMIC_ADD_FETCH_4);
1367 gcall *call = gimple_build_call (f, 3, addr, one, relaxed);
1368 gen_assign_counter_update (gsi, call, f, result, name);
1370 else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT
1371 || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL))
1373 /* low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED);
1374 high_inc = low == 0 ? 1 : 0;
1375 __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */
1376 tree zero32 = build_zero_cst (uint32_type_node);
1377 tree one32 = build_one_cst (uint32_type_node);
1378 tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name);
1379 tree four = build_int_cst (size_type_node, 4);
1380 gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR,
1381 addr, four);
1382 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1383 if (WORDS_BIG_ENDIAN)
1384 std::swap (addr, addr_high);
1385 tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
1386 gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
1387 tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
1388 gimple_call_set_lhs (call1, low);
1389 gsi_insert_after (gsi, call1, GSI_NEW_STMT);
1390 tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name);
1391 gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low,
1392 zero32);
1393 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1394 tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name);
1395 gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR,
1396 is_zero, one32, zero32);
1397 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1398 gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc,
1399 relaxed);
1400 gsi_insert_after (gsi, call2, GSI_NEW_STMT);
1402 else
1404 tree tmp1 = make_temp_ssa_name (type, NULL, name);
1405 gassign *assign1 = gimple_build_assign (tmp1, counter);
1406 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1407 tree tmp2 = make_temp_ssa_name (type, NULL, name);
1408 gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one);
1409 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1410 gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2);
1411 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1412 if (result)
1414 gassign *assign4 = gimple_build_assign (result, tmp2);
1415 gsi_insert_after (gsi, assign4, GSI_NEW_STMT);
1420 /* Output instructions as GIMPLE trees to increment the edge
1421 execution count, and insert them on E. */
1423 void
1424 gimple_gen_edge_profiler (int edgeno, edge e)
1426 gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e));
1427 tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1428 gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1431 /* Emits code to get VALUE to instrument at GSI, and returns the
1432 variable containing the value. */
1434 static tree
1435 prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value)
1437 tree val = value->hvalue.value;
1438 if (POINTER_TYPE_P (TREE_TYPE (val)))
1439 val = fold_convert (build_nonstandard_integer_type
1440 (TYPE_PRECISION (TREE_TYPE (val)), 1), val);
1441 return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val),
1442 true, NULL_TREE, true, GSI_SAME_STMT);
1445 /* Output instructions as GIMPLE trees to increment the interval histogram
1446 counter. VALUE is the expression whose value is profiled. TAG is the
1447 tag of the section for counters, BASE is offset of the counter position. */
1449 void
1450 gimple_gen_interval_profiler (histogram_value value, unsigned tag)
1452 gimple *stmt = value->hvalue.stmt;
1453 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1454 tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr;
1455 gcall *call;
1456 tree val;
1457 tree start = build_int_cst_type (integer_type_node,
1458 value->hdata.intvl.int_start);
1459 tree steps = build_int_cst_type (unsigned_type_node,
1460 value->hdata.intvl.steps);
1462 ref_ptr = force_gimple_operand_gsi (&gsi,
1463 build_addr (ref),
1464 true, NULL_TREE, true, GSI_SAME_STMT);
1465 val = prepare_instrumented_value (&gsi, value);
1466 call = gimple_build_call (tree_interval_profiler_fn, 4,
1467 ref_ptr, val, start, steps);
1468 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1471 /* Output instructions as GIMPLE trees to increment the power of two histogram
1472 counter. VALUE is the expression whose value is profiled. TAG is the tag
1473 of the section for counters. */
1475 void
1476 gimple_gen_pow2_profiler (histogram_value value, unsigned tag)
1478 gimple *stmt = value->hvalue.stmt;
1479 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1480 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1481 gcall *call;
1482 tree val;
1484 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1485 true, NULL_TREE, true, GSI_SAME_STMT);
1486 val = prepare_instrumented_value (&gsi, value);
1487 call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val);
1488 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
/* Output instructions as GIMPLE trees for code to find the N most common
1492 values. VALUE is the expression whose value is profiled. TAG is the tag
1493 of the section for counters. */
1495 void
1496 gimple_gen_topn_values_profiler (histogram_value value, unsigned tag)
1498 gimple *stmt = value->hvalue.stmt;
1499 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1500 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1501 gcall *call;
1502 tree val;
1504 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1505 true, NULL_TREE, true, GSI_SAME_STMT);
1506 val = prepare_instrumented_value (&gsi, value);
1507 call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val);
1508 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1512 /* Output instructions as GIMPLE trees for code to find the most
1513 common called function in indirect call.
1514 VALUE is the call expression whose indirect callee is profiled.
1515 TAG is the tag of the section for counters. */
1517 void
1518 gimple_gen_ic_profiler (histogram_value value, unsigned tag)
1520 tree tmp1;
1521 gassign *stmt1, *stmt2, *stmt3;
1522 gimple *stmt = value->hvalue.stmt;
1523 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1524 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1526 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1527 true, NULL_TREE, true, GSI_SAME_STMT);
1529 /* Insert code:
1531 stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr ();
1532 stmt2: tmp1 = (void *) (indirect call argument value)
1533 stmt3: __gcov_indirect_call.callee = tmp1;
1535 Example:
1536 f_1 = foo;
1537 __gcov_indirect_call.counters = &__gcov4.main[0];
1538 PROF_fn_9 = f_1;
1539 __gcov_indirect_call.callee = PROF_fn_9;
1540 _4 = f_1 ();
1543 tree gcov_type_ptr = build_pointer_type (get_gcov_type ());
1545 tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr,
1546 ic_tuple_var, ic_tuple_counters_field, NULL_TREE);
1548 stmt1 = gimple_build_assign (counter_ref, ref_ptr);
1549 tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
1550 stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
1551 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1552 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1553 stmt3 = gimple_build_assign (callee_ref, tmp1);
1555 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1556 gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1557 gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT);
1561 /* Output instructions as GIMPLE trees for code to find the most
1562 common called function in indirect call. Insert instructions at the
1563 beginning of every possible called function.
1566 void
1567 gimple_gen_ic_func_profiler (void)
1569 struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1570 gcall *stmt1;
1571 tree tree_uid, cur_func, void0;
1573 /* Disable indirect call profiling for an IFUNC resolver and its
1574 callees since it requires TLS which hasn't been set up yet when
1575 the dynamic linker is resolving IFUNC symbols. See
1576 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
1578 if (c_node->only_called_directly_p ()
1579 || c_node->called_by_ifunc_resolver)
1580 return;
1582 gimple_init_gcov_profiler ();
1584 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1585 basic_block cond_bb = split_edge (single_succ_edge (entry));
1586 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1588 /* We need to do an extra split in order to not create an input
1589 for a possible PHI node. */
1590 split_edge (single_succ_edge (update_bb));
1592 edge true_edge = single_succ_edge (cond_bb);
1593 true_edge->flags = EDGE_TRUE_VALUE;
1595 profile_probability probability;
1596 if (DECL_VIRTUAL_P (current_function_decl))
1597 probability = profile_probability::very_likely ();
1598 else
1599 probability = profile_probability::unlikely ();
1601 true_edge->probability = probability;
1602 edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
1603 EDGE_FALSE_VALUE);
1604 e->probability = true_edge->probability.invert ();
1606 /* Insert code:
1608 if (__gcov_indirect_call.callee != NULL)
__gcov_indirect_call_profiler_v4 (profile_id, &current_function_decl);

   The function __gcov_indirect_call_profiler_v4 is responsible for
   resetting __gcov_indirect_call.callee to NULL. */
1614 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1615 void0 = build_int_cst (ptr_type_node, 0);
1617 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1618 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1620 tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE,
1621 true, GSI_SAME_STMT);
1623 gcond *cond = gimple_build_cond (NE_EXPR, ref,
1624 void0, NULL, NULL);
1625 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1627 gsi = gsi_after_labels (update_bb);
1629 cur_func = force_gimple_operand_gsi (&gsi,
1630 build_addr (current_function_decl),
1631 true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1633 tree_uid = build_int_cst
1634 (gcov_type_node,
1635 cgraph_node::get (current_function_decl)->profile_id);
1636 stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
1637 tree_uid, cur_func);
1638 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1641 /* Output instructions as GIMPLE trees at the beginning of each function.
1642 TAG is the tag of the section for counters. */
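/* Rough sketch of the instrumentation built below (illustrative only):

if (counters[0] == 0)
  counters[0] = ++__gcov_time_profiler_counter;

i.e. each instrumented function records the value of the global time-profile
counter the first time it is executed.  */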
1645 void
1646 gimple_gen_time_profiler (unsigned tag)
1648 tree type = get_gcov_type ();
1649 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1650 basic_block cond_bb = split_edge (single_succ_edge (entry));
1651 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1653 /* We need to do an extra split in order to not create an input
1654 for a possible PHI node. */
1655 split_edge (single_succ_edge (update_bb));
1657 edge true_edge = single_succ_edge (cond_bb);
1658 true_edge->flags = EDGE_TRUE_VALUE;
1659 true_edge->probability = profile_probability::unlikely ();
1660 edge e
1661 = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1662 e->probability = true_edge->probability.invert ();
1664 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1665 tree original_ref = tree_coverage_counter_ref (tag, 0);
1666 tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE,
1667 true, GSI_SAME_STMT);
1669 /* Emit: if (counters[0] == 0). */
1670 gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0),
1671 NULL, NULL);
1672 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1674 /* Emit: counters[0] = ++__gcov_time_profiler_counter. */
1675 gsi = gsi_start_bb (update_bb);
1676 gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
1677 "PROF_time_profile");
1680 /* Output instructions as GIMPLE trees to increment the average histogram
1681 counter. VALUE is the expression whose value is profiled. TAG is the
1682 tag of the section for counters. */
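/* Sketch of the emitted sequence (illustrative; the call target is whatever
tree_average_profiler_fn was initialized to, typically
__gcov_average_profiler in libgcov):

PROF_x = (gcov_type) <profiled value>;
__gcov_average_profiler (&counters[0], PROF_x);  */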
1684 void
1685 gimple_gen_average_profiler (histogram_value value, unsigned tag)
1687 gimple *stmt = value->hvalue.stmt;
1688 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1689 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1690 gcall *call;
1691 tree val;
1693 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1694 true, NULL_TREE,
1695 true, GSI_SAME_STMT);
1696 val = prepare_instrumented_value (&gsi, value);
1697 call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val);
1698 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1701 /* Output instructions as GIMPLE trees to increment the ior histogram
1702 counter. VALUE is the expression whose value is profiled. TAG is the
1703 tag of the section for counters. */
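/* Descriptive note: the emitted sequence mirrors the average profiler above,
except that the call goes through tree_ior_profiler_fn (typically
__gcov_ior_profiler), which ORs the profiled value into the counter.  */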
1705 void
1706 gimple_gen_ior_profiler (histogram_value value, unsigned tag)
1708 gimple *stmt = value->hvalue.stmt;
1709 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1710 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1711 gcall *call;
1712 tree val;
1714 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1715 true, NULL_TREE, true, GSI_SAME_STMT);
1716 val = prepare_instrumented_value (&gsi, value);
1717 call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val);
1718 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1721 static vec<regex_t> profile_filter_files;
1722 static vec<regex_t> profile_exclude_files;
1724 /* Parse the list of regular expressions provided in REGEX (separated by
1725 semicolons), compile each of them (as regex_t) and save them into the
1726 vector V. If a regular expression fails to compile, an error message
1727 naming FLAG_NAME is printed. */
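/* Example (hypothetical flag value): -fprofile-filter-files="foo\.c$;util/.*"
is split at the semicolons and compiled into two regex_t entries using
REG_EXTENDED | REG_NOSUB.  */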
1729 static void
1730 parse_profile_filter (const char *regex, vec<regex_t> *v,
1731 const char *flag_name)
1733 v->create (4);
1734 if (regex != NULL)
1736 char *str = xstrdup (regex);
1737 for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
1739 regex_t r;
1740 if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
1742 error ("invalid regular expression %qs in %qs",
1743 p, flag_name);
1744 return;
1747 v->safe_push (r);
1752 /* Parse values of -fprofile-filter-files and -fprofile-exclude-files
1753 options. */
1755 static void
1756 parse_profile_file_filtering ()
1758 parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
1759 "-fprofile-filter-files");
1760 parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
1761 "-fprofile-exclude-files");
1764 /* Release the vectors of compiled regular expressions. */
1766 static void
1767 release_profile_file_filtering ()
1769 profile_filter_files.release ();
1770 profile_exclude_files.release ();
1773 /* Return true when FILENAME should be instrumented based on
1774 -fprofile-filter-files and -fprofile-exclude-files options. */
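/* Example (hypothetical options): with -fprofile-exclude-files=".*\.h$" and
-fprofile-filter-files=".*\.c$", a file named "util.h" is rejected by the
exclude list before the filter list is even consulted, while "util.c" is
accepted.  */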
1776 static bool
1777 include_source_file_for_profile (const char *filename)
1779 /* First check whether file is included in flag_profile_exclude_files. */
1780 for (unsigned i = 0; i < profile_exclude_files.length (); i++)
1781 if (regexec (&profile_exclude_files[i],
1782 filename, 0, NULL, 0) == REG_NOERROR)
1783 return false;
1785 /* For non-empty flag_profile_filter_files include only files matching a
1786 regex in the flag. */
1787 if (profile_filter_files.is_empty ())
1788 return true;
1790 for (unsigned i = 0; i < profile_filter_files.length (); i++)
1791 if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1792 return true;
1794 return false;
1797 #ifndef HAVE_sync_compare_and_swapsi
1798 #define HAVE_sync_compare_and_swapsi 0
1799 #endif
1800 #ifndef HAVE_atomic_compare_and_swapsi
1801 #define HAVE_atomic_compare_and_swapsi 0
1802 #endif
1804 #ifndef HAVE_sync_compare_and_swapdi
1805 #define HAVE_sync_compare_and_swapdi 0
1806 #endif
1807 #ifndef HAVE_atomic_compare_and_swapdi
1808 #define HAVE_atomic_compare_and_swapdi 0
1809 #endif
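/* Descriptive note: the zero fallbacks above only keep the have_atomic_4 and
have_atomic_8 checks in tree_profiling compilable on targets that provide
neither the sync nor the atomic compare-and-swap patterns; they do not enable
any atomic support by themselves.  */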
1811 /* Profile all functions in the callgraph. */
1813 static unsigned int
1814 tree_profiling (void)
1816 struct cgraph_node *node;
1818 /* Verify whether we can utilize atomic update operations. */
1819 bool can_support_atomic = targetm.have_libatomic;
1820 unsigned HOST_WIDE_INT gcov_type_size
1821 = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ()));
1822 bool have_atomic_4
1823 = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi;
1824 bool have_atomic_8
1825 = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi;
1826 bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4;
1827 if (!can_support_atomic)
1829 if (gcov_type_size == 4)
1830 can_support_atomic = have_atomic_4;
1831 else if (gcov_type_size == 8)
1832 can_support_atomic = have_atomic_8;
1835 if (flag_profile_update != PROFILE_UPDATE_SINGLE && needs_split)
1836 counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1838 if (flag_profile_update == PROFILE_UPDATE_ATOMIC
1839 && !can_support_atomic)
1841 warning (0, "target does not support atomic profile update, "
1842 "single mode is selected");
1843 flag_profile_update = PROFILE_UPDATE_SINGLE;
1845 else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC)
1846 flag_profile_update
1847 = can_support_atomic ? PROFILE_UPDATE_ATOMIC : PROFILE_UPDATE_SINGLE;
1849 if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
1851 if (needs_split)
1852 counter_update = COUNTER_UPDATE_ATOMIC_SPLIT;
1853 else
1854 counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN;
1857 /* This is a small-ipa pass that gets called only once, from
1858 cgraphunit.cc:ipa_passes(). */
1859 gcc_assert (symtab->state == IPA_SSA);
1861 init_node_map (true);
1862 parse_profile_file_filtering ();
1864 FOR_EACH_DEFINED_FUNCTION (node)
1866 bool thunk = false;
1867 if (!gimple_has_body_p (node->decl) && !node->thunk)
1868 continue;
1870 /* Don't profile functions produced for builtin stuff. */
1871 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1872 continue;
1874 if (lookup_attribute ("no_profile_instrument_function",
1875 DECL_ATTRIBUTES (node->decl)))
1876 continue;
1877 /* Do not instrument extern inline functions when testing coverage.
1878 While this is not perfectly consistent (early inlined extern inlines
1879 will get accounted), the testsuite expects that. */
1880 if (DECL_EXTERNAL (node->decl)
1881 && flag_test_coverage)
1882 continue;
1884 const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl));
1885 if (!include_source_file_for_profile (file))
1886 continue;
1888 if (node->thunk)
1890 /* We cannot expand variadic thunks to GIMPLE. */
1891 if (stdarg_p (TREE_TYPE (node->decl)))
1892 continue;
1893 thunk = true;
1894 /* When generating a profile, expand the thunk to GIMPLE so it can be
1895 instrumented the same way as other functions. */
1896 if (profile_arc_flag || condition_coverage_flag)
1897 expand_thunk (node, false, true);
1898 /* Read the cgraph profile but keep the function as a thunk at
1899 profile-use time. */
1900 else
1902 read_thunk_profile (node);
1903 continue;
1907 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1909 if (dump_file)
1910 dump_function_header (dump_file, cfun->decl, dump_flags);
1912 /* Local pure-const may imply a need to fix up the CFG. */
1913 if (gimple_has_body_p (node->decl)
1914 && (execute_fixup_cfg () & TODO_cleanup_cfg))
1915 cleanup_tree_cfg ();
1917 branch_prob (thunk);
1919 if (! flag_branch_probabilities
1920 && flag_profile_values)
1921 gimple_gen_ic_func_profiler ();
1923 if (flag_branch_probabilities
1924 && !thunk
1925 && flag_profile_values
1926 && flag_value_profile_transformations
1927 && profile_status_for_fn (cfun) == PROFILE_READ)
1928 gimple_value_profile_transformations ();
1930 /* The above could hose dominator info. Currently there is
1931 none coming in; this is a safety valve. It should be
1932 easy to adjust, if and when there is some. */
1933 free_dominance_info (CDI_DOMINATORS);
1934 free_dominance_info (CDI_POST_DOMINATORS);
1935 pop_cfun ();
1938 release_profile_file_filtering ();
1940 /* Drop pure/const flags from instrumented functions. */
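/* Rationale (descriptive note): the inserted counter updates are stores with
observable side effects, so a function that was const or pure before
instrumentation no longer is; keeping the flags could let optimizers drop or
move the profiling code.  */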
1941 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1942 FOR_EACH_DEFINED_FUNCTION (node)
1944 if (!gimple_has_body_p (node->decl)
1945 || !(!node->clone_of
1946 || node->decl != node->clone_of->decl))
1947 continue;
1949 /* Don't profile functions produced for builtin stuff. */
1950 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1951 continue;
1953 node->set_const_flag (false, false);
1954 node->set_pure_flag (false, false);
1957 /* Update call statements and rebuild the cgraph. */
1958 FOR_EACH_DEFINED_FUNCTION (node)
1960 basic_block bb;
1962 if (!gimple_has_body_p (node->decl)
1963 || !(!node->clone_of
1964 || node->decl != node->clone_of->decl))
1965 continue;
1967 /* Don't profile functions produced for builtin stuff. */
1968 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1969 continue;
1971 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1973 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1974 FOR_EACH_BB_FN (bb, cfun)
1976 gimple_stmt_iterator gsi;
1977 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1979 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
1980 if (!call || gimple_call_internal_p (call))
1981 continue;
1983 /* We do not clear pure/const on decls without body. */
1984 tree fndecl = gimple_call_fndecl (call);
1985 cgraph_node *callee;
1986 if (fndecl
1987 && (callee = cgraph_node::get (fndecl))
1988 && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
1989 continue;
1991 /* Drop the const attribute from the call type (the pure
1992 attribute is not available on types). */
1993 tree fntype = gimple_call_fntype (call);
1994 if (fntype && TYPE_READONLY (fntype))
1996 int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
1997 fntype = build_qualified_type (fntype, quals);
1998 gimple_call_set_fntype (call, fntype);
2001 /* Update virtual operands of calls to functions that are no longer
2002 const/pure. */
2003 update_stmt (call);
2007 /* Re-merge split blocks. */
2008 cleanup_tree_cfg ();
2009 update_ssa (TODO_update_ssa);
2011 cgraph_edge::rebuild_edges ();
2013 pop_cfun ();
2016 handle_missing_profiles ();
2018 del_node_map ();
2019 return 0;
2022 namespace {
2024 const pass_data pass_data_ipa_tree_profile =
2026 SIMPLE_IPA_PASS, /* type */
2027 "profile", /* name */
2028 OPTGROUP_NONE, /* optinfo_flags */
2029 TV_IPA_PROFILE, /* tv_id */
2030 0, /* properties_required */
2031 0, /* properties_provided */
2032 0, /* properties_destroyed */
2033 0, /* todo_flags_start */
2034 TODO_dump_symtab, /* todo_flags_finish */
2037 class pass_ipa_tree_profile : public simple_ipa_opt_pass
2039 public:
2040 pass_ipa_tree_profile (gcc::context *ctxt)
2041 : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt)
2044 /* opt_pass methods: */
2045 bool gate (function *) final override;
2046 unsigned int execute (function *) final override { return tree_profiling (); }
2048 }; // class pass_ipa_tree_profile
2050 bool
2051 pass_ipa_tree_profile::gate (function *)
2053 /* This pass runs when profile instrumentation, profile use or test
2054 coverage is requested. For AutoFDO there is no instrumentation, so
2055 this pass is disabled. */
2056 return (!in_lto_p && !flag_auto_profile
2057 && (flag_branch_probabilities || flag_test_coverage
2058 || profile_arc_flag || condition_coverage_flag));
2061 } // anon namespace
2063 simple_ipa_opt_pass *
2064 make_pass_ipa_tree_profile (gcc::context *ctxt)
2066 return new pass_ipa_tree_profile (ctxt);
2069 #include "gt-tree-profile.h"