analyzer: enable taint state machine by default [PR103533]
[official-gcc.git] / gcc / analyzer / region-model-asm.cc
blob19ce284498880603b36e37b510e9a950ec307f56
1 /* Handling inline asm in the analyzer.
2 Copyright (C) 2021-2023 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #define INCLUDE_MEMORY
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tree.h"
26 #include "function.h"
27 #include "basic-block.h"
28 #include "gimple.h"
29 #include "gimple-iterator.h"
30 #include "diagnostic-core.h"
31 #include "pretty-print.h"
32 #include "analyzer/analyzer.h"
33 #include "analyzer/analyzer-logging.h"
34 #include "options.h"
35 #include "analyzer/call-string.h"
36 #include "analyzer/program-point.h"
37 #include "analyzer/store.h"
38 #include "analyzer/region-model.h"
39 #include "analyzer/region-model-reachability.h"
40 #include "stmt.h"
42 #if ENABLE_ANALYZER
44 namespace ana {
46 /* Minimal asm support for the analyzer.
48 The objective of this code is to:
49 - minimize false positives from the analyzer on the Linux kernel
50 (which makes heavy use of inline asm), whilst
51 - avoiding having to "teach" the compiler anything about specific strings
52 in asm statements.
54 Specifically, we want to:
56 (a) mark asm outputs and certain other regions as having been written to,
57 to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
59 (b) identify some of these stmts as "deterministic" so that we can
60 write consistent outputs given consistent inputs, so that we can
61 avoid false positives for paths in which an asm is invoked twice
62 with the same inputs and is expected to emit the same output.
64 This file implements heuristics for achieving the above. */
66 /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
68 Consider this x86 function taken from the Linux kernel
69 (arch/x86/include/asm/barrier.h):
71 static inline unsigned long array_index_mask_nospec(unsigned long index,
72 unsigned long size)
74 unsigned long mask;
76 asm volatile ("cmp %1,%2; sbb %0,%0;"
77 :"=r" (mask)
78 :"g"(size),"r" (index)
79 :"cc");
80 return mask;
83 The above is a mitigation for Spectre-variant-1 attacks, for clamping
84 an array access to within the range of [0, size) if the CPU speculates
85 past the array bounds.
87 However, it is ultimately used to implement wdev_to_wvif:
89 static inline struct wfx_vif *
90 wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
92 vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
93 if (!wdev->vif[vif_id]) {
94 return NULL;
96 return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
99 which is used by:
101 if (wdev_to_wvif(wvif->wdev, 1))
102 return wdev_to_wvif(wvif->wdev, 1)->vif;
104 The code has been written to assume that wdev_to_wvif is deterministic,
105 and won't change from returning non-NULL at the "if" clause to
106 returning NULL at the "->vif" dereference.
108 By treating the above specific "asm volatile" as deterministic we avoid
109 a false positive from -Wanalyzer-null-dereference. */
111 static bool
112 deterministic_p (const gasm *asm_stmt)
114 /* Assume something volatile with no inputs is querying
115 changeable state e.g. rdtsc. */
116 if (gimple_asm_ninputs (asm_stmt) == 0
117 && gimple_asm_volatile_p (asm_stmt))
118 return false;
120 /* Otherwise assume it's purely a function of its inputs. */
121 return true;
124 /* Update this model for the asm STMT, using CTXT to report any
125 diagnostics.
127 Compare with cfgexpand.cc: expand_asm_stmt. */
129 void
130 region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
132 logger *logger = ctxt ? ctxt->get_logger () : NULL;
133 LOG_SCOPE (logger);
135 const unsigned noutputs = gimple_asm_noutputs (stmt);
136 const unsigned ninputs = gimple_asm_ninputs (stmt);
138 auto_vec<tree> output_tvec;
139 auto_vec<tree> input_tvec;
140 auto_vec<const char *> constraints;
142 /* Copy the gimple vectors into new vectors that we can manipulate. */
143 output_tvec.safe_grow (noutputs, true);
144 input_tvec.safe_grow (ninputs, true);
145 constraints.safe_grow (noutputs + ninputs, true);
147 for (unsigned i = 0; i < noutputs; ++i)
149 tree t = gimple_asm_output_op (stmt, i);
150 output_tvec[i] = TREE_VALUE (t);
151 constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
153 for (unsigned i = 0; i < ninputs; i++)
155 tree t = gimple_asm_input_op (stmt, i);
156 input_tvec[i] = TREE_VALUE (t);
157 constraints[i + noutputs]
158 = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
161 /* Determine which regions are reachable from the inputs
162 to this stmt. */
163 reachable_regions reachable_regs (this);
165 int num_errors = 0;
167 auto_vec<const region *> output_regions (noutputs);
168 for (unsigned i = 0; i < noutputs; ++i)
170 tree val = output_tvec[i];
171 const char *constraint;
172 bool is_inout;
173 bool allows_reg;
174 bool allows_mem;
176 const region *dst_reg = get_lvalue (val, ctxt);
177 output_regions.quick_push (dst_reg);
178 reachable_regs.add (dst_reg, true);
180 /* Try to parse the output constraint. If that fails, there's
181 no point in going further. */
182 constraint = constraints[i];
183 if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
184 &allows_mem, &allows_reg, &is_inout))
186 if (logger)
187 logger->log ("error parsing constraint for output %i: %qs",
188 i, constraint);
189 num_errors++;
190 continue;
193 if (logger)
195 logger->log ("output %i: %qs %qE"
196 " is_inout: %i allows_reg: %i allows_mem: %i",
197 i, constraint, val,
198 (int)is_inout, (int)allows_reg, (int)allows_mem);
199 logger->start_log_line ();
200 logger->log_partial (" region: ");
201 dst_reg->dump_to_pp (logger->get_printer (), true);
202 logger->end_log_line ();
207 /* Ideally should combine with inout_svals to determine the
208 "effective inputs" and use this for the asm_output_svalue. */
210 auto_vec<const svalue *> input_svals (ninputs);
211 for (unsigned i = 0; i < ninputs; i++)
213 tree val = input_tvec[i];
214 const char *constraint = constraints[i + noutputs];
215 bool allows_reg, allows_mem;
216 if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
217 constraints.address (),
218 &allows_mem, &allows_reg))
220 if (logger)
221 logger->log ("error parsing constraint for input %i: %qs",
222 i, constraint);
223 num_errors++;
224 continue;
227 tree src_expr = input_tvec[i];
228 const svalue *src_sval = get_rvalue (src_expr, ctxt);
229 check_for_poison (src_sval, src_expr, NULL, ctxt);
230 input_svals.quick_push (src_sval);
231 reachable_regs.handle_sval (src_sval);
233 if (logger)
235 logger->log ("input %i: %qs %qE"
236 " allows_reg: %i allows_mem: %i",
237 i, constraint, val,
238 (int)allows_reg, (int)allows_mem);
239 logger->start_log_line ();
240 logger->log_partial (" sval: ");
241 src_sval->dump_to_pp (logger->get_printer (), true);
242 logger->end_log_line ();
246 if (num_errors > 0)
247 gcc_unreachable ();
249 if (logger)
251 logger->log ("reachability: ");
252 reachable_regs.dump_to_pp (logger->get_printer ());
253 logger->end_log_line ();
256 /* Given the regions that were reachable from the inputs we
257 want to clobber them.
258 This is similar to region_model::handle_unrecognized_call,
259 but the unknown call policies seems too aggressive (e.g. purging state
260 from anything that's ever escaped). Instead, clobber any clusters
261 that were reachable in *this* asm stmt, rather than those that
262 escaped, and we don't treat the values as having escaped.
263 We also assume that asm stmts don't affect sm-state. */
264 for (auto iter = reachable_regs.begin_mutable_base_regs ();
265 iter != reachable_regs.end_mutable_base_regs (); ++iter)
267 const region *base_reg = *iter;
268 if (base_reg->symbolic_for_unknown_ptr_p ()
269 || !base_reg->tracked_p ())
270 continue;
272 binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
273 cluster->on_asm (stmt, m_mgr->get_store_manager (),
274 conjured_purge (this, ctxt));
277 /* Update the outputs. */
278 for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
280 tree dst_expr = output_tvec[output_idx];
281 const region *dst_reg = output_regions[output_idx];
283 const svalue *sval;
284 if (deterministic_p (stmt)
285 && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
286 sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
287 stmt,
288 output_idx,
289 input_svals);
290 else
292 sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
293 stmt,
294 dst_reg,
295 conjured_purge (this,
296 ctxt));
298 set_value (dst_reg, sval, ctxt);
302 } // namespace ana
304 #endif /* #if ENABLE_ANALYZER */