1 /* An experimental state machine, for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2020 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
28 #include "basic-block.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
34 #include "analyzer/analyzer.h"
35 #include "diagnostic-event-id.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "analyzer/sm.h"
38 #include "analyzer/pending-diagnostic.h"
46 /* An experimental state machine, for tracking "taint": unsanitized uses
47 of data potentially under an attacker's control. */
/* NOTE(review): this listing looks like a lossy extract of the original
   file.  The class braces, access specifiers and the declarations of the
   state members described by the comments at the end of this class are
   not visible here; confirm against the full source before editing.  */
49 class taint_state_machine
: public state_machine
/* Constructor taking the logger to use; defined out of line.  */
52 taint_state_machine (logger
*logger
);
/* Always reports true for this state machine.  */
54 bool inherited_state_p () const FINAL OVERRIDE
{ return true; }
/* Per-statement hook; defined out of line.  */
56 bool on_stmt (sm_context
*sm_ctxt
,
57 const supernode
*node
,
58 const gimple
*stmt
) const FINAL OVERRIDE
;
/* Per-condition hook; defined out of line.  Only part of the parameter
   list (sm_ctxt, node, rhs) is visible in this view.  */
60 void on_condition (sm_context
*sm_ctxt
,
61 const supernode
*node
,
65 tree rhs
) const FINAL OVERRIDE
;
/* Query taking a state_t; defined out of line.  */
67 bool can_purge_p (state_t s
) const FINAL OVERRIDE
;
/* The comments below document the state members.  The constructor
   registers states named start, tainted, has_lb, has_ub and stop, in
   that order.  */
72 /* State for a "tainted" value: unsanitized data potentially under an
73 attacker's control. */
76 /* State for a "tainted" value that has a lower bound. */
79 /* State for a "tainted" value that has an upper bound. */
82 /* Stop state, for a value we don't want to track any more. */
/* Pending diagnostic for the use of a tainted value in an array lookup
   without complete bounds checking.  It is reported under
   OPT_Wanalyzer_tainted_array_index.
   NOTE(review): braces, case labels and some trailing call arguments are
   not visible in this view of the class.  */
93 class tainted_array_index
94 : public pending_diagnostic_subclass
<tainted_array_index
>
/* Store the owning state machine SM, the tree ARG being complained about,
   and HAS_BOUNDS (enum bounds; its definition is not visible here).  */
97 tainted_array_index (const taint_state_machine
&sm
, tree arg
,
98 enum bounds has_bounds
)
99 : m_sm (sm
), m_arg (arg
), m_has_bounds (has_bounds
) {}
/* Kind string identifying this diagnostic.  */
101 const char *get_kind () const FINAL OVERRIDE
{ return "tainted_array_index"; }
/* Equality is decided by same_tree_p on m_arg; the visible code does not
   compare m_has_bounds.  */
103 bool operator== (const tainted_array_index
&other
) const
105 return same_tree_p (m_arg
, other
.m_arg
);
/* Emit the warning at RICH_LOC.  One of three message variants is chosen
   by switching on m_has_bounds: no bounds checking, missing lower-bounds
   checking, or missing upper-bounds checking.  Returns the result of
   warning_meta.  */
108 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
/* Metadata object handed to warning_meta; nothing visible here sets any
   field on it.  */
110 diagnostic_metadata m
;
112 switch (m_has_bounds
)
/* Variant: no bounds checking at all.  */
117 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
118 "use of tainted value %qE in array lookup"
119 " without bounds checking",
/* Variant: lower bound has not been checked.  */
123 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
124 "use of tainted value %qE in array lookup"
125 " without lower-bounds checking",
/* Variant: upper bound has not been checked.  */
129 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
130 "use of tainted value %qE in array lookup"
131 " without upper-bounds checking",
/* Describe a state change along the diagnostic path.  The text depends on
   which state is being entered: m_tainted, m_has_lb or m_has_ub.  Any
   other new state yields an empty label_text.  */
137 label_text
describe_state_change (const evdesc::state_change
&change
)
140 if (change
.m_new_state
== m_sm
.m_tainted
)
/* Two wordings exist for entering the tainted state.  The first is
   passed both m_expr and m_origin; the condition selecting between the
   two is not visible in this view.  */
143 return change
.formatted_print ("%qE has an unchecked value here"
145 change
.m_expr
, change
.m_origin
);
147 return change
.formatted_print ("%qE gets an unchecked value here",
150 else if (change
.m_new_state
== m_sm
.m_has_lb
)
151 return change
.formatted_print ("%qE has its lower bound checked here",
153 else if (change
.m_new_state
== m_sm
.m_has_ub
)
154 return change
.formatted_print ("%qE has its upper bound checked here",
/* No description for any other transition.  */
156 return label_text ();
/* Describe the final event of the path.  The three visible message
   strings are the same as those used in emit, again selected by
   m_has_bounds.  */
159 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
161 switch (m_has_bounds
)
166 return ev
.formatted_print ("use of tainted value %qE in array lookup"
167 " without bounds checking",
170 return ev
.formatted_print ("use of tainted value %qE in array lookup"
171 " without lower-bounds checking",
174 return ev
.formatted_print ("use of tainted value %qE in array lookup"
175 " without upper-bounds checking",
/* The state machine this diagnostic belongs to; used to compare against
   its state members.  */
181 const taint_state_machine
&m_sm
;
/* Which bounds variant to report; drives the switches above.  The
   declaration of m_arg is not visible in this view.  */
183 enum bounds m_has_bounds
;
186 /* taint_state_machine's ctor. */
/* Passes the name "taint" and LOGGER to the state_machine base, then
   registers the five states through add_state and stores each result in
   the corresponding member.  The registration order is start, tainted,
   has_lb, has_ub, stop.  */
188 taint_state_machine::taint_state_machine (logger
*logger
)
189 : state_machine ("taint", logger
)
191 m_start
= add_state ("start");
192 m_tainted
= add_state ("tainted");
193 m_has_lb
= add_state ("has_lb");
194 m_has_ub
= add_state ("has_ub");
195 m_stop
= add_state ("stop");
198 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
/* Two kinds of statement are examined:
   - calls: a 4-argument call to fread marks its first argument as tainted;
   - assignments: an array access (the test itself is not visible in this
     view) whose index is in state tainted, has_lb or has_ub is reported,
     and the index then moves to the stop state.
   NOTE(review): braces, return statements and several conditions are
   missing from this listing; confirm against the full source.  */
201 taint_state_machine::on_stmt (sm_context
*sm_ctxt
,
202 const supernode
*node
,
203 const gimple
*stmt
) const
/* Calls: only those for which a callee fndecl can be determined.  */
205 if (const gcall
*call
= dyn_cast
<const gcall
*> (stmt
))
206 if (tree callee_fndecl
= sm_ctxt
->get_fndecl_for_call (call
))
/* fread with exactly 4 arguments is the one taint source handled here.  */
208 if (is_named_call_p (callee_fndecl
, "fread", call
, 4))
/* Argument 0 of the call, passed through get_readable_tree.  */
210 tree arg
= gimple_call_arg (call
, 0);
211 arg
= sm_ctxt
->get_readable_tree (arg
);
/* Move the argument from the start state to the tainted state.  */
213 sm_ctxt
->on_transition (node
, stmt
, arg
, m_start
, m_tainted
);
215 /* Dereference an ADDR_EXPR. */
216 // TODO: should the engine do this?
/* When the argument is an ADDR_EXPR, a transition is also requested for
   its operand 0.  The states passed to that call are not visible here.  */
217 if (TREE_CODE (arg
) == ADDR_EXPR
)
218 sm_ctxt
->on_transition (node
, stmt
, TREE_OPERAND (arg
, 0),
223 // TODO: ...etc; many other sources of untrusted data
/* Assignments: inspect the first RHS operand and the RHS code.  */
225 if (const gassign
*assign
= dyn_cast
<const gassign
*> (stmt
))
227 tree rhs1
= gimple_assign_rhs1 (assign
);
228 enum tree_code op
= gimple_assign_rhs_code (assign
);
230 /* Check array accesses. */
/* The index is operand 1 of rhs1, passed through get_readable_tree.  */
233 tree arg
= TREE_OPERAND (rhs1
, 1);
234 arg
= sm_ctxt
->get_readable_tree (arg
);
236 /* Unsigned types have an implicit lower bound. */
/* Only integral types are queried for signedness; anything else is
   treated as not unsigned.  */
237 bool is_unsigned
= false;
238 if (INTEGRAL_TYPE_P (TREE_TYPE (arg
)))
239 is_unsigned
= TYPE_UNSIGNED (TREE_TYPE (arg
));
241 /* Complain about missing bounds. */
/* Index still in the tainted state: warn, choosing between BOUNDS_LOWER
   and BOUNDS_NONE (the selecting condition is not visible here; presumably
   is_unsigned), then stop tracking the index.  */
242 sm_ctxt
->warn_for_state
243 (node
, stmt
, arg
, m_tainted
,
244 new tainted_array_index (*this, arg
,
246 ? BOUNDS_LOWER
: BOUNDS_NONE
));
247 sm_ctxt
->on_transition (node
, stmt
, arg
, m_tainted
, m_stop
);
249 /* Complain about missing upper bound. */
/* Index in the has_lb state: warn, then stop tracking the index.  The
   bounds value passed to the diagnostic is not visible here.  */
250 sm_ctxt
->warn_for_state (node
, stmt
, arg
, m_has_lb
,
251 new tainted_array_index (*this, arg
,
253 sm_ctxt
->on_transition (node
, stmt
, arg
, m_has_lb
, m_stop
);
255 /* Complain about missing lower bound. */
/* Index in the has_ub state: warn, then stop tracking the index.  Any
   guard around this (for instance on is_unsigned) is not visible here.  */
258 sm_ctxt
->warn_for_state (node
, stmt
, arg
, m_has_ub
,
259 new tainted_array_index (*this, arg
,
261 sm_ctxt
->on_transition (node
, stmt
, arg
, m_has_ub
, m_stop
);
269 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
270 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
271 and states 'has_ub' and 'has_lb' to 'stop'. */
/* NOTE(review): part of the parameter list, the comparison-code dispatch
   and the target state of every on_transition call below are missing from
   this listing.  Only the source states (m_tainted, m_has_ub, m_has_lb)
   and the use of lhs as the tracked expression are visible.  */
274 taint_state_machine::on_condition (sm_context
*sm_ctxt
,
275 const supernode
*node
,
279 tree rhs ATTRIBUTE_UNUSED
) const
284 // TODO: this doesn't use the RHS; should we make it symmetric?
/* First pair of transitions on lhs: from tainted, and from has_ub.  */
294 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
296 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_ub
,
/* Second pair of transitions on lhs: from tainted, and from has_lb.  */
303 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
305 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_lb
,
/* Out-of-line definition of the can_purge_p override declared in
   taint_state_machine.  The state argument is marked ATTRIBUTE_UNUSED.
   NOTE(review): the return type and body are not visible in this view.  */
315 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED
) const
320 } // anonymous namespace
322 /* Internal interface to this file. */
/* Allocates a taint_state_machine with new, passing LOGGER to its
   constructor, and returns the result.  Nothing visible here frees the
   object, so the caller presumably takes ownership; confirm at the call
   site.  The return type line is not visible in this view.  */
325 make_taint_state_machine (logger
*logger
)
327 return new taint_state_machine (logger
);
332 #endif /* #if ENABLE_ANALYZER */