1 /* An experimental state machine, for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2021 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
28 #include "basic-block.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
35 #include "analyzer/analyzer.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "gimple-iterator.h"
40 #include "ordered-hash-map.h"
44 #include "analyzer/supergraph.h"
45 #include "analyzer/call-string.h"
46 #include "analyzer/program-point.h"
47 #include "analyzer/store.h"
48 #include "analyzer/region-model.h"
49 #include "analyzer/sm.h"
50 #include "analyzer/program-state.h"
51 #include "analyzer/pending-diagnostic.h"
59 /* An enum for describing tainted values. */
63 /* This tainted value has no upper or lower bound. */
66 /* This tainted value has an upper bound but not lower bound. */
69 /* This tainted value has a lower bound but no upper bound. */
73 /* An experimental state machine, for tracking "taint": unsanitized uses
74 of data potentially under an attacker's control. */
/* Besides the start state, values can be in one of the states "tainted",
   "has_lb", "has_ub" or "stop", which the constructor registers with
   add_state.  */
76 class taint_state_machine
: public state_machine
/* Construct the state machine; LOGGER is passed on to the state_machine
   base class.  */
79 taint_state_machine (logger
*logger
);
/* This state machine always answers true here.  */
81 bool inherited_state_p () const FINAL OVERRIDE
{ return true; }
/* Derive a state for a value from the states of its operands; the
   out-of-line definition inspects unary and binary operations.  */
83 state_t
alt_get_inherited_state (const sm_state_map
&map
,
85 const extrinsic_state
&ext_state
)
/* Statement hook; the out-of-line definition looks at calls (to find
   sources of taint) and at assignments (to find tainted divisors).  */
88 bool on_stmt (sm_context
*sm_ctxt
,
89 const supernode
*node
,
90 const gimple
*stmt
) const FINAL OVERRIDE
;
/* Condition hook: potentially moves 'tainted' to 'has_ub' or 'has_lb',
   and 'has_ub' / 'has_lb' to 'stop'.  */
92 void on_condition (sm_context
*sm_ctxt
,
93 const supernode
*node
,
97 const svalue
*rhs
) const FINAL OVERRIDE
;
/* Purge query for state S; the definition marks S as unused.  */
99 bool can_purge_p (state_t s
) const FINAL OVERRIDE
;
/* If S is a tainted state, write the bounds to *OUT and return true,
   otherwise return false.  The signedness of TYPE decides whether
   "has_ub" still counts as tainted.  */
101 bool get_taint (state_t s
, tree type
, enum bounds
*out
) const;
/* Find the most tainted state of S0 and S1.  */
103 state_t
combine_states (state_t s0
, state_t s1
) const;
105 /* State for a "tainted" value: unsanitized data potentially under an
106 attacker's control. */
109 /* State for a "tainted" value that has a lower bound. */
112 /* State for a "tainted" value that has an upper bound. */
115 /* Stop state, for a value we don't want to track any more. */
119 /* Class for diagnostics relating to taint_state_machine. */
/* Each instance records the owning state machine (m_sm), the tree the
   diagnostic is about (m_arg) and an enum bounds value describing the
   tainted value (m_has_bounds).  */
121 class taint_diagnostic
: public pending_diagnostic
/* Store SM, ARG and HAS_BOUNDS in the corresponding members.  */
124 taint_diagnostic (const taint_state_machine
&sm
, tree arg
,
125 enum bounds has_bounds
)
126 : m_sm (sm
), m_arg (arg
), m_has_bounds (has_bounds
)
/* Compare by m_arg using same_tree_p.  BASE_OTHER is cast to
   taint_diagnostic without a check, so callers are presumably expected
   to pass only taint_diagnostic instances -- TODO confirm.  */
129 bool subclass_equal_p (const pending_diagnostic
&base_other
) const OVERRIDE
131 return same_tree_p (m_arg
, ((const taint_diagnostic
&)base_other
).m_arg
);
/* Describe a state change for the diagnostic path.  Text is produced
   when the new state is m_tainted, m_has_lb or m_has_ub; any other new
   state yields an empty label_text.  */
134 label_text
describe_state_change (const evdesc::state_change
&change
)
/* Entering 'tainted': two wordings exist ("has" / "gets" an unchecked
   value).  NOTE(review): the test choosing between them is presumably
   on change.m_origin -- confirm.  */
137 if (change
.m_new_state
== m_sm
.m_tainted
)
140 return change
.formatted_print ("%qE has an unchecked value here"
142 change
.m_expr
, change
.m_origin
);
144 return change
.formatted_print ("%qE gets an unchecked value here",
/* Entering 'has_lb': a lower-bound check was seen.  */
147 else if (change
.m_new_state
== m_sm
.m_has_lb
)
148 return change
.formatted_print ("%qE has its lower bound checked here",
/* Entering 'has_ub': an upper-bound check was seen.  */
150 else if (change
.m_new_state
== m_sm
.m_has_ub
)
151 return change
.formatted_print ("%qE has its upper bound checked here",
/* No description for any other state change.  */
153 return label_text ();
156 const taint_state_machine
&m_sm
;
158 enum bounds m_has_bounds
;
161 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
162 array index. */
// Diagnostic for a tainted value used in an array lookup; warnings are
// issued under OPT_Wanalyzer_tainted_array_index.
164 class tainted_array_index
: public taint_diagnostic
// Forward SM, ARG and HAS_BOUNDS unchanged to taint_diagnostic.
167 tainted_array_index (const taint_state_machine
&sm
, tree arg
,
168 enum bounds has_bounds
)
169 : taint_diagnostic (sm
, arg
, has_bounds
)
172 const char *get_kind () const FINAL OVERRIDE
{ return "tainted_array_index"; }
// Emit the warning at RICH_LOC and return the result of warning_meta.
// The wording depends on m_has_bounds: no bounds checking, no check
// for negative, or no upper-bounds checking.
174 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
176 diagnostic_metadata m
;
177 /* CWE-129: "Improper Validation of Array Index". */
179 switch (m_has_bounds
)
184 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
185 "use of attacker-controlled value %qE"
186 " in array lookup without bounds checking",
190 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
191 "use of attacker-controlled value %qE"
192 " in array lookup without checking for negative",
196 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
197 "use of attacker-controlled value %qE"
198 " in array lookup without upper-bounds checking",
// Text for the final event of the diagnostic path; uses the same three
// wordings as emit, again selected by m_has_bounds.
204 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
206 switch (m_has_bounds
)
211 return ev
.formatted_print
212 ("use of attacker-controlled value %qE in array lookup"
213 " without bounds checking",
216 return ev
.formatted_print
217 ("use of attacker-controlled value %qE"
218 " in array lookup without checking for negative",
221 return ev
.formatted_print
222 ("use of attacker-controlled value %qE"
223 " in array lookup without upper-bounds checking",
229 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
230 pointer offset. */
// Diagnostic for a tainted value used as an offset; warnings are issued
// under OPT_Wanalyzer_tainted_offset.
232 class tainted_offset
: public taint_diagnostic
// Forward SM, ARG and HAS_BOUNDS unchanged to taint_diagnostic.
235 tainted_offset (const taint_state_machine
&sm
, tree arg
,
236 enum bounds has_bounds
)
237 : taint_diagnostic (sm
, arg
, has_bounds
)
240 const char *get_kind () const FINAL OVERRIDE
{ return "tainted_offset"; }
// Emit the warning at RICH_LOC and return the result of warning_meta.
// There are two groups of messages, one naming the value with %qE and
// one that does not; within each group m_has_bounds picks the wording.
// NOTE(review): the %qE-less group is presumably for when no tree is
// available for the value -- confirm.
242 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
244 diagnostic_metadata m
;
245 /* CWE-823: "Use of Out-of-range Pointer Offset". */
// Messages that name the value.
248 switch (m_has_bounds
)
253 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_offset
,
254 "use of attacker-controlled value %qE as offset"
255 " without bounds checking",
259 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_offset
,
260 "use of attacker-controlled value %qE as offset"
261 " without lower-bounds checking",
265 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_offset
,
266 "use of attacker-controlled value %qE as offset"
267 " without upper-bounds checking",
// Messages that do not name the value.
272 switch (m_has_bounds
)
277 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_offset
,
278 "use of attacker-controlled value as offset"
279 " without bounds checking");
282 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_offset
,
283 "use of attacker-controlled value as offset"
284 " without lower-bounds checking");
287 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_offset
,
288 "use of attacker-controlled value as offset"
289 " without upper-bounds checking");
// Text for the final event of the diagnostic path, with the same split
// into messages with and without %qE as in emit.
294 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
// Messages that name the value.
297 switch (m_has_bounds
)
302 return ev
.formatted_print ("use of attacker-controlled value %qE"
303 " as offset without bounds checking",
306 return ev
.formatted_print ("use of attacker-controlled value %qE"
307 " as offset without lower-bounds checking",
310 return ev
.formatted_print ("use of attacker-controlled value %qE"
311 " as offset without upper-bounds checking",
// Messages that do not name the value.
315 switch (m_has_bounds
)
320 return ev
.formatted_print ("use of attacker-controlled value"
321 " as offset without bounds checking");
323 return ev
.formatted_print ("use of attacker-controlled value"
324 " as offset without lower-bounds"
327 return ev
.formatted_print ("use of attacker-controlled value"
328 " as offset without upper-bounds"
334 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
335 size. */
// Diagnostic for a tainted value used as a size; warnings are issued
// under OPT_Wanalyzer_tainted_size.  Also carries the direction of the
// access in m_dir.
337 class tainted_size
: public taint_diagnostic
// Forward SM, ARG and HAS_BOUNDS to taint_diagnostic.
// NOTE(review): DIR is presumably stored in m_dir -- confirm.
340 tainted_size (const taint_state_machine
&sm
, tree arg
,
341 enum bounds has_bounds
,
342 enum access_direction dir
)
343 : taint_diagnostic (sm
, arg
, has_bounds
),
347 const char *get_kind () const FINAL OVERRIDE
{ return "tainted_size"; }
// Emit the warning at RICH_LOC and return the result of warning_meta.
// The wording depends on m_has_bounds: no bounds checking, no
// lower-bounds checking, or no upper-bounds checking.
349 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
351 diagnostic_metadata m
;
353 switch (m_has_bounds
)
358 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_size
,
359 "use of attacker-controlled value %qE as size"
360 " without bounds checking",
364 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_size
,
365 "use of attacker-controlled value %qE as size"
366 " without lower-bounds checking",
370 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_size
,
371 "use of attacker-controlled value %qE as size"
372 " without upper-bounds checking",
// Text for the final event of the diagnostic path; uses the same three
// wordings as emit, again selected by m_has_bounds.
378 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
380 switch (m_has_bounds
)
385 return ev
.formatted_print ("use of attacker-controlled value %qE"
386 " as size without bounds checking",
389 return ev
.formatted_print ("use of attacker-controlled value %qE"
390 " as size without lower-bounds checking",
393 return ev
.formatted_print ("use of attacker-controlled value %qE"
394 " as size without upper-bounds checking",
// Direction of the access this diagnostic is about.
400 enum access_direction m_dir
;
403 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
404 divisor (so that an attacker can trigger a divide by zero). */
/* Warnings are issued under OPT_Wanalyzer_tainted_divisor.  Unlike the
   sibling diagnostics, the wording here does not vary with
   m_has_bounds.  */
406 class tainted_divisor
: public taint_diagnostic
/* Forward SM, ARG and HAS_BOUNDS unchanged to taint_diagnostic.  */
409 tainted_divisor (const taint_state_machine
&sm
, tree arg
,
410 enum bounds has_bounds
)
411 : taint_diagnostic (sm
, arg
, has_bounds
)
414 const char *get_kind () const FINAL OVERRIDE
{ return "tainted_divisor"; }
/* Emit the warning at RICH_LOC and return the result of warning_meta.
   One message names the value with %qE and one does not.
   NOTE(review): the %qE-less form is presumably for when no tree is
   available for the divisor -- confirm.  */
416 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
418 diagnostic_metadata m
;
419 /* CWE-369: "Divide By Zero". */
422 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_divisor
,
423 "use of attacker-controlled value %qE as divisor"
424 " without checking for zero",
427 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_divisor
,
428 "use of attacker-controlled value as divisor"
429 " without checking for zero");
/* Text for the final event of the diagnostic path, with the same two
   forms (with and without %qE) as emit.  */
432 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
435 return ev
.formatted_print
436 ("use of attacker-controlled value %qE as divisor"
437 " without checking for zero",
440 return ev
.formatted_print
441 ("use of attacker-controlled value as divisor"
442 " without checking for zero");
446 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
447 size of a dynamic allocation. */
/* Warnings are issued under OPT_Wanalyzer_tainted_allocation_size.  The
   memory space of the allocation is kept in m_mem_space but does not yet
   influence the wording (see the TODO in emit).  */
449 class tainted_allocation_size
: public taint_diagnostic
/* Forward SM, ARG and HAS_BOUNDS to taint_diagnostic and record
   MEM_SPACE, which must be MEMSPACE_STACK or MEMSPACE_HEAP.  */
452 tainted_allocation_size (const taint_state_machine
&sm
, tree arg
,
453 enum bounds has_bounds
, enum memory_space mem_space
)
454 : taint_diagnostic (sm
, arg
, has_bounds
),
455 m_mem_space (mem_space
)
457 gcc_assert (mem_space
== MEMSPACE_STACK
|| mem_space
== MEMSPACE_HEAP
);
460 const char *get_kind () const FINAL OVERRIDE
462 return "tainted_allocation_size";
/* Emit the warning at RICH_LOC and return the result of warning_meta.
   There are two groups of messages, one naming the value with %qE and
   one that does not; within each group m_has_bounds picks the wording.
   NOTE(review): the %qE-less group is presumably for when no tree is
   available for the size -- confirm.  */
465 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
467 diagnostic_metadata m
;
468 /* "CWE-789: Memory Allocation with Excessive Size Value". */
/* Re-check the invariant established by the constructor.  */
470 gcc_assert (m_mem_space
== MEMSPACE_STACK
|| m_mem_space
== MEMSPACE_HEAP
);
471 // TODO: make use of m_mem_space
/* Messages that name the value.  */
473 switch (m_has_bounds
)
478 return warning_meta (rich_loc
, m
,
479 OPT_Wanalyzer_tainted_allocation_size
,
480 "use of attacker-controlled value %qE as"
481 " allocation size without bounds checking",
485 return warning_meta (rich_loc
, m
,
486 OPT_Wanalyzer_tainted_allocation_size
,
487 "use of attacker-controlled value %qE as"
488 " allocation size without lower-bounds checking",
492 return warning_meta (rich_loc
, m
,
493 OPT_Wanalyzer_tainted_allocation_size
,
494 "use of attacker-controlled value %qE as"
495 " allocation size without upper-bounds checking",
/* Messages that do not name the value.  */
500 switch (m_has_bounds
)
505 return warning_meta (rich_loc
, m
,
506 OPT_Wanalyzer_tainted_allocation_size
,
507 "use of attacker-controlled value as"
508 " allocation size without bounds"
512 return warning_meta (rich_loc
, m
,
513 OPT_Wanalyzer_tainted_allocation_size
,
514 "use of attacker-controlled value as"
515 " allocation size without lower-bounds"
519 return warning_meta (rich_loc
, m
,
520 OPT_Wanalyzer_tainted_allocation_size
,
521 "use of attacker-controlled value as"
522 " allocation size without upper-bounds"
/* Text for the final event of the diagnostic path, with the same split
   into messages with and without %qE as in emit.  */
528 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
/* Messages that name the value.  */
531 switch (m_has_bounds
)
536 return ev
.formatted_print
537 ("use of attacker-controlled value %qE as allocation size"
538 " without bounds checking",
541 return ev
.formatted_print
542 ("use of attacker-controlled value %qE as allocation size"
543 " without lower-bounds checking",
546 return ev
.formatted_print
547 ("use of attacker-controlled value %qE as allocation size"
548 " without upper-bounds checking",
/* Messages that do not name the value.  */
552 switch (m_has_bounds
)
557 return ev
.formatted_print
558 ("use of attacker-controlled value as allocation size"
559 " without bounds checking");
561 return ev
.formatted_print
562 ("use of attacker-controlled value as allocation size"
563 " without lower-bounds checking");
565 return ev
.formatted_print
566 ("use of attacker-controlled value as allocation size"
567 " without upper-bounds checking");
/* Memory space of the allocation; MEMSPACE_STACK or MEMSPACE_HEAP.  */
572 enum memory_space m_mem_space
;
575 /* taint_state_machine's ctor. */
/* Names the machine "taint", passes LOGGER to the state_machine base and
   registers the four states "tainted", "has_lb", "has_ub" and "stop",
   keeping the handle returned by add_state for each of them.  */
577 taint_state_machine::taint_state_machine (logger
*logger
)
578 : state_machine ("taint", logger
)
580 m_tainted
= add_state ("tainted");
581 m_has_lb
= add_state ("has_lb");
582 m_has_ub
= add_state ("has_ub");
583 m_stop
= add_state ("stop");
/* Compute a state for the svalue SVAL from the states its operands have
   in MAP (looked up with EXT_STATE), dispatching on the kind of SVAL.
   Unary and binary operations are examined; for binary operations such
   as POINTER_PLUS_EXPR the result is the combination of both operand
   states as computed by combine_states.  */
586 state_machine::state_t
587 taint_state_machine::alt_get_inherited_state (const sm_state_map
&map
,
589 const extrinsic_state
&ext_state
)
592 switch (sval
->get_kind ())
/* Unary operation: fetch the operator and the state of its operand.
   NOTE(review): presumably only some operators propagate the operand's
   state -- confirm which.  */
598 const unaryop_svalue
*unaryop_sval
599 = as_a
<const unaryop_svalue
*> (sval
);
600 enum tree_code op
= unaryop_sval
->get_op ();
601 const svalue
*arg
= unaryop_sval
->get_arg ();
606 state_t arg_state
= map
.get_state (arg
, ext_state
);
/* Binary operation: fetch the operator and both operands.  */
617 const binop_svalue
*binop_sval
= as_a
<const binop_svalue
*> (sval
);
618 enum tree_code op
= binop_sval
->get_op ();
619 const svalue
*arg0
= binop_sval
->get_arg0 ();
620 const svalue
*arg1
= binop_sval
->get_arg1 ();
628 case POINTER_PLUS_EXPR
:
/* The result is as tainted as the more tainted of the two operands.  */
632 state_t arg0_state
= map
.get_state (arg0
, ext_state
);
633 state_t arg1_state
= map
.get_state (arg1
, ext_state
);
634 return combine_states (arg0_state
, arg1_state
);
646 /* Comparisons are just booleans. */
659 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
/* Two kinds of statement are examined.  For a call to "fread" with four
   arguments, the first argument is moved from the start state to
   'tainted'.  For an assignment whose second operand is used as a
   divisor, a tainted divisor is reported via tainted_divisor and then
   moved to 'stop'.  */
662 taint_state_machine::on_stmt (sm_context
*sm_ctxt
,
663 const supernode
*node
,
664 const gimple
*stmt
) const
/* Calls with a known callee.  */
666 if (const gcall
*call
= dyn_cast
<const gcall
*> (stmt
))
667 if (tree callee_fndecl
= sm_ctxt
->get_fndecl_for_call (call
))
/* fread: its first argument (index 0) becomes tainted.  */
669 if (is_named_call_p (callee_fndecl
, "fread", call
, 4))
671 tree arg
= gimple_call_arg (call
, 0);
673 sm_ctxt
->on_transition (node
, stmt
, arg
, m_start
, m_tainted
);
675 /* Dereference an ADDR_EXPR. */
676 // TODO: should the engine do this?
/* When ARG is an ADDR_EXPR, its operand 0 gets a transition as well.  */
677 if (TREE_CODE (arg
) == ADDR_EXPR
)
678 sm_ctxt
->on_transition (node
, stmt
, TREE_OPERAND (arg
, 0),
683 // TODO: ...etc; many other sources of untrusted data
/* Assignments: look for a tainted divisor.
   NOTE(review): presumably only division-like rhs codes reach the
   divisor check -- confirm.  */
685 if (const gassign
*assign
= dyn_cast
<const gassign
*> (stmt
))
687 enum tree_code op
= gimple_assign_rhs_code (assign
);
704 tree divisor
= gimple_assign_rhs2 (assign
);;
705 state_t state
= sm_ctxt
->get_state (stmt
, divisor
);
/* Only states that get_taint regards as tainted are reported.  */
707 if (get_taint (state
, TREE_TYPE (divisor
), &b
))
709 tree diag_divisor
= sm_ctxt
->get_diagnostic_tree (divisor
);
710 sm_ctxt
->warn (node
, stmt
, divisor
,
711 new tainted_divisor (*this, diag_divisor
, b
));
/* Move the divisor to 'stop' once it has been reported.  */
712 sm_ctxt
->set_next_state (stmt
, divisor
, m_stop
);
722 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
723 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
724 and states 'has_ub' and 'has_lb' to 'stop'. */
/* Every transition is applied to LHS; RHS is marked unused.  */
727 taint_state_machine::on_condition (sm_context
*sm_ctxt
,
728 const supernode
*node
,
732 const svalue
*rhs ATTRIBUTE_UNUSED
) const
737 // TODO: this doesn't use the RHS; should we make it symmetric?
/* First pair of transitions: out of 'tainted' and out of 'has_ub'.
   NOTE(review): presumably for comparisons that establish a lower bound
   -- confirm.  */
747 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
749 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_ub
,
/* Second pair of transitions: out of 'tainted' and out of 'has_lb'.
   NOTE(review): presumably for comparisons that establish an upper bound
   -- confirm.  */
756 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
758 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_lb
,
/* Implementation of state_machine::can_purge_p vfunc for
   taint_state_machine.  The state S is not consulted (it is marked
   ATTRIBUTE_UNUSED).  */
768 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED
) const
773 /* If STATE is a tainted state, write the bounds to *OUT and return true.
774 Otherwise return false.
775 Use the signedness of TYPE to determine if "has_ub" is tainted. */
/* The states handled are 'tainted', 'has_lb' and, for types that are not
   unsigned, 'has_ub'.  */
778 taint_state_machine::get_taint (state_t state
, tree type
,
779 enum bounds
*out
) const
781 /* Unsigned types have an implicit lower bound. */
782 bool is_unsigned
= false;
/* TYPE_UNSIGNED is consulted for integral types only; every other type is
   treated as signed.  */
784 if (INTEGRAL_TYPE_P (type
))
785 is_unsigned
= TYPE_UNSIGNED (type
);
787 /* Can't use a switch as the states are non-const. */
/* A fully tainted unsigned value still has its implicit lower bound, so
   it is reported as BOUNDS_LOWER rather than BOUNDS_NONE.  */
788 if (state
== m_tainted
)
790 *out
= is_unsigned
? BOUNDS_LOWER
: BOUNDS_NONE
;
793 else if (state
== m_has_lb
)
/* 'has_ub' counts as tainted only when the type is not unsigned.  */
798 else if (state
== m_has_ub
&& !is_unsigned
)
800 /* Missing lower bound. */
807 /* Find the most tainted state of S0 and S1. */
809 state_machine::state_t
810 taint_state_machine::combine_states (state_t s0
, state_t s1
) const
/* Check whether either input is 'tainted'.
   NOTE(review): presumably the result is then 'tainted' -- confirm.  */
816 if (s0
== m_tainted
|| s1
== m_tainted
)
829 } // anonymous namespace
831 /* Internal interface to this file. */
/* Allocate a new taint_state_machine that logs to LOGGER and return it.
   The object is created with plain 'new'; NOTE(review): the caller
   presumably takes ownership -- confirm.  */
834 make_taint_state_machine (logger
*logger
)
836 return new taint_state_machine (logger
);
839 /* Complain to CTXT if accessing REG could lead to arbitrary
840 memory access under an attacker's control (due to taint). */
/* Starting at REG, inspect the region and then its parent regions.
   Depending on the kind of region, the index, byte offset or byte size
   is looked up in the taint state map; if taint_state_machine::get_taint
   reports it as tainted, a tainted_array_index, tainted_offset or
   tainted_size diagnostic is passed to CTXT->warn.  DIR is only
   forwarded to tainted_size.  */
843 region_model::check_region_for_taint (const region
*reg
,
844 enum access_direction dir
,
845 region_model_context
*ctxt
) const
850 LOG_SCOPE (ctxt
->get_logger ());
853 const state_machine
*sm
;
/* Ask CTXT for the taint state map and its state machine.
   NOTE(review): presumably nothing is checked when this fails --
   confirm.  */
855 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* Unchecked downcast: the machine returned by get_taint_map is assumed to
   be the taint state machine.  */
861 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
863 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
867 const region
*iter_region
= reg
;
870 switch (iter_region
->get_kind ())
/* Element region: check the array index.  */
877 const element_region
*element_reg
878 = (const element_region
*)iter_region
;
879 const svalue
*index
= element_reg
->get_index ();
880 const state_machine::state_t
881 state
= smap
->get_state (index
, *ext_state
);
884 if (taint_sm
.get_taint (state
, index
->get_type (), &b
))
886 tree arg
= get_representative_tree (index
);
887 ctxt
->warn (new tainted_array_index (taint_sm
, arg
, b
));
/* Offset region: check the byte offset.  */
894 const offset_region
*offset_reg
895 = (const offset_region
*)iter_region
;
896 const svalue
*offset
= offset_reg
->get_byte_offset ();
897 const state_machine::state_t
898 state
= smap
->get_state (offset
, *ext_state
);
900 /* Handle implicit cast to sizetype. */
/* If the offset is a cast of a typed value, use that value's type so the
   signedness seen by get_taint is the one before the cast.  */
901 tree effective_type
= offset
->get_type ();
902 if (const svalue
*cast
= offset
->maybe_undo_cast ())
903 if (cast
->get_type ())
904 effective_type
= cast
->get_type ();
906 if (taint_sm
.get_taint (state
, effective_type
, &b
))
908 tree arg
= get_representative_tree (offset
);
909 ctxt
->warn (new tainted_offset (taint_sm
, arg
, b
));
/* Cast region: continue with the region that was cast.  */
916 const cast_region
*cast_reg
917 = as_a
<const cast_region
*> (iter_region
);
918 iter_region
= cast_reg
->get_original_region ();
/* Sized region: check the byte size.  */
924 const sized_region
*sized_reg
925 = (const sized_region
*)iter_region
;
926 const svalue
*size_sval
= sized_reg
->get_byte_size_sval (m_mgr
);
927 const state_machine::state_t
928 state
= smap
->get_state (size_sval
, *ext_state
);
931 if (taint_sm
.get_taint (state
, size_sval
->get_type (), &b
))
933 tree arg
= get_representative_tree (size_sval
);
934 ctxt
->warn (new tainted_size (taint_sm
, arg
, b
, dir
));
/* Move outwards to the parent region.  */
940 iter_region
= iter_region
->get_parent_region ();
944 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
945 under an attacker's control (due to taint), where the allocation
946 is happening within MEM_SPACE. */
/* MEM_SPACE must be MEMSPACE_STACK or MEMSPACE_HEAP and SIZE_IN_BYTES
   must be non-null; both are asserted.  A tainted size is reported to
   CTXT->warn as a tainted_allocation_size diagnostic.  */
949 region_model::check_dynamic_size_for_taint (enum memory_space mem_space
,
950 const svalue
*size_in_bytes
,
951 region_model_context
*ctxt
) const
953 gcc_assert (mem_space
== MEMSPACE_STACK
|| mem_space
== MEMSPACE_HEAP
);
954 gcc_assert (size_in_bytes
);
957 LOG_SCOPE (ctxt
->get_logger ());
960 const state_machine
*sm
;
/* Ask CTXT for the taint state map and its state machine.
   NOTE(review): presumably nothing is checked when this fails --
   confirm.  */
962 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* Unchecked downcast: the machine returned by get_taint_map is assumed to
   be the taint state machine.  */
968 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
970 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* Look up the state of the size and report it if it is tainted.  */
974 const state_machine::state_t
975 state
= smap
->get_state (size_in_bytes
, *ext_state
);
978 if (taint_sm
.get_taint (state
, size_in_bytes
->get_type (), &b
))
980 tree arg
= get_representative_tree (size_in_bytes
);
981 ctxt
->warn (new tainted_allocation_size (taint_sm
, arg
, b
, mem_space
));
987 #endif /* #if ENABLE_ANALYZER */