1 /* A state machine for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2023 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 #define INCLUDE_MEMORY
26 #include "coretypes.h"
27 #include "make-unique.h"
30 #include "basic-block.h"
33 #include "diagnostic-path.h"
34 #include "analyzer/analyzer.h"
35 #include "analyzer/analyzer-logging.h"
36 #include "gimple-iterator.h"
37 #include "ordered-hash-map.h"
41 #include "stringpool.h"
43 #include "analyzer/supergraph.h"
44 #include "analyzer/call-string.h"
45 #include "analyzer/program-point.h"
46 #include "analyzer/store.h"
47 #include "analyzer/region-model.h"
48 #include "analyzer/sm.h"
49 #include "analyzer/program-state.h"
50 #include "analyzer/pending-diagnostic.h"
51 #include "analyzer/constraint-manager.h"
59 /* An enum for describing tainted values. */
63 /* This tainted value has no upper or lower bound. */
66 /* This tainted value has an upper bound but no lower bound. */
69 /* This tainted value has a lower bound but no upper bound. */
73 /* An experimental state machine, for tracking "taint": unsanitized uses
74 of data potentially under an attacker's control. */
76 class taint_state_machine
: public state_machine
79 taint_state_machine (logger
*logger
);
81 bool inherited_state_p () const final override
{ return true; }
83 state_t
alt_get_inherited_state (const sm_state_map
&map
,
85 const extrinsic_state
&ext_state
)
88 bool on_stmt (sm_context
*sm_ctxt
,
89 const supernode
*node
,
90 const gimple
*stmt
) const final override
;
92 void on_condition (sm_context
*sm_ctxt
,
93 const supernode
*node
,
97 const svalue
*rhs
) const final override
;
98 void on_bounded_ranges (sm_context
*sm_ctxt
,
99 const supernode
*node
,
102 const bounded_ranges
&ranges
) const final override
;
104 bool can_purge_p (state_t s
) const final override
;
106 bool get_taint (state_t s
, tree type
, enum bounds
*out
) const;
108 state_t
combine_states (state_t s0
, state_t s1
) const;
111 void check_control_flow_arg_for_taint (sm_context
*sm_ctxt
,
115 void check_for_tainted_size_arg (sm_context
*sm_ctxt
,
116 const supernode
*node
,
118 tree callee_fndecl
) const;
119 void check_for_tainted_divisor (sm_context
*sm_ctxt
,
120 const supernode
*node
,
121 const gassign
*assign
) const;
124 /* State for a "tainted" value: unsanitized data potentially under an
125 attacker's control. */
128 /* State for a "tainted" value that has a lower bound. */
131 /* State for a "tainted" value that has an upper bound. */
134 /* Stop state, for a value we don't want to track any more. */
137 /* Global state, for when the last condition had tainted arguments. */
138 state_t m_tainted_control_flow
;
141 /* Class for diagnostics relating to taint_state_machine. */
143 class taint_diagnostic
: public pending_diagnostic
146 taint_diagnostic (const taint_state_machine
&sm
, tree arg
,
147 enum bounds has_bounds
)
148 : m_sm (sm
), m_arg (arg
), m_has_bounds (has_bounds
)
151 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
153 const taint_diagnostic
&other
= (const taint_diagnostic
&)base_other
;
154 return (same_tree_p (m_arg
, other
.m_arg
)
155 && m_has_bounds
== other
.m_has_bounds
);
158 label_text
describe_state_change (const evdesc::state_change
&change
) override
160 if (change
.m_new_state
== m_sm
.m_tainted
)
163 return change
.formatted_print ("%qE has an unchecked value here"
165 change
.m_expr
, change
.m_origin
);
167 return change
.formatted_print ("%qE gets an unchecked value here",
170 else if (change
.m_new_state
== m_sm
.m_has_lb
)
171 return change
.formatted_print ("%qE has its lower bound checked here",
173 else if (change
.m_new_state
== m_sm
.m_has_ub
)
174 return change
.formatted_print ("%qE has its upper bound checked here",
176 return label_text ();
179 diagnostic_event::meaning
180 get_meaning_for_state_change (const evdesc::state_change
&change
)
183 if (change
.m_new_state
== m_sm
.m_tainted
)
184 return diagnostic_event::meaning (diagnostic_event::VERB_acquire
,
185 diagnostic_event::NOUN_taint
);
186 return diagnostic_event::meaning ();
190 const taint_state_machine
&m_sm
;
192 enum bounds m_has_bounds
;
195 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
198 class tainted_array_index
: public taint_diagnostic
201 tainted_array_index (const taint_state_machine
&sm
, tree arg
,
202 enum bounds has_bounds
)
203 : taint_diagnostic (sm
, arg
, has_bounds
)
206 const char *get_kind () const final override
{ return "tainted_array_index"; }
208 int get_controlling_option () const final override
210 return OPT_Wanalyzer_tainted_array_index
;
213 bool emit (diagnostic_emission_context
&ctxt
) final override
215 /* CWE-129: "Improper Validation of Array Index". */
218 switch (m_has_bounds
)
223 return ctxt
.warn ("use of attacker-controlled value %qE"
224 " in array lookup without bounds checking",
228 return ctxt
.warn ("use of attacker-controlled value %qE"
229 " in array lookup without checking for negative",
233 return ctxt
.warn ("use of attacker-controlled value %qE"
234 " in array lookup without upper-bounds checking",
239 switch (m_has_bounds
)
244 return ctxt
.warn ("use of attacker-controlled value"
245 " in array lookup without bounds checking");
248 return ctxt
.warn ("use of attacker-controlled value"
249 " in array lookup without checking for"
253 return ctxt
.warn ("use of attacker-controlled value"
254 " in array lookup without upper-bounds"
260 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
263 switch (m_has_bounds
)
268 return ev
.formatted_print
269 ("use of attacker-controlled value %qE in array lookup"
270 " without bounds checking",
273 return ev
.formatted_print
274 ("use of attacker-controlled value %qE"
275 " in array lookup without checking for negative",
278 return ev
.formatted_print
279 ("use of attacker-controlled value %qE"
280 " in array lookup without upper-bounds checking",
284 switch (m_has_bounds
)
289 return ev
.formatted_print
290 ("use of attacker-controlled value in array lookup"
291 " without bounds checking");
293 return ev
.formatted_print
294 ("use of attacker-controlled value"
295 " in array lookup without checking for negative");
297 return ev
.formatted_print
298 ("use of attacker-controlled value"
299 " in array lookup without upper-bounds checking");
304 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
307 class tainted_offset
: public taint_diagnostic
310 tainted_offset (const taint_state_machine
&sm
, tree arg
,
311 enum bounds has_bounds
)
312 : taint_diagnostic (sm
, arg
, has_bounds
)
315 const char *get_kind () const final override
{ return "tainted_offset"; }
317 int get_controlling_option () const final override
319 return OPT_Wanalyzer_tainted_offset
;
322 bool emit (diagnostic_emission_context
&ctxt
) final override
324 /* CWE-823: "Use of Out-of-range Pointer Offset". */
327 switch (m_has_bounds
)
332 return ctxt
.warn ("use of attacker-controlled value %qE as offset"
333 " without bounds checking",
337 return ctxt
.warn ("use of attacker-controlled value %qE as offset"
338 " without lower-bounds checking",
342 return ctxt
.warn ("use of attacker-controlled value %qE as offset"
343 " without upper-bounds checking",
348 switch (m_has_bounds
)
353 return ctxt
.warn ("use of attacker-controlled value as offset"
354 " without bounds checking");
357 return ctxt
.warn ("use of attacker-controlled value as offset"
358 " without lower-bounds checking");
361 return ctxt
.warn ("use of attacker-controlled value as offset"
362 " without upper-bounds checking");
367 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
370 switch (m_has_bounds
)
375 return ev
.formatted_print ("use of attacker-controlled value %qE"
376 " as offset without bounds checking",
379 return ev
.formatted_print ("use of attacker-controlled value %qE"
380 " as offset without lower-bounds checking",
383 return ev
.formatted_print ("use of attacker-controlled value %qE"
384 " as offset without upper-bounds checking",
388 switch (m_has_bounds
)
393 return ev
.formatted_print ("use of attacker-controlled value"
394 " as offset without bounds checking");
396 return ev
.formatted_print ("use of attacker-controlled value"
397 " as offset without lower-bounds"
400 return ev
.formatted_print ("use of attacker-controlled value"
401 " as offset without upper-bounds"
407 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
410 class tainted_size
: public taint_diagnostic
413 tainted_size (const taint_state_machine
&sm
, tree arg
,
414 enum bounds has_bounds
)
415 : taint_diagnostic (sm
, arg
, has_bounds
)
418 const char *get_kind () const override
{ return "tainted_size"; }
420 int get_controlling_option () const final override
422 return OPT_Wanalyzer_tainted_size
;
425 bool emit (diagnostic_emission_context
&ctxt
) override
427 /* "CWE-129: Improper Validation of Array Index". */
430 switch (m_has_bounds
)
435 return ctxt
.warn ("use of attacker-controlled value %qE as size"
436 " without bounds checking",
440 return ctxt
.warn ("use of attacker-controlled value %qE as size"
441 " without lower-bounds checking",
445 return ctxt
.warn ("use of attacker-controlled value %qE as size"
446 " without upper-bounds checking",
451 switch (m_has_bounds
)
456 return ctxt
.warn ("use of attacker-controlled value as size"
457 " without bounds checking");
460 return ctxt
.warn ("use of attacker-controlled value as size"
461 " without lower-bounds checking");
464 return ctxt
.warn ("use of attacker-controlled value as size"
465 " without upper-bounds checking");
470 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
473 switch (m_has_bounds
)
478 return ev
.formatted_print ("use of attacker-controlled value %qE"
479 " as size without bounds checking",
482 return ev
.formatted_print ("use of attacker-controlled value %qE"
483 " as size without lower-bounds checking",
486 return ev
.formatted_print ("use of attacker-controlled value %qE"
487 " as size without upper-bounds checking",
491 switch (m_has_bounds
)
496 return ev
.formatted_print ("use of attacker-controlled value"
497 " as size without bounds checking");
499 return ev
.formatted_print ("use of attacker-controlled value"
500 " as size without lower-bounds checking");
502 return ev
.formatted_print ("use of attacker-controlled value"
503 " as size without upper-bounds checking");
508 /* Subclass of tainted_size for reporting on tainted size values
509 passed to an external function annotated with attribute "access". */
511 class tainted_access_attrib_size
: public tainted_size
514 tainted_access_attrib_size (const taint_state_machine
&sm
, tree arg
,
515 enum bounds has_bounds
, tree callee_fndecl
,
516 unsigned size_argno
, const char *access_str
)
517 : tainted_size (sm
, arg
, has_bounds
),
518 m_callee_fndecl (callee_fndecl
),
519 m_size_argno (size_argno
), m_access_str (access_str
)
523 const char *get_kind () const override
525 return "tainted_access_attrib_size";
528 bool emit (diagnostic_emission_context
&ctxt
) final override
530 bool warned
= tainted_size::emit (ctxt
);
533 inform (DECL_SOURCE_LOCATION (m_callee_fndecl
),
534 "parameter %i of %qD marked as a size via attribute %qs",
535 m_size_argno
+ 1, m_callee_fndecl
, m_access_str
);
541 tree m_callee_fndecl
;
542 unsigned m_size_argno
;
543 const char *m_access_str
;
546 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
547 divisor (so that an attacker can trigger a divide by zero). */
549 class tainted_divisor
: public taint_diagnostic
552 tainted_divisor (const taint_state_machine
&sm
, tree arg
,
553 enum bounds has_bounds
)
554 : taint_diagnostic (sm
, arg
, has_bounds
)
557 const char *get_kind () const final override
{ return "tainted_divisor"; }
559 int get_controlling_option () const final override
561 return OPT_Wanalyzer_tainted_divisor
;
564 bool emit (diagnostic_emission_context
&ctxt
) final override
566 /* CWE-369: "Divide By Zero". */
569 return ctxt
.warn ("use of attacker-controlled value %qE as divisor"
570 " without checking for zero",
573 return ctxt
.warn ("use of attacker-controlled value as divisor"
574 " without checking for zero");
577 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
580 return ev
.formatted_print
581 ("use of attacker-controlled value %qE as divisor"
582 " without checking for zero",
585 return ev
.formatted_print
586 ("use of attacker-controlled value as divisor"
587 " without checking for zero");
591 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
592 size of a dynamic allocation. */
594 class tainted_allocation_size
: public taint_diagnostic
597 tainted_allocation_size (const taint_state_machine
&sm
, tree arg
,
598 enum bounds has_bounds
, enum memory_space mem_space
)
599 : taint_diagnostic (sm
, arg
, has_bounds
),
600 m_mem_space (mem_space
)
604 const char *get_kind () const final override
606 return "tainted_allocation_size";
609 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
611 if (!taint_diagnostic::subclass_equal_p (base_other
))
613 const tainted_allocation_size
&other
614 = (const tainted_allocation_size
&)base_other
;
615 return m_mem_space
== other
.m_mem_space
;
618 int get_controlling_option () const final override
620 return OPT_Wanalyzer_tainted_allocation_size
;
623 bool emit (diagnostic_emission_context
&ctxt
) final override
625 /* "CWE-789: Memory Allocation with Excessive Size Value". */
630 switch (m_has_bounds
)
635 warned
= ctxt
.warn ("use of attacker-controlled value %qE as"
636 " allocation size without bounds checking",
640 warned
= ctxt
.warn ("use of attacker-controlled value %qE as"
641 " allocation size without"
642 " lower-bounds checking",
646 warned
= ctxt
.warn ("use of attacker-controlled value %qE as"
647 " allocation size without"
648 " upper-bounds checking",
653 switch (m_has_bounds
)
658 warned
= ctxt
.warn ("use of attacker-controlled value as"
659 " allocation size without bounds"
663 warned
= ctxt
.warn ("use of attacker-controlled value as"
664 " allocation size without"
665 " lower-bounds checking");
668 warned
= ctxt
.warn ("use of attacker-controlled value as"
669 " allocation size without"
670 " upper-bounds checking");
675 const location_t loc
= ctxt
.get_location ();
681 inform (loc
, "stack-based allocation");
684 inform (loc
, "heap-based allocation");
691 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
694 switch (m_has_bounds
)
699 return ev
.formatted_print
700 ("use of attacker-controlled value %qE as allocation size"
701 " without bounds checking",
704 return ev
.formatted_print
705 ("use of attacker-controlled value %qE as allocation size"
706 " without lower-bounds checking",
709 return ev
.formatted_print
710 ("use of attacker-controlled value %qE as allocation size"
711 " without upper-bounds checking",
715 switch (m_has_bounds
)
720 return ev
.formatted_print
721 ("use of attacker-controlled value as allocation size"
722 " without bounds checking");
724 return ev
.formatted_print
725 ("use of attacker-controlled value as allocation size"
726 " without lower-bounds checking");
728 return ev
.formatted_print
729 ("use of attacker-controlled value as allocation size"
730 " without upper-bounds checking");
735 enum memory_space m_mem_space
;
738 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
739 value being used as part of the condition of an assertion. */
741 class tainted_assertion
: public taint_diagnostic
744 tainted_assertion (const taint_state_machine
&sm
, tree arg
,
745 tree assert_failure_fndecl
)
746 : taint_diagnostic (sm
, arg
, BOUNDS_NONE
),
747 m_assert_failure_fndecl (assert_failure_fndecl
)
749 gcc_assert (m_assert_failure_fndecl
);
752 const char *get_kind () const final override
754 return "tainted_assertion";
757 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
759 if (!taint_diagnostic::subclass_equal_p (base_other
))
761 const tainted_assertion
&other
762 = (const tainted_assertion
&)base_other
;
763 return m_assert_failure_fndecl
== other
.m_assert_failure_fndecl
;
766 int get_controlling_option () const final override
768 return OPT_Wanalyzer_tainted_assertion
;
/* Emit the warning for a tainted value used in the condition of an
   assertion, via CTXT; the result of ctxt.warn is returned.
   The message uses the same "attacker-controlled" wording as the other
   taint diagnostics in this file. */
771 bool emit (diagnostic_emission_context
&ctxt
) final override
773 /* "CWE-617: Reachable Assertion". */
776 return ctxt
.warn ("use of attacker-controlled value in"
777 " condition for assertion");
780 location_t
fixup_location (location_t loc
,
781 bool primary
) const final override
784 /* For the primary location we want to avoid being in e.g. the
785 <assert.h> system header, since this would suppress the
787 return expansion_point_location_if_in_system_header (loc
);
788 else if (in_system_header_at (loc
))
789 /* For events, we want to show the implementation of the assert
790 macro when we're describing them. */
791 return linemap_resolve_location (line_table
, loc
,
792 LRK_SPELLING_LOCATION
,
795 return pending_diagnostic::fixup_location (loc
, primary
);
798 label_text
describe_state_change (const evdesc::state_change
&change
) override
800 if (change
.m_new_state
== m_sm
.m_tainted_control_flow
)
801 return change
.formatted_print
802 ("use of attacker-controlled value for control flow");
803 return taint_diagnostic::describe_state_change (change
);
806 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
808 if (mention_noreturn_attribute_p ())
809 return ev
.formatted_print
810 ("treating %qE as an assertion failure handler"
811 " due to %<__attribute__((__noreturn__))%>",
812 m_assert_failure_fndecl
);
814 return ev
.formatted_print
815 ("treating %qE as an assertion failure handler",
816 m_assert_failure_fndecl
);
820 bool mention_noreturn_attribute_p () const
822 if (fndecl_built_in_p (m_assert_failure_fndecl
, BUILT_IN_UNREACHABLE
))
827 tree m_assert_failure_fndecl
;
830 /* taint_state_machine's ctor. */
832 taint_state_machine::taint_state_machine (logger
*logger
)
833 : state_machine ("taint", logger
),
834 m_tainted (add_state ("tainted")),
835 m_has_lb (add_state ("has_lb")),
836 m_has_ub (add_state ("has_ub")),
837 m_stop (add_state ("stop")),
838 m_tainted_control_flow (add_state ("tainted-control-flow"))
842 state_machine::state_t
843 taint_state_machine::alt_get_inherited_state (const sm_state_map
&map
,
845 const extrinsic_state
&ext_state
)
848 switch (sval
->get_kind ())
854 const unaryop_svalue
*unaryop_sval
855 = as_a
<const unaryop_svalue
*> (sval
);
856 enum tree_code op
= unaryop_sval
->get_op ();
857 const svalue
*arg
= unaryop_sval
->get_arg ();
862 state_t arg_state
= map
.get_state (arg
, ext_state
);
872 const binop_svalue
*binop_sval
= as_a
<const binop_svalue
*> (sval
);
873 enum tree_code op
= binop_sval
->get_op ();
874 const svalue
*arg0
= binop_sval
->get_arg0 ();
875 const svalue
*arg1
= binop_sval
->get_arg1 ();
892 case POINTER_PLUS_EXPR
:
895 state_t arg0_state
= map
.get_state (arg0
, ext_state
);
896 state_t arg1_state
= map
.get_state (arg1
, ext_state
);
897 return combine_states (arg0_state
, arg1_state
);
903 /* The left-hand side of X % Y can be sanitized by
905 return map
.get_state (arg1
, ext_state
);
919 /* Return true iff FNDECL should be considered to be an assertion failure
920 handler by -Wanalyzer-tainted-assertion. */
923 is_assertion_failure_handler_p (tree fndecl
)
926 if (TREE_THIS_VOLATILE (fndecl
))
932 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
935 taint_state_machine::on_stmt (sm_context
*sm_ctxt
,
936 const supernode
*node
,
937 const gimple
*stmt
) const
939 if (const gcall
*call
= dyn_cast
<const gcall
*> (stmt
))
940 if (tree callee_fndecl
= sm_ctxt
->get_fndecl_for_call (call
))
942 if (is_named_call_p (callee_fndecl
, "fread", call
, 4))
944 tree arg
= gimple_call_arg (call
, 0);
946 sm_ctxt
->on_transition (node
, stmt
, arg
, m_start
, m_tainted
);
948 /* Dereference an ADDR_EXPR. */
949 // TODO: should the engine do this?
950 if (TREE_CODE (arg
) == ADDR_EXPR
)
951 sm_ctxt
->on_transition (node
, stmt
, TREE_OPERAND (arg
, 0),
956 /* External function with "access" attribute. */
957 if (sm_ctxt
->unknown_side_effects_p ())
958 check_for_tainted_size_arg (sm_ctxt
, node
, call
, callee_fndecl
);
960 if (is_assertion_failure_handler_p (callee_fndecl
)
961 && sm_ctxt
->get_global_state () == m_tainted_control_flow
)
963 sm_ctxt
->warn (node
, call
, NULL_TREE
,
964 make_unique
<tainted_assertion
> (*this, NULL_TREE
,
968 // TODO: ...etc; many other sources of untrusted data
970 if (const gassign
*assign
= dyn_cast
<const gassign
*> (stmt
))
972 enum tree_code op
= gimple_assign_rhs_code (assign
);
988 check_for_tainted_divisor (sm_ctxt
, node
, assign
);
993 if (const gcond
*cond
= dyn_cast
<const gcond
*> (stmt
))
995 /* Reset the state of "tainted-control-flow" before each
996 control flow statement, so that only the last one before
997 an assertion-failure-handler counts. */
998 sm_ctxt
->set_global_state (m_start
);
999 check_control_flow_arg_for_taint (sm_ctxt
, cond
, gimple_cond_lhs (cond
));
1000 check_control_flow_arg_for_taint (sm_ctxt
, cond
, gimple_cond_rhs (cond
));
1003 if (const gswitch
*switch_
= dyn_cast
<const gswitch
*> (stmt
))
1005 /* Reset the state of "tainted-control-flow" before each
1006 control flow statement, so that only the last one before
1007 an assertion-failure-handler counts. */
1008 sm_ctxt
->set_global_state (m_start
);
1009 check_control_flow_arg_for_taint (sm_ctxt
, switch_
,
1010 gimple_switch_index (switch_
));
1016 /* If EXPR is tainted, mark this execution path with the
1017 "tainted-control-flow" global state, in case we're about
1018 to call an assertion-failure-handler. */
1021 taint_state_machine::check_control_flow_arg_for_taint (sm_context
*sm_ctxt
,
1025 const region_model
*old_model
= sm_ctxt
->get_old_region_model ();
1026 const svalue
*sval
= old_model
->get_rvalue (expr
, NULL
);
1027 state_t state
= sm_ctxt
->get_state (stmt
, sval
);
1029 if (get_taint (state
, TREE_TYPE (expr
), &b
))
1030 sm_ctxt
->set_global_state (m_tainted_control_flow
);
1033 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
1034 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1035 and states 'has_ub' and 'has_lb' to 'stop'. */
1038 taint_state_machine::on_condition (sm_context
*sm_ctxt
,
1039 const supernode
*node
,
1043 const svalue
*rhs
) const
1048 if (lhs
->get_kind () == SK_UNKNOWN
1049 || rhs
->get_kind () == SK_UNKNOWN
)
1051 /* If we have a comparison against UNKNOWN, then
1052 we've presumably hit the svalue complexity limit,
1053 and we don't know what is being sanitized.
1054 Give up on any taint already found on this execution path. */
1055 // TODO: warn about this
1057 get_logger ()->log ("comparison against UNKNOWN; removing all taint");
1058 sm_ctxt
->clear_all_per_svalue_state ();
1070 /* (LHS >= RHS) or (LHS > RHS)
1071 LHS gains a lower bound
1072 RHS gains an upper bound. */
1073 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
1075 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_ub
,
1077 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_tainted
,
1079 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_has_lb
,
1086 /* Detect where build_range_check has optimized
1087 (c>=low) && (c<=high)
1089 (c-low>=0) && (c-low<=high-low)
1091 (unsigned)(c - low) <= (unsigned)(high-low). */
1092 if (const binop_svalue
*binop_sval
1093 = lhs
->dyn_cast_binop_svalue ())
1095 const svalue
*inner_lhs
= binop_sval
->get_arg0 ();
1096 enum tree_code inner_op
= binop_sval
->get_op ();
1097 const svalue
*inner_rhs
= binop_sval
->get_arg1 ();
1098 if (const svalue
*before_cast
= inner_lhs
->maybe_undo_cast ())
1099 inner_lhs
= before_cast
;
1100 if (tree outer_rhs_cst
= rhs
->maybe_get_constant ())
1101 if (tree inner_rhs_cst
= inner_rhs
->maybe_get_constant ())
1102 if (inner_op
== PLUS_EXPR
1103 && TREE_CODE (inner_rhs_cst
) == INTEGER_CST
1104 && TREE_CODE (outer_rhs_cst
) == INTEGER_CST
1105 && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst
))
1106 && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst
)))
1109 (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
1110 and thus an optimized test of INNER_LHS (before any
1111 cast to unsigned) against a range.
1112 Transition any of the tainted states to the stop state.
1113 We have to special-case this here rather than in
1114 region_model::on_condition since we can't apply
1115 both conditions simultaneously (we'd have a transition
1116 from the old state to has_lb, then a transition from
1117 the old state *again* to has_ub). */
1119 = sm_ctxt
->get_state (stmt
, inner_lhs
);
1120 if (old_state
== m_tainted
1121 || old_state
== m_has_lb
1122 || old_state
== m_has_ub
)
1123 sm_ctxt
->set_next_state (stmt
, inner_lhs
, m_stop
);
1128 /* (LHS <= RHS) or (LHS < RHS)
1129 LHS gains an upper bound
1130 RHS gains a lower bound. */
1131 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
1133 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_lb
,
1135 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_tainted
,
1137 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_has_ub
,
1146 /* Implementation of state_machine::on_bounded_ranges vfunc for
1147 taint_state_machine, for handling switch statement cases.
1148 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1149 and states 'has_ub' and 'has_lb' to 'stop'. */
1152 taint_state_machine::on_bounded_ranges (sm_context
*sm_ctxt
,
1156 const bounded_ranges
&ranges
) const
1158 gcc_assert (!ranges
.empty_p ());
1159 gcc_assert (ranges
.get_count () > 0);
1161 /* We have one or more ranges; this could be a "default:", or one or
1162 more single or range cases.
1164 Look at the overall endpoints to see if the ranges impose any lower
1165 bounds or upper bounds beyond those of the underlying numeric type. */
1167 tree lowest_bound
= ranges
.get_range (0).m_lower
;
1168 tree highest_bound
= ranges
.get_range (ranges
.get_count () - 1).m_upper
;
1169 gcc_assert (lowest_bound
);
1170 gcc_assert (highest_bound
);
1173 = (lowest_bound
!= TYPE_MIN_VALUE (TREE_TYPE (lowest_bound
)));
1175 = (highest_bound
!= TYPE_MAX_VALUE (TREE_TYPE (highest_bound
)));
1177 if (!ranges_have_lb
&& !ranges_have_ub
)
1180 /* We have new bounds from the ranges; combine them with any
1181 existing bounds on SVAL. */
1182 state_t old_state
= sm_ctxt
->get_state (stmt
, &sval
);
1183 if (old_state
== m_tainted
)
1185 if (ranges_have_lb
&& ranges_have_ub
)
1186 sm_ctxt
->set_next_state (stmt
, &sval
, m_stop
);
1187 else if (ranges_have_lb
)
1188 sm_ctxt
->set_next_state (stmt
, &sval
, m_has_lb
);
1189 else if (ranges_have_ub
)
1190 sm_ctxt
->set_next_state (stmt
, &sval
, m_has_ub
);
1192 else if (old_state
== m_has_ub
&& ranges_have_lb
)
1193 sm_ctxt
->set_next_state (stmt
, &sval
, m_stop
);
1194 else if (old_state
== m_has_lb
&& ranges_have_ub
)
1195 sm_ctxt
->set_next_state (stmt
, &sval
, m_stop
);
1199 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED
) const
1204 /* If STATE is a tainted state, write the bounds to *OUT and return true.
1205 Otherwise return false.
1206 Use the signedness of TYPE to determine if "has_ub" is tainted. */
1209 taint_state_machine::get_taint (state_t state
, tree type
,
1210 enum bounds
*out
) const
1212 /* Unsigned types have an implicit lower bound. */
1213 bool is_unsigned
= false;
1215 if (INTEGRAL_TYPE_P (type
))
1216 is_unsigned
= TYPE_UNSIGNED (type
);
1218 /* Can't use a switch as the states are non-const. */
1219 if (state
== m_tainted
)
1221 *out
= is_unsigned
? BOUNDS_LOWER
: BOUNDS_NONE
;
1224 else if (state
== m_has_lb
)
1226 *out
= BOUNDS_LOWER
;
1229 else if (state
== m_has_ub
&& !is_unsigned
)
1231 /* Missing lower bound. */
1232 *out
= BOUNDS_UPPER
;
1238 /* Find the most tainted state of S0 and S1. */
1240 state_machine::state_t
1241 taint_state_machine::combine_states (state_t s0
, state_t s1
) const
1247 if (s0
== m_tainted
|| s1
== m_tainted
)
1257 /* The only remaining combinations are one of has_ub and has_lb
1258 (in either order). */
1259 gcc_assert ((s0
== m_has_lb
&& s1
== m_has_ub
)
1260 || (s0
== m_has_ub
&& s1
== m_has_lb
));
1264 /* Check for calls to external functions marked with
1265 __attribute__((access)) with a size-index: complain about
1266 tainted values passed as a size to such a function. */
1269 taint_state_machine::check_for_tainted_size_arg (sm_context
*sm_ctxt
,
1270 const supernode
*node
,
1272 tree callee_fndecl
) const
1274 tree fntype
= TREE_TYPE (callee_fndecl
);
1278 if (!TYPE_ATTRIBUTES (fntype
))
1281 /* Initialize a map of attribute access specifications for arguments
1282 to the function call. */
1284 init_attr_rdwr_indices (&rdwr_idx
, TYPE_ATTRIBUTES (fntype
));
1288 for (tree iter
= TYPE_ARG_TYPES (fntype
); iter
;
1289 iter
= TREE_CHAIN (iter
), ++argno
)
1291 const attr_access
* access
= rdwr_idx
.get (argno
);
1295 /* Ignore any duplicate entry in the map for the size argument. */
1296 if (access
->ptrarg
!= argno
)
1299 if (access
->sizarg
== UINT_MAX
)
1302 tree size_arg
= gimple_call_arg (call
, access
->sizarg
);
1304 state_t state
= sm_ctxt
->get_state (call
, size_arg
);
1306 if (get_taint (state
, TREE_TYPE (size_arg
), &b
))
1308 const char* const access_str
=
1309 TREE_STRING_POINTER (access
->to_external_string ());
1310 tree diag_size
= sm_ctxt
->get_diagnostic_tree (size_arg
);
1311 sm_ctxt
->warn (node
, call
, size_arg
,
1312 make_unique
<tainted_access_attrib_size
>
1313 (*this, diag_size
, b
,
1321 /* Complain if ASSIGN (a division operation) has a tainted divisor
1322 that could be zero.
   The divisor is the second rhs operand of ASSIGN.  Divisors of
   non-integral type are not checked.  The divisor's taint state is
   looked up on its value in the old region model; if it is tainted, it
   is compared against a zero constant of the same type before a
   tainted_divisor diagnostic is queued through SM_CTXT at NODE, after
   which the divisor's value is moved to the stop state. */
1325 taint_state_machine::check_for_tainted_divisor (sm_context
*sm_ctxt
,
1326 const supernode
*node
,
1327 const gassign
*assign
) const
1329 const region_model
*old_model
= sm_ctxt
->get_old_region_model ();
1333 tree divisor_expr
= gimple_assign_rhs2 (assign
);
1335 /* Until we track conditions on floating point values, we can't check to
1336 see if they've been checked against zero. */
1337 if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr
)))
1340 const svalue
*divisor_sval
= old_model
->get_rvalue (divisor_expr
, NULL
);
1342 state_t state
= sm_ctxt
->get_state (assign
, divisor_sval
);
1344 if (get_taint (state
, TREE_TYPE (divisor_expr
), &b
))
1346 const svalue
*zero_sval
1347 = old_model
->get_manager ()->get_or_create_int_cst
1348 (TREE_TYPE (divisor_expr
), 0);
1350 = old_model
->eval_condition (divisor_sval
, NE_EXPR
, zero_sval
);
1352 /* The divisor is known to not equal 0: don't warn. */
1355 tree diag_divisor
= sm_ctxt
->get_diagnostic_tree (divisor_expr
);
1356 sm_ctxt
->warn (node
, assign
, divisor_expr
,
1357 make_unique
<tainted_divisor
> (*this, diag_divisor
, b
));
1358 sm_ctxt
->set_next_state (assign
, divisor_sval
, m_stop
);
1362 } // anonymous namespace
1364 /* Internal interface to this file. */
1367 make_taint_state_machine (logger
*logger
)
1369 return new taint_state_machine (logger
);
1372 /* Complain to CTXT if accessing REG could lead to arbitrary
1373 memory access under an attacker's control (due to taint). */
1376 region_model::check_region_for_taint (const region
*reg
,
1377 enum access_direction
,
1378 region_model_context
*ctxt
) const
1383 LOG_SCOPE (ctxt
->get_logger ());
1386 const state_machine
*sm
;
1388 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
1394 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1396 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
1400 const region
*iter_region
= reg
;
1403 switch (iter_region
->get_kind ())
1410 const element_region
*element_reg
1411 = (const element_region
*)iter_region
;
1412 const svalue
*index
= element_reg
->get_index ();
1413 const state_machine::state_t
1414 state
= smap
->get_state (index
, *ext_state
);
1417 if (taint_sm
.get_taint (state
, index
->get_type (), &b
))
1419 tree arg
= get_representative_tree (index
);
1420 ctxt
->warn (make_unique
<tainted_array_index
> (taint_sm
, arg
, b
));
1427 const offset_region
*offset_reg
1428 = (const offset_region
*)iter_region
;
1429 const svalue
*offset
= offset_reg
->get_byte_offset ();
1430 const state_machine::state_t
1431 state
= smap
->get_state (offset
, *ext_state
);
1433 /* Handle implicit cast to sizetype. */
1434 tree effective_type
= offset
->get_type ();
1435 if (const svalue
*cast
= offset
->maybe_undo_cast ())
1436 if (cast
->get_type ())
1437 effective_type
= cast
->get_type ();
1439 if (taint_sm
.get_taint (state
, effective_type
, &b
))
1441 tree arg
= get_representative_tree (offset
);
1442 ctxt
->warn (make_unique
<tainted_offset
> (taint_sm
, arg
, b
));
1449 const cast_region
*cast_reg
1450 = as_a
<const cast_region
*> (iter_region
);
1451 iter_region
= cast_reg
->get_original_region ();
1457 const sized_region
*sized_reg
1458 = (const sized_region
*)iter_region
;
1459 const svalue
*size_sval
= sized_reg
->get_byte_size_sval (m_mgr
);
1460 const state_machine::state_t
1461 state
= smap
->get_state (size_sval
, *ext_state
);
1464 if (taint_sm
.get_taint (state
, size_sval
->get_type (), &b
))
1466 tree arg
= get_representative_tree (size_sval
);
1467 ctxt
->warn (make_unique
<tainted_size
> (taint_sm
, arg
, b
));
1473 iter_region
= iter_region
->get_parent_region ();
1477 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1478 under an attacker's control (due to taint), where the allocation
1479 is happening within MEM_SPACE. */
1482 region_model::check_dynamic_size_for_taint (enum memory_space mem_space
,
1483 const svalue
*size_in_bytes
,
1484 region_model_context
*ctxt
) const
1486 gcc_assert (size_in_bytes
);
1489 LOG_SCOPE (ctxt
->get_logger ());
1492 const state_machine
*sm
;
1494 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
1500 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1502 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
1506 const state_machine::state_t
1507 state
= smap
->get_state (size_in_bytes
, *ext_state
);
1510 if (taint_sm
.get_taint (state
, size_in_bytes
->get_type (), &b
))
1512 tree arg
= get_representative_tree (size_in_bytes
);
1513 ctxt
->warn (make_unique
<tainted_allocation_size
>
1514 (taint_sm
, arg
, b
, mem_space
));
1518 /* Mark SVAL as TAINTED. CTXT must be non-NULL. */
1521 region_model::mark_as_tainted (const svalue
*sval
,
1522 region_model_context
*ctxt
)
1528 const state_machine
*sm
;
1530 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
1536 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1538 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
1542 smap
->set_state (this, sval
, taint_sm
.m_tainted
, NULL
, *ext_state
);
1545 /* Return true if SVAL could possibly be attacker-controlled. */
1548 region_model_context::possibly_tainted_p (const svalue
*sval
)
1551 const state_machine
*sm
;
1553 if (!get_taint_map (&smap
, &sm
, &sm_idx
))
1556 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1558 const extrinsic_state
*ext_state
= get_ext_state ();
1562 const state_machine::state_t state
= smap
->get_state (sval
, *ext_state
);
1565 return (state
== taint_sm
.m_tainted
1566 || state
== taint_sm
.m_has_lb
1567 || state
== taint_sm
.m_has_ub
);
1572 #endif /* #if ENABLE_ANALYZER */