1 /* A state machine for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2024 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 #define INCLUDE_MEMORY
25 #define INCLUDE_VECTOR
27 #include "coretypes.h"
28 #include "make-unique.h"
31 #include "basic-block.h"
34 #include "diagnostic-path.h"
35 #include "analyzer/analyzer.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "gimple-iterator.h"
38 #include "ordered-hash-map.h"
42 #include "stringpool.h"
44 #include "fold-const.h"
45 #include "analyzer/supergraph.h"
46 #include "analyzer/call-string.h"
47 #include "analyzer/program-point.h"
48 #include "analyzer/store.h"
49 #include "analyzer/region-model.h"
50 #include "analyzer/sm.h"
51 #include "analyzer/program-state.h"
52 #include "analyzer/pending-diagnostic.h"
53 #include "analyzer/constraint-manager.h"
54 #include "diagnostic-format-sarif.h"
62 /* An enum for describing tainted values. */
66 /* This tainted value has no upper or lower bound. */
69 /* This tainted value has an upper bound but not lower bound. */
72 /* This tainted value has a lower bound but no upper bound. */
77 bounds_to_str (enum bounds b
)
86 return "BOUNDS_UPPER";
88 return "BOUNDS_LOWER";
92 /* An experimental state machine, for tracking "taint": unsanitized uses
93 of data potentially under an attacker's control. */
95 class taint_state_machine
: public state_machine
98 taint_state_machine (logger
*logger
);
100 bool inherited_state_p () const final override
{ return true; }
102 state_t
alt_get_inherited_state (const sm_state_map
&map
,
104 const extrinsic_state
&ext_state
)
105 const final override
;
108 has_alt_get_inherited_state_p () const final override
113 bool on_stmt (sm_context
&sm_ctxt
,
114 const supernode
*node
,
115 const gimple
*stmt
) const final override
;
117 void on_condition (sm_context
&sm_ctxt
,
118 const supernode
*node
,
122 const svalue
*rhs
) const final override
;
123 void on_bounded_ranges (sm_context
&sm_ctxt
,
124 const supernode
*node
,
127 const bounded_ranges
&ranges
) const final override
;
129 bool can_purge_p (state_t s
) const final override
;
131 bool get_taint (state_t s
, tree type
, enum bounds
*out
) const;
133 state_t
combine_states (state_t s0
, state_t s1
) const;
136 void check_control_flow_arg_for_taint (sm_context
&sm_ctxt
,
140 void check_for_tainted_size_arg (sm_context
&sm_ctxt
,
141 const supernode
*node
,
143 tree callee_fndecl
) const;
144 void check_for_tainted_divisor (sm_context
&sm_ctxt
,
145 const supernode
*node
,
146 const gassign
*assign
) const;
149 /* State for a "tainted" value: unsanitized data potentially under an
150 attacker's control. */
153 /* State for a "tainted" value that has a lower bound. */
156 /* State for a "tainted" value that has an upper bound. */
159 /* Stop state, for a value we don't want to track any more. */
162 /* Global state, for when the last condition had tainted arguments. */
163 state_t m_tainted_control_flow
;
166 /* Class for diagnostics relating to taint_state_machine. */
168 class taint_diagnostic
: public pending_diagnostic
171 taint_diagnostic (const taint_state_machine
&sm
, tree arg
,
172 enum bounds has_bounds
)
173 : m_sm (sm
), m_arg (arg
), m_has_bounds (has_bounds
)
176 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
178 const taint_diagnostic
&other
= (const taint_diagnostic
&)base_other
;
179 return (same_tree_p (m_arg
, other
.m_arg
)
180 && m_has_bounds
== other
.m_has_bounds
);
183 label_text
describe_state_change (const evdesc::state_change
&change
) override
185 if (change
.m_new_state
== m_sm
.m_tainted
)
188 return change
.formatted_print ("%qE has an unchecked value here"
190 change
.m_expr
, change
.m_origin
);
192 return change
.formatted_print ("%qE gets an unchecked value here",
195 else if (change
.m_new_state
== m_sm
.m_has_lb
)
196 return change
.formatted_print ("%qE has its lower bound checked here",
198 else if (change
.m_new_state
== m_sm
.m_has_ub
)
199 return change
.formatted_print ("%qE has its upper bound checked here",
201 return label_text ();
204 diagnostic_event::meaning
205 get_meaning_for_state_change (const evdesc::state_change
&change
)
208 if (change
.m_new_state
== m_sm
.m_tainted
)
209 return diagnostic_event::meaning (diagnostic_event::VERB_acquire
,
210 diagnostic_event::NOUN_taint
);
211 return diagnostic_event::meaning ();
214 void maybe_add_sarif_properties (sarif_object
&result_obj
)
217 sarif_property_bag
&props
= result_obj
.get_or_create_properties ();
218 #define PROPERTY_PREFIX "gcc/analyzer/taint_diagnostic/"
219 props
.set (PROPERTY_PREFIX
"arg", tree_to_json (m_arg
));
220 props
.set_string (PROPERTY_PREFIX
"has_bounds",
221 bounds_to_str (m_has_bounds
));
222 #undef PROPERTY_PREFIX
226 const taint_state_machine
&m_sm
;
228 enum bounds m_has_bounds
;
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   array index.  */
234 class tainted_array_index
: public taint_diagnostic
237 tainted_array_index (const taint_state_machine
&sm
, tree arg
,
238 enum bounds has_bounds
)
239 : taint_diagnostic (sm
, arg
, has_bounds
)
242 const char *get_kind () const final override
{ return "tainted_array_index"; }
244 int get_controlling_option () const final override
246 return OPT_Wanalyzer_tainted_array_index
;
249 bool emit (diagnostic_emission_context
&ctxt
) final override
251 /* CWE-129: "Improper Validation of Array Index". */
254 switch (m_has_bounds
)
259 return ctxt
.warn ("use of attacker-controlled value %qE"
260 " in array lookup without bounds checking",
264 return ctxt
.warn ("use of attacker-controlled value %qE"
265 " in array lookup without checking for negative",
269 return ctxt
.warn ("use of attacker-controlled value %qE"
270 " in array lookup without upper-bounds checking",
275 switch (m_has_bounds
)
280 return ctxt
.warn ("use of attacker-controlled value"
281 " in array lookup without bounds checking");
284 return ctxt
.warn ("use of attacker-controlled value"
285 " in array lookup without checking for"
289 return ctxt
.warn ("use of attacker-controlled value"
290 " in array lookup without upper-bounds"
296 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
299 switch (m_has_bounds
)
304 return ev
.formatted_print
305 ("use of attacker-controlled value %qE in array lookup"
306 " without bounds checking",
309 return ev
.formatted_print
310 ("use of attacker-controlled value %qE"
311 " in array lookup without checking for negative",
314 return ev
.formatted_print
315 ("use of attacker-controlled value %qE"
316 " in array lookup without upper-bounds checking",
320 switch (m_has_bounds
)
325 return ev
.formatted_print
326 ("use of attacker-controlled value in array lookup"
327 " without bounds checking");
329 return ev
.formatted_print
330 ("use of attacker-controlled value"
331 " in array lookup without checking for negative");
333 return ev
.formatted_print
334 ("use of attacker-controlled value"
335 " in array lookup without upper-bounds checking");
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   pointer offset.  */
343 class tainted_offset
: public taint_diagnostic
346 tainted_offset (const taint_state_machine
&sm
, tree arg
,
347 enum bounds has_bounds
,
348 const svalue
*offset
)
349 : taint_diagnostic (sm
, arg
, has_bounds
),
353 const char *get_kind () const final override
{ return "tainted_offset"; }
355 int get_controlling_option () const final override
357 return OPT_Wanalyzer_tainted_offset
;
360 bool emit (diagnostic_emission_context
&ctxt
) final override
362 /* CWE-823: "Use of Out-of-range Pointer Offset". */
365 switch (m_has_bounds
)
370 return ctxt
.warn ("use of attacker-controlled value %qE as offset"
371 " without bounds checking",
375 return ctxt
.warn ("use of attacker-controlled value %qE as offset"
376 " without lower-bounds checking",
380 return ctxt
.warn ("use of attacker-controlled value %qE as offset"
381 " without upper-bounds checking",
386 switch (m_has_bounds
)
391 return ctxt
.warn ("use of attacker-controlled value as offset"
392 " without bounds checking");
395 return ctxt
.warn ("use of attacker-controlled value as offset"
396 " without lower-bounds checking");
399 return ctxt
.warn ("use of attacker-controlled value as offset"
400 " without upper-bounds checking");
405 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
408 switch (m_has_bounds
)
413 return ev
.formatted_print ("use of attacker-controlled value %qE"
414 " as offset without bounds checking",
417 return ev
.formatted_print ("use of attacker-controlled value %qE"
418 " as offset without lower-bounds checking",
421 return ev
.formatted_print ("use of attacker-controlled value %qE"
422 " as offset without upper-bounds checking",
426 switch (m_has_bounds
)
431 return ev
.formatted_print ("use of attacker-controlled value"
432 " as offset without bounds checking");
434 return ev
.formatted_print ("use of attacker-controlled value"
435 " as offset without lower-bounds"
438 return ev
.formatted_print ("use of attacker-controlled value"
439 " as offset without upper-bounds"
444 void maybe_add_sarif_properties (sarif_object
&result_obj
)
447 taint_diagnostic::maybe_add_sarif_properties (result_obj
);
448 sarif_property_bag
&props
= result_obj
.get_or_create_properties ();
449 #define PROPERTY_PREFIX "gcc/analyzer/tainted_offset/"
450 props
.set (PROPERTY_PREFIX
"offset", m_offset
->to_json ());
451 #undef PROPERTY_PREFIX
455 const svalue
*m_offset
;
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   size.  */
461 class tainted_size
: public taint_diagnostic
464 tainted_size (const taint_state_machine
&sm
, tree arg
,
465 enum bounds has_bounds
)
466 : taint_diagnostic (sm
, arg
, has_bounds
)
469 const char *get_kind () const override
{ return "tainted_size"; }
471 int get_controlling_option () const final override
473 return OPT_Wanalyzer_tainted_size
;
476 bool emit (diagnostic_emission_context
&ctxt
) override
478 /* "CWE-129: Improper Validation of Array Index". */
481 switch (m_has_bounds
)
486 return ctxt
.warn ("use of attacker-controlled value %qE as size"
487 " without bounds checking",
491 return ctxt
.warn ("use of attacker-controlled value %qE as size"
492 " without lower-bounds checking",
496 return ctxt
.warn ("use of attacker-controlled value %qE as size"
497 " without upper-bounds checking",
502 switch (m_has_bounds
)
507 return ctxt
.warn ("use of attacker-controlled value as size"
508 " without bounds checking");
511 return ctxt
.warn ("use of attacker-controlled value as size"
512 " without lower-bounds checking");
515 return ctxt
.warn ("use of attacker-controlled value as size"
516 " without upper-bounds checking");
521 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
524 switch (m_has_bounds
)
529 return ev
.formatted_print ("use of attacker-controlled value %qE"
530 " as size without bounds checking",
533 return ev
.formatted_print ("use of attacker-controlled value %qE"
534 " as size without lower-bounds checking",
537 return ev
.formatted_print ("use of attacker-controlled value %qE"
538 " as size without upper-bounds checking",
542 switch (m_has_bounds
)
547 return ev
.formatted_print ("use of attacker-controlled value"
548 " as size without bounds checking");
550 return ev
.formatted_print ("use of attacker-controlled value"
551 " as size without lower-bounds checking");
553 return ev
.formatted_print ("use of attacker-controlled value"
554 " as size without upper-bounds checking");
559 /* Subclass of tainted_size for reporting on tainted size values
560 passed to an external function annotated with attribute "access". */
562 class tainted_access_attrib_size
: public tainted_size
565 tainted_access_attrib_size (const taint_state_machine
&sm
, tree arg
,
566 enum bounds has_bounds
, tree callee_fndecl
,
567 unsigned size_argno
, const char *access_str
)
568 : tainted_size (sm
, arg
, has_bounds
),
569 m_callee_fndecl (callee_fndecl
),
570 m_size_argno (size_argno
), m_access_str (access_str
)
574 const char *get_kind () const override
576 return "tainted_access_attrib_size";
579 bool emit (diagnostic_emission_context
&ctxt
) final override
581 bool warned
= tainted_size::emit (ctxt
);
584 inform (DECL_SOURCE_LOCATION (m_callee_fndecl
),
585 "parameter %i of %qD marked as a size via attribute %qs",
586 m_size_argno
+ 1, m_callee_fndecl
, m_access_str
);
592 tree m_callee_fndecl
;
593 unsigned m_size_argno
;
594 const char *m_access_str
;
597 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
598 divisor (so that an attacker can trigger a divide by zero). */
600 class tainted_divisor
: public taint_diagnostic
603 tainted_divisor (const taint_state_machine
&sm
, tree arg
,
604 enum bounds has_bounds
)
605 : taint_diagnostic (sm
, arg
, has_bounds
)
608 const char *get_kind () const final override
{ return "tainted_divisor"; }
610 int get_controlling_option () const final override
612 return OPT_Wanalyzer_tainted_divisor
;
615 bool emit (diagnostic_emission_context
&ctxt
) final override
617 /* CWE-369: "Divide By Zero". */
620 return ctxt
.warn ("use of attacker-controlled value %qE as divisor"
621 " without checking for zero",
624 return ctxt
.warn ("use of attacker-controlled value as divisor"
625 " without checking for zero");
628 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
631 return ev
.formatted_print
632 ("use of attacker-controlled value %qE as divisor"
633 " without checking for zero",
636 return ev
.formatted_print
637 ("use of attacker-controlled value as divisor"
638 " without checking for zero");
642 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
643 size of a dynamic allocation. */
645 class tainted_allocation_size
: public taint_diagnostic
648 tainted_allocation_size (const taint_state_machine
&sm
, tree arg
,
649 const svalue
*size_in_bytes
,
650 enum bounds has_bounds
, enum memory_space mem_space
)
651 : taint_diagnostic (sm
, arg
, has_bounds
),
652 m_size_in_bytes (size_in_bytes
),
653 m_mem_space (mem_space
)
657 const char *get_kind () const final override
659 return "tainted_allocation_size";
662 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
664 if (!taint_diagnostic::subclass_equal_p (base_other
))
666 const tainted_allocation_size
&other
667 = (const tainted_allocation_size
&)base_other
;
668 return m_mem_space
== other
.m_mem_space
;
671 int get_controlling_option () const final override
673 return OPT_Wanalyzer_tainted_allocation_size
;
676 bool emit (diagnostic_emission_context
&ctxt
) final override
678 /* "CWE-789: Memory Allocation with Excessive Size Value". */
683 switch (m_has_bounds
)
688 warned
= ctxt
.warn ("use of attacker-controlled value %qE as"
689 " allocation size without bounds checking",
693 warned
= ctxt
.warn ("use of attacker-controlled value %qE as"
694 " allocation size without"
695 " lower-bounds checking",
699 warned
= ctxt
.warn ("use of attacker-controlled value %qE as"
700 " allocation size without"
701 " upper-bounds checking",
706 switch (m_has_bounds
)
711 warned
= ctxt
.warn ("use of attacker-controlled value as"
712 " allocation size without bounds"
716 warned
= ctxt
.warn ("use of attacker-controlled value as"
717 " allocation size without"
718 " lower-bounds checking");
721 warned
= ctxt
.warn ("use of attacker-controlled value as"
722 " allocation size without"
723 " upper-bounds checking");
728 const location_t loc
= ctxt
.get_location ();
734 inform (loc
, "stack-based allocation");
737 inform (loc
, "heap-based allocation");
744 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
747 switch (m_has_bounds
)
752 return ev
.formatted_print
753 ("use of attacker-controlled value %qE as allocation size"
754 " without bounds checking",
757 return ev
.formatted_print
758 ("use of attacker-controlled value %qE as allocation size"
759 " without lower-bounds checking",
762 return ev
.formatted_print
763 ("use of attacker-controlled value %qE as allocation size"
764 " without upper-bounds checking",
768 switch (m_has_bounds
)
773 return ev
.formatted_print
774 ("use of attacker-controlled value as allocation size"
775 " without bounds checking");
777 return ev
.formatted_print
778 ("use of attacker-controlled value as allocation size"
779 " without lower-bounds checking");
781 return ev
.formatted_print
782 ("use of attacker-controlled value as allocation size"
783 " without upper-bounds checking");
787 void maybe_add_sarif_properties (sarif_object
&result_obj
)
790 taint_diagnostic::maybe_add_sarif_properties (result_obj
);
791 sarif_property_bag
&props
= result_obj
.get_or_create_properties ();
792 #define PROPERTY_PREFIX "gcc/analyzer/tainted_allocation_size/"
793 props
.set (PROPERTY_PREFIX
"size_in_bytes", m_size_in_bytes
->to_json ());
794 #undef PROPERTY_PREFIX
798 const svalue
*m_size_in_bytes
;
799 enum memory_space m_mem_space
;
802 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
803 value being used as part of the condition of an assertion. */
805 class tainted_assertion
: public taint_diagnostic
808 tainted_assertion (const taint_state_machine
&sm
, tree arg
,
809 tree assert_failure_fndecl
)
810 : taint_diagnostic (sm
, arg
, BOUNDS_NONE
),
811 m_assert_failure_fndecl (assert_failure_fndecl
)
813 gcc_assert (m_assert_failure_fndecl
);
816 const char *get_kind () const final override
818 return "tainted_assertion";
821 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
823 if (!taint_diagnostic::subclass_equal_p (base_other
))
825 const tainted_assertion
&other
826 = (const tainted_assertion
&)base_other
;
827 return m_assert_failure_fndecl
== other
.m_assert_failure_fndecl
;
830 int get_controlling_option () const final override
832 return OPT_Wanalyzer_tainted_assertion
;
835 bool emit (diagnostic_emission_context
&ctxt
) final override
837 /* "CWE-617: Reachable Assertion". */
840 return ctxt
.warn ("use of attacked-controlled value in"
841 " condition for assertion");
844 location_t
fixup_location (location_t loc
,
845 bool primary
) const final override
/* For the primary location we want to avoid being in e.g. the
   <assert.h> system header, since this would suppress the
   diagnostic.  */
851 return expansion_point_location_if_in_system_header (loc
);
852 else if (in_system_header_at (loc
))
/* For events, we want to show the implementation of the assert
   macro when we're describing them.  */
855 return linemap_resolve_location (line_table
, loc
,
856 LRK_SPELLING_LOCATION
,
859 return pending_diagnostic::fixup_location (loc
, primary
);
862 label_text
describe_state_change (const evdesc::state_change
&change
) override
864 if (change
.m_new_state
== m_sm
.m_tainted_control_flow
)
865 return change
.formatted_print
866 ("use of attacker-controlled value for control flow");
867 return taint_diagnostic::describe_state_change (change
);
870 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
872 if (mention_noreturn_attribute_p ())
873 return ev
.formatted_print
874 ("treating %qE as an assertion failure handler"
875 " due to %<__attribute__((__noreturn__))%>",
876 m_assert_failure_fndecl
);
878 return ev
.formatted_print
879 ("treating %qE as an assertion failure handler",
880 m_assert_failure_fndecl
);
884 bool mention_noreturn_attribute_p () const
886 if (fndecl_built_in_p (m_assert_failure_fndecl
, BUILT_IN_UNREACHABLE
))
891 tree m_assert_failure_fndecl
;
894 /* taint_state_machine's ctor. */
896 taint_state_machine::taint_state_machine (logger
*logger
)
897 : state_machine ("taint", logger
),
898 m_tainted (add_state ("tainted")),
899 m_has_lb (add_state ("has_lb")),
900 m_has_ub (add_state ("has_ub")),
901 m_stop (add_state ("stop")),
902 m_tainted_control_flow (add_state ("tainted-control-flow"))
906 state_machine::state_t
907 taint_state_machine::alt_get_inherited_state (const sm_state_map
&map
,
909 const extrinsic_state
&ext_state
)
912 switch (sval
->get_kind ())
918 const unaryop_svalue
*unaryop_sval
919 = as_a
<const unaryop_svalue
*> (sval
);
920 enum tree_code op
= unaryop_sval
->get_op ();
921 const svalue
*arg
= unaryop_sval
->get_arg ();
926 state_t arg_state
= map
.get_state (arg
, ext_state
);
936 const binop_svalue
*binop_sval
= as_a
<const binop_svalue
*> (sval
);
937 enum tree_code op
= binop_sval
->get_op ();
938 const svalue
*arg0
= binop_sval
->get_arg0 ();
939 const svalue
*arg1
= binop_sval
->get_arg1 ();
956 case POINTER_PLUS_EXPR
:
959 state_t arg0_state
= map
.get_state (arg0
, ext_state
);
960 state_t arg1_state
= map
.get_state (arg1
, ext_state
);
961 return combine_states (arg0_state
, arg1_state
);
/* The left-hand side of X % Y can be sanitized by
   the operation, so only the state of Y (ARG1) is inherited.  */
969 return map
.get_state (arg1
, ext_state
);
983 /* Return true iff FNDECL should be considered to be an assertion failure
984 handler by -Wanalyzer-tainted-assertion. */
987 is_assertion_failure_handler_p (tree fndecl
)
990 if (TREE_THIS_VOLATILE (fndecl
))
996 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
999 taint_state_machine::on_stmt (sm_context
&sm_ctxt
,
1000 const supernode
*node
,
1001 const gimple
*stmt
) const
1003 if (const gcall
*call
= dyn_cast
<const gcall
*> (stmt
))
1004 if (tree callee_fndecl
= sm_ctxt
.get_fndecl_for_call (call
))
1006 if (is_named_call_p (callee_fndecl
, "fread", call
, 4))
1008 tree arg
= gimple_call_arg (call
, 0);
1010 sm_ctxt
.on_transition (node
, stmt
, arg
, m_start
, m_tainted
);
1012 /* Dereference an ADDR_EXPR. */
1013 // TODO: should the engine do this?
1014 if (TREE_CODE (arg
) == ADDR_EXPR
)
1015 sm_ctxt
.on_transition (node
, stmt
, TREE_OPERAND (arg
, 0),
1016 m_start
, m_tainted
);
1020 /* External function with "access" attribute. */
1021 if (sm_ctxt
.unknown_side_effects_p ())
1022 check_for_tainted_size_arg (sm_ctxt
, node
, call
, callee_fndecl
);
1024 if (is_assertion_failure_handler_p (callee_fndecl
)
1025 && sm_ctxt
.get_global_state () == m_tainted_control_flow
)
1027 sm_ctxt
.warn (node
, call
, NULL_TREE
,
1028 make_unique
<tainted_assertion
> (*this, NULL_TREE
,
1032 // TODO: ...etc; many other sources of untrusted data
1034 if (const gassign
*assign
= dyn_cast
<const gassign
*> (stmt
))
1036 enum tree_code op
= gimple_assign_rhs_code (assign
);
1042 case TRUNC_DIV_EXPR
:
1044 case FLOOR_DIV_EXPR
:
1045 case ROUND_DIV_EXPR
:
1046 case TRUNC_MOD_EXPR
:
1048 case FLOOR_MOD_EXPR
:
1049 case ROUND_MOD_EXPR
:
1051 case EXACT_DIV_EXPR
:
1052 check_for_tainted_divisor (sm_ctxt
, node
, assign
);
1057 if (const gcond
*cond
= dyn_cast
<const gcond
*> (stmt
))
1059 /* Reset the state of "tainted-control-flow" before each
1060 control flow statement, so that only the last one before
1061 an assertion-failure-handler counts. */
1062 sm_ctxt
.set_global_state (m_start
);
1063 check_control_flow_arg_for_taint (sm_ctxt
, cond
, gimple_cond_lhs (cond
));
1064 check_control_flow_arg_for_taint (sm_ctxt
, cond
, gimple_cond_rhs (cond
));
1067 if (const gswitch
*switch_
= dyn_cast
<const gswitch
*> (stmt
))
1069 /* Reset the state of "tainted-control-flow" before each
1070 control flow statement, so that only the last one before
1071 an assertion-failure-handler counts. */
1072 sm_ctxt
.set_global_state (m_start
);
1073 check_control_flow_arg_for_taint (sm_ctxt
, switch_
,
1074 gimple_switch_index (switch_
));
1080 /* If EXPR is tainted, mark this execution path with the
1081 "tainted-control-flow" global state, in case we're about
1082 to call an assertion-failure-handler. */
1085 taint_state_machine::check_control_flow_arg_for_taint (sm_context
&sm_ctxt
,
1089 const region_model
*old_model
= sm_ctxt
.get_old_region_model ();
1090 const svalue
*sval
= old_model
->get_rvalue (expr
, NULL
);
1091 state_t state
= sm_ctxt
.get_state (stmt
, sval
);
1093 if (get_taint (state
, TREE_TYPE (expr
), &b
))
1094 sm_ctxt
.set_global_state (m_tainted_control_flow
);
1097 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
1098 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1099 and states 'has_ub' and 'has_lb' to 'stop'. */
1102 taint_state_machine::on_condition (sm_context
&sm_ctxt
,
1103 const supernode
*node
,
1107 const svalue
*rhs
) const
1112 if (lhs
->get_kind () == SK_UNKNOWN
1113 || rhs
->get_kind () == SK_UNKNOWN
)
1115 /* If we have a comparison against UNKNOWN, then
1116 we've presumably hit the svalue complexity limit,
1117 and we don't know what is being sanitized.
1118 Give up on any taint already found on this execution path. */
1119 // TODO: warn about this
1121 get_logger ()->log ("comparison against UNKNOWN; removing all taint");
1122 sm_ctxt
.clear_all_per_svalue_state ();
/* Strip away casts before considering LHS and RHS, to increase the
   chance of detecting places where sanitization of a value may have
   happened.  */
1129 if (const svalue
*inner
= lhs
->maybe_undo_cast ())
1131 if (const svalue
*inner
= rhs
->maybe_undo_cast ())
1142 /* (LHS >= RHS) or (LHS > RHS)
1143 LHS gains a lower bound
1144 RHS gains an upper bound. */
1145 sm_ctxt
.on_transition (node
, stmt
, lhs
, m_tainted
, m_has_lb
);
1146 sm_ctxt
.on_transition (node
, stmt
, lhs
, m_has_ub
, m_stop
);
1147 sm_ctxt
.on_transition (node
, stmt
, rhs
, m_tainted
, m_has_ub
);
1148 sm_ctxt
.on_transition (node
, stmt
, rhs
, m_has_lb
, m_stop
);
/* Detect where build_range_check has optimized
   (c>=low) && (c<=high)
   into
   (c-low>=0) && (c-low<=high-low)
   and thus into:
   (unsigned)(c - low) <= (unsigned)(high-low).  */
1160 if (const binop_svalue
*binop_sval
1161 = lhs
->dyn_cast_binop_svalue ())
1163 const svalue
*inner_lhs
= binop_sval
->get_arg0 ();
1164 enum tree_code inner_op
= binop_sval
->get_op ();
1165 const svalue
*inner_rhs
= binop_sval
->get_arg1 ();
1166 if (const svalue
*before_cast
= inner_lhs
->maybe_undo_cast ())
1167 inner_lhs
= before_cast
;
1168 if (tree outer_rhs_cst
= rhs
->maybe_get_constant ())
1169 if (tree inner_rhs_cst
= inner_rhs
->maybe_get_constant ())
1170 if (inner_op
== PLUS_EXPR
1171 && TREE_CODE (inner_rhs_cst
) == INTEGER_CST
1172 && TREE_CODE (outer_rhs_cst
) == INTEGER_CST
1173 && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst
))
1174 && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst
)))
/* We have
   (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
1178 and thus an optimized test of INNER_LHS (before any
1179 cast to unsigned) against a range.
1180 Transition any of the tainted states to the stop state.
1181 We have to special-case this here rather than in
1182 region_model::on_condition since we can't apply
1183 both conditions simultaneously (we'd have a transition
1184 from the old state to has_lb, then a transition from
1185 the old state *again* to has_ub). */
1187 = sm_ctxt
.get_state (stmt
, inner_lhs
);
1188 if (old_state
== m_tainted
1189 || old_state
== m_has_lb
1190 || old_state
== m_has_ub
)
1191 sm_ctxt
.set_next_state (stmt
, inner_lhs
, m_stop
);
1196 /* (LHS <= RHS) or (LHS < RHS)
1197 LHS gains an upper bound
1198 RHS gains a lower bound. */
1199 sm_ctxt
.on_transition (node
, stmt
, lhs
, m_tainted
, m_has_ub
);
1200 sm_ctxt
.on_transition (node
, stmt
, lhs
, m_has_lb
, m_stop
);
1201 sm_ctxt
.on_transition (node
, stmt
, rhs
, m_tainted
, m_has_lb
);
1202 sm_ctxt
.on_transition (node
, stmt
, rhs
, m_has_ub
, m_stop
);
1210 /* Implementation of state_machine::on_bounded_ranges vfunc for
1211 taint_state_machine, for handling switch statement cases.
1212 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1213 and states 'has_ub' and 'has_lb' to 'stop'. */
1216 taint_state_machine::on_bounded_ranges (sm_context
&sm_ctxt
,
1220 const bounded_ranges
&ranges
) const
1222 gcc_assert (!ranges
.empty_p ());
1223 gcc_assert (ranges
.get_count () > 0);
1225 /* We have one or more ranges; this could be a "default:", or one or
1226 more single or range cases.
1228 Look at the overall endpoints to see if the ranges impose any lower
1229 bounds or upper bounds beyond those of the underlying numeric type. */
1231 tree lowest_bound
= ranges
.get_range (0).m_lower
;
1232 tree highest_bound
= ranges
.get_range (ranges
.get_count () - 1).m_upper
;
1233 gcc_assert (lowest_bound
);
1234 gcc_assert (highest_bound
);
1237 = (lowest_bound
!= TYPE_MIN_VALUE (TREE_TYPE (lowest_bound
)));
1239 = (highest_bound
!= TYPE_MAX_VALUE (TREE_TYPE (highest_bound
)));
1241 if (!ranges_have_lb
&& !ranges_have_ub
)
1244 /* We have new bounds from the ranges; combine them with any
1245 existing bounds on SVAL. */
1246 state_t old_state
= sm_ctxt
.get_state (stmt
, &sval
);
1247 if (old_state
== m_tainted
)
1249 if (ranges_have_lb
&& ranges_have_ub
)
1250 sm_ctxt
.set_next_state (stmt
, &sval
, m_stop
);
1251 else if (ranges_have_lb
)
1252 sm_ctxt
.set_next_state (stmt
, &sval
, m_has_lb
);
1253 else if (ranges_have_ub
)
1254 sm_ctxt
.set_next_state (stmt
, &sval
, m_has_ub
);
1256 else if (old_state
== m_has_ub
&& ranges_have_lb
)
1257 sm_ctxt
.set_next_state (stmt
, &sval
, m_stop
);
1258 else if (old_state
== m_has_lb
&& ranges_have_ub
)
1259 sm_ctxt
.set_next_state (stmt
, &sval
, m_stop
);
1263 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED
) const
1265 if (s
== m_has_lb
|| s
== m_has_ub
)
1271 /* If STATE is a tainted state, write the bounds to *OUT and return true.
1272 Otherwise return false.
1273 Use the signedness of TYPE to determine if "has_ub" is tainted. */
1276 taint_state_machine::get_taint (state_t state
, tree type
,
1277 enum bounds
*out
) const
1279 /* Unsigned types have an implicit lower bound. */
1280 bool is_unsigned
= false;
1282 if (INTEGRAL_TYPE_P (type
))
1283 is_unsigned
= TYPE_UNSIGNED (type
);
1285 /* Can't use a switch as the states are non-const. */
1286 if (state
== m_tainted
)
1288 *out
= is_unsigned
? BOUNDS_LOWER
: BOUNDS_NONE
;
1291 else if (state
== m_has_lb
)
1293 *out
= BOUNDS_LOWER
;
1296 else if (state
== m_has_ub
&& !is_unsigned
)
1298 /* Missing lower bound. */
1299 *out
= BOUNDS_UPPER
;
1305 /* Find the most tainted state of S0 and S1. */
1307 state_machine::state_t
1308 taint_state_machine::combine_states (state_t s0
, state_t s1
) const
1314 if (s0
== m_tainted
|| s1
== m_tainted
)
1324 /* The only remaining combinations are one of has_ub and has_lb
1325 (in either order). */
1326 gcc_assert ((s0
== m_has_lb
&& s1
== m_has_ub
)
1327 || (s0
== m_has_ub
&& s1
== m_has_lb
));
1331 /* Check for calls to external functions marked with
1332 __attribute__((access)) with a size-index: complain about
1333 tainted values passed as a size to such a function.

The access specifications are read from the attributes of the
function type of CALLEE_FNDECL; any warning is issued through
SM_CTXT at NODE. */
1336 taint_state_machine::check_for_tainted_size_arg (sm_context
&sm_ctxt
,
1337 const supernode
*node
,
1339 tree callee_fndecl
) const
1341 tree fntype
= TREE_TYPE (callee_fndecl
);
/* Case: the function type carries no attributes at all. */
1345 if (!TYPE_ATTRIBUTES (fntype
))
1348 /* Initialize a map of attribute access specifications for arguments
1349 to the function call. */
1351 init_attr_rdwr_indices (&rdwr_idx
, TYPE_ATTRIBUTES (fntype
));
/* Walk the declared argument types; ARGNO is advanced in step with
ITER and is the key used to query RDWR_IDX. */
1355 for (tree iter
= TYPE_ARG_TYPES (fntype
); iter
;
1356 iter
= TREE_CHAIN (iter
), ++argno
)
/* Look up the access specification recorded for argument ARGNO. */
1358 const attr_access
* access
= rdwr_idx
.get (argno
);
1362 /* Ignore any duplicate entry in the map for the size argument. */
1363 if (access
->ptrarg
!= argno
)
/* NOTE(review): a sizarg of UINT_MAX presumably means the access
specification names no size argument -- confirm against
attr_access. */
1366 if (access
->sizarg
== UINT_MAX
)
/* Fetch the call argument at index ACCESS->sizarg and its current
state in this state machine. */
1369 tree size_arg
= gimple_call_arg (call
, access
->sizarg
);
1371 state_t state
= sm_ctxt
.get_state (call
, size_arg
);
/* Only warn if that state is a tainted one; B receives the bounds
computed by get_taint for the type of SIZE_ARG. */
1373 if (get_taint (state
, TREE_TYPE (size_arg
), &b
))
/* ACCESS rendered as a string via to_external_string. */
1375 const char* const access_str
=
1376 TREE_STRING_POINTER (access
->to_external_string ());
1377 tree diag_size
= sm_ctxt
.get_diagnostic_tree (size_arg
);
1378 sm_ctxt
.warn (node
, call
, size_arg
,
1379 make_unique
<tainted_access_attrib_size
>
1380 (*this, diag_size
, b
,
1388 /* Complain if ASSIGN (a division operation) has a tainted divisor
1389 that could be zero.

The divisor is the second right-hand-side operand of ASSIGN. It is
evaluated in the old region model obtained from SM_CTXT, and any
warning is issued through SM_CTXT at NODE. */
1392 taint_state_machine::check_for_tainted_divisor (sm_context
&sm_ctxt
,
1393 const supernode
*node
,
1394 const gassign
*assign
) const
1396 const region_model
*old_model
= sm_ctxt
.get_old_region_model ();
1400 tree divisor_expr
= gimple_assign_rhs2 (assign
);;
1402 /* Until we track conditions on floating point values, we can't check to
1403 see if they've been checked against zero. */
1404 if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr
)))
/* Get the symbolic value of the divisor and its current state in
this state machine. */
1407 const svalue
*divisor_sval
= old_model
->get_rvalue (divisor_expr
, NULL
);
1409 state_t state
= sm_ctxt
.get_state (assign
, divisor_sval
);
1411 if (get_taint (state
, TREE_TYPE (divisor_expr
), &b
))
/* Build a zero constant of the divisor's type and ask the old model
whether "divisor != 0" holds. */
1413 const svalue
*zero_sval
1414 = old_model
->get_manager ()->get_or_create_int_cst
1415 (TREE_TYPE (divisor_expr
), 0);
1417 = old_model
->eval_condition (divisor_sval
, NE_EXPR
, zero_sval
);
1419 /* The divisor is known to not equal 0: don't warn. */
/* Report the tainted divisor, then move DIVISOR_SVAL to m_stop.
NOTE(review): presumably the transition to m_stop prevents repeated
reports for the same value -- confirm. */
1422 tree diag_divisor
= sm_ctxt
.get_diagnostic_tree (divisor_expr
);
1423 sm_ctxt
.warn (node
, assign
, divisor_expr
,
1424 make_unique
<tainted_divisor
> (*this, diag_divisor
, b
));
1425 sm_ctxt
.set_next_state (assign
, divisor_sval
, m_stop
);
1429 } // anonymous namespace
1431 /* Internal interface to this file.

Create a taint_state_machine with "new", forwarding LOGGER to its
constructor, and return it.
NOTE(review): the result is a raw heap allocation; presumably the
caller takes ownership -- confirm. */
1434 make_taint_state_machine (logger
*logger
)
1436 return new taint_state_machine (logger
);
1439 /* A closed concrete range.
The bounds are held in m_min and m_max; within_p treats both as
inclusive. */
1441 class concrete_range
1444 /* Return true iff THIS is fully within OTHER
1446 - m_min must be >= OTHER.m_min
1447 - m_max must be <= OTHER.m_max. */
1448 bool within_p (const concrete_range
&other
) const
/* Each bound is compared with compare_constants, and each comparison
must be known to be true (is_true ()). */
1450 if (compare_constants (m_min
, GE_EXPR
, other
.m_min
).is_true ())
1451 if (compare_constants (m_max
, LE_EXPR
, other
.m_max
).is_true ())
1460 /* Attempt to get a closed concrete range for SVAL based on types.
1461 If found, write to *OUT and return true.
1462 Otherwise return false.

Two sources of a range appear below:
- if SVAL is a cast of a value with an unsigned integral type to a
wider integral type, the inner value's range converted to SVAL's type;
- if SVAL has an integral type, the TYPE_MIN_VALUE and TYPE_MAX_VALUE
of that type. */
1465 get_possible_range (const svalue
*sval
, concrete_range
*out
)
/* If SVAL is a cast, first try (recursively) to get a range for the
value being cast. */
1467 if (const svalue
*inner
= sval
->maybe_undo_cast ())
1469 concrete_range inner_range
;
1470 if (!get_possible_range (inner
, &inner_range
))
/* Require both types to be known and integral, the inner type to be
unsigned, and SVAL's type to have strictly greater precision. */
1473 if (sval
->get_type ()
1474 && inner
->get_type ()
1475 && INTEGRAL_TYPE_P (sval
->get_type ())
1476 && INTEGRAL_TYPE_P (inner
->get_type ())
1477 && TYPE_UNSIGNED (inner
->get_type ())
1478 && (TYPE_PRECISION (sval
->get_type ())
1479 > TYPE_PRECISION (inner
->get_type ())))
1481 /* We have a cast from an unsigned type to a wider integral type.
1482 Assuming this is zero-extension, we can inherit the range from
1484 enum tree_code op
= ((const unaryop_svalue
*)sval
)->get_op ();
1485 out
->m_min
= fold_unary (op
, sval
->get_type (), inner_range
.m_min
);
1486 out
->m_max
= fold_unary (op
, sval
->get_type (), inner_range
.m_max
);
1491 if (sval
->get_type ()
1492 && INTEGRAL_TYPE_P (sval
->get_type ()))
1494 out
->m_min
= TYPE_MIN_VALUE (sval
->get_type ());
1495 out
->m_max
= TYPE_MAX_VALUE (sval
->get_type ());
1502 /* Determine if it's possible for tainted array access ELEMENT_REG to
1503 actually be a problem.
1505 Check here for index being from e.g. unsigned char when the array
1506 contains >= 256 elements (an unsigned char index can be at most 255,
so every possible index is then a valid one).
1508 Return true if out-of-bounds is possible, false if it's impossible
1509 (for suppressing false positives). */
1512 index_can_be_out_of_bounds_p (const element_region
*element_reg
)
/* The index being used, and the parent region of ELEMENT_REG (the
array being indexed into). */
1514 const svalue
*index
= element_reg
->get_index ();
1515 const region
*array_reg
= element_reg
->get_parent_region ();
/* The check is only attempted when the parent region has an array
type whose TYPE_DOMAIN is present and integral. */
1517 if (array_reg
->get_type ()
1518 && TREE_CODE (array_reg
->get_type ()) == ARRAY_TYPE
1519 && TYPE_DOMAIN (array_reg
->get_type ())
1520 && INTEGRAL_TYPE_P (TYPE_DOMAIN (array_reg
->get_type ())))
/* The valid indices run from the minimum to the maximum value of the
array type's domain. */
1522 concrete_range valid_index_range
;
1523 valid_index_range
.m_min
1524 = TYPE_MIN_VALUE (TYPE_DOMAIN (array_reg
->get_type ()));
1525 valid_index_range
.m_max
1526 = TYPE_MAX_VALUE (TYPE_DOMAIN (array_reg
->get_type ()));
/* Compare against the range of values INDEX can take according to
get_possible_range: test whether it lies entirely within the valid
range. */
1528 concrete_range possible_index_range
;
1529 if (get_possible_range (index
, &possible_index_range
))
1530 if (possible_index_range
.within_p (valid_index_range
))
1537 /* Complain to CTXT if accessing REG could lead to arbitrary
1538 memory access under an attacker's control (due to taint).

Starting from REG, the kind of the current region is examined and the
walk then moves to its parent region. Three kinds of value are
checked against the taint state machine: the index of an
element_region, the byte offset of an offset_region, and the byte size
of a sized_region. The access_direction parameter is unnamed. */
1541 region_model::check_region_for_taint (const region
*reg
,
1542 enum access_direction
,
1543 region_model_context
*ctxt
) const
1548 LOG_SCOPE (ctxt
->get_logger ());
1551 const state_machine
*sm
;
/* Ask CTXT for the taint map, its state machine and the state
machine's index. */
1553 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* SM is cast down to taint_state_machine so that get_taint can be
called on it. */
1559 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1561 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* ITER_REGION is the region currently being examined. */
1565 const region
*iter_region
= reg
;
1568 switch (iter_region
->get_kind ())
/* Element region: check the taint state of the index. */
1575 const element_region
*element_reg
1576 = (const element_region
*)iter_region
;
1577 const svalue
*index
= element_reg
->get_index ();
1578 const state_machine::state_t
1579 state
= smap
->get_state (index
, *ext_state
);
1582 if (taint_sm
.get_taint (state
, index
->get_type (), &b
))
/* Only warn if an out-of-bounds index is actually possible; otherwise
just log the rejection when a logger is available. */
1584 if (index_can_be_out_of_bounds_p (element_reg
))
1586 tree arg
= get_representative_tree (index
);
1587 ctxt
->warn (make_unique
<tainted_array_index
> (taint_sm
,
1590 else if (ctxt
->get_logger ())
1591 ctxt
->get_logger ()->log ("rejecting tainted_array_index as"
1592 " out of bounds is not possible");
/* Offset region: check the taint state of the byte offset. */
1599 const offset_region
*offset_reg
1600 = (const offset_region
*)iter_region
;
1601 const svalue
*offset
= offset_reg
->get_byte_offset ();
1602 const state_machine::state_t
1603 state
= smap
->get_state (offset
, *ext_state
);
1605 /* Handle implicit cast to sizetype. */
/* If OFFSET is a cast of a value that has a type, the signedness used
by get_taint is taken from that inner type instead. */
1606 tree effective_type
= offset
->get_type ();
1607 if (const svalue
*cast
= offset
->maybe_undo_cast ())
1608 if (cast
->get_type ())
1609 effective_type
= cast
->get_type ();
1611 if (taint_sm
.get_taint (state
, effective_type
, &b
))
1613 tree arg
= get_representative_tree (offset
);
1614 ctxt
->warn (make_unique
<tainted_offset
> (taint_sm
, arg
, b
,
/* Sized region: check the taint state of the size in bytes. */
1622 const sized_region
*sized_reg
1623 = (const sized_region
*)iter_region
;
1624 const svalue
*size_sval
= sized_reg
->get_byte_size_sval (m_mgr
);
1625 const state_machine::state_t
1626 state
= smap
->get_state (size_sval
, *ext_state
);
1629 if (taint_sm
.get_taint (state
, size_sval
->get_type (), &b
))
1631 tree arg
= get_representative_tree (size_sval
);
1632 ctxt
->warn (make_unique
<tainted_size
> (taint_sm
, arg
, b
));
/* Step outwards to the enclosing region. */
1638 iter_region
= iter_region
->get_parent_region ();
1642 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1643 under an attacker's control (due to taint), where the allocation
1644 is happening within MEM_SPACE.

SIZE_IN_BYTES must be non-NULL (asserted below). */
1647 region_model::check_dynamic_size_for_taint (enum memory_space mem_space
,
1648 const svalue
*size_in_bytes
,
1649 region_model_context
*ctxt
) const
1651 gcc_assert (size_in_bytes
);
1654 LOG_SCOPE (ctxt
->get_logger ());
1657 const state_machine
*sm
;
/* Ask CTXT for the taint map, its state machine and the state
machine's index. */
1659 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* SM is cast down to taint_state_machine so that get_taint can be
called on it. */
1665 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1667 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* Look up the state of SIZE_IN_BYTES in the taint map. */
1671 const state_machine::state_t
1672 state
= smap
->get_state (size_in_bytes
, *ext_state
);
/* Warn if that state is a tainted one; B receives the bounds computed
by get_taint for the type of SIZE_IN_BYTES. */
1675 if (taint_sm
.get_taint (state
, size_in_bytes
->get_type (), &b
))
1677 tree arg
= get_representative_tree (size_in_bytes
);
1678 ctxt
->warn (make_unique
<tainted_allocation_size
>
1679 (taint_sm
, arg
, size_in_bytes
, b
, mem_space
));
1683 /* Mark SVAL as TAINTED. CTXT must be non-NULL.

The taint map is obtained from CTXT and SVAL is set to the taint
state machine's m_tainted state within it. */
1686 region_model::mark_as_tainted (const svalue
*sval
,
1687 region_model_context
*ctxt
)
1693 const state_machine
*sm
;
/* Ask CTXT for the taint map, its state machine and the state
machine's index. */
1695 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* SM is cast down to taint_state_machine to reach its m_tainted
state. */
1701 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1703 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* Record SVAL as being in the m_tainted state. */
1707 smap
->set_state (this, sval
, taint_sm
.m_tainted
, NULL
, *ext_state
);
1710 /* Return true if SVAL could possibly be attacker-controlled.

That is the case when the state of SVAL in the taint map is one of
the taint state machine's m_tainted, m_has_lb or m_has_ub states. */
1713 region_model_context::possibly_tainted_p (const svalue
*sval
)
1716 const state_machine
*sm
;
/* Fetch the taint map, its state machine and the state machine's
index. */
1718 if (!get_taint_map (&smap
, &sm
, &sm_idx
))
/* SM is cast down to taint_state_machine to reach its states. */
1721 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1723 const extrinsic_state
*ext_state
= get_ext_state ();
/* Look up the state of SVAL in the taint map. */
1727 const state_machine::state_t state
= smap
->get_state (sval
, *ext_state
);
1730 return (state
== taint_sm
.m_tainted
1731 || state
== taint_sm
.m_has_lb
1732 || state
== taint_sm
.m_has_ub
);
1737 #endif /* #if ENABLE_ANALYZER */