1 /* An experimental state machine, for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2023 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 #define INCLUDE_MEMORY
26 #include "coretypes.h"
27 #include "make-unique.h"
30 #include "basic-block.h"
33 #include "diagnostic-path.h"
34 #include "diagnostic-metadata.h"
35 #include "analyzer/analyzer.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "gimple-iterator.h"
38 #include "ordered-hash-map.h"
42 #include "stringpool.h"
44 #include "analyzer/supergraph.h"
45 #include "analyzer/call-string.h"
46 #include "analyzer/program-point.h"
47 #include "analyzer/store.h"
48 #include "analyzer/region-model.h"
49 #include "analyzer/sm.h"
50 #include "analyzer/program-state.h"
51 #include "analyzer/pending-diagnostic.h"
52 #include "analyzer/constraint-manager.h"
60 /* An enum for describing tainted values. */
64 /* This tainted value has no upper or lower bound. */
67 /* This tainted value has an upper bound but no lower bound. */
70 /* This tainted value has a lower bound but no upper bound. */
74 /* An experimental state machine, for tracking "taint": unsanitized uses
75 of data potentially under an attacker's control. */
77 class taint_state_machine
: public state_machine
80 taint_state_machine (logger
*logger
);
82 bool inherited_state_p () const final override
{ return true; }
84 state_t
alt_get_inherited_state (const sm_state_map
&map
,
86 const extrinsic_state
&ext_state
)
89 bool on_stmt (sm_context
*sm_ctxt
,
90 const supernode
*node
,
91 const gimple
*stmt
) const final override
;
93 void on_condition (sm_context
*sm_ctxt
,
94 const supernode
*node
,
98 const svalue
*rhs
) const final override
;
99 void on_bounded_ranges (sm_context
*sm_ctxt
,
100 const supernode
*node
,
103 const bounded_ranges
&ranges
) const final override
;
105 bool can_purge_p (state_t s
) const final override
;
107 bool get_taint (state_t s
, tree type
, enum bounds
*out
) const;
109 state_t
combine_states (state_t s0
, state_t s1
) const;
112 void check_control_flow_arg_for_taint (sm_context
*sm_ctxt
,
116 void check_for_tainted_size_arg (sm_context
*sm_ctxt
,
117 const supernode
*node
,
119 tree callee_fndecl
) const;
120 void check_for_tainted_divisor (sm_context
*sm_ctxt
,
121 const supernode
*node
,
122 const gassign
*assign
) const;
125 /* State for a "tainted" value: unsanitized data potentially under an
126 attacker's control. */
129 /* State for a "tainted" value that has a lower bound. */
132 /* State for a "tainted" value that has an upper bound. */
135 /* Stop state, for a value we don't want to track any more. */
138 /* Global state, for when the last condition had tainted arguments. */
139 state_t m_tainted_control_flow
;
142 /* Class for diagnostics relating to taint_state_machine. */
144 class taint_diagnostic
: public pending_diagnostic
147 taint_diagnostic (const taint_state_machine
&sm
, tree arg
,
148 enum bounds has_bounds
)
149 : m_sm (sm
), m_arg (arg
), m_has_bounds (has_bounds
)
152 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
154 const taint_diagnostic
&other
= (const taint_diagnostic
&)base_other
;
155 return (same_tree_p (m_arg
, other
.m_arg
)
156 && m_has_bounds
== other
.m_has_bounds
);
159 label_text
describe_state_change (const evdesc::state_change
&change
) override
161 if (change
.m_new_state
== m_sm
.m_tainted
)
164 return change
.formatted_print ("%qE has an unchecked value here"
166 change
.m_expr
, change
.m_origin
);
168 return change
.formatted_print ("%qE gets an unchecked value here",
171 else if (change
.m_new_state
== m_sm
.m_has_lb
)
172 return change
.formatted_print ("%qE has its lower bound checked here",
174 else if (change
.m_new_state
== m_sm
.m_has_ub
)
175 return change
.formatted_print ("%qE has its upper bound checked here",
177 return label_text ();
180 diagnostic_event::meaning
181 get_meaning_for_state_change (const evdesc::state_change
&change
)
184 if (change
.m_new_state
== m_sm
.m_tainted
)
185 return diagnostic_event::meaning (diagnostic_event::VERB_acquire
,
186 diagnostic_event::NOUN_taint
);
187 return diagnostic_event::meaning ();
191 const taint_state_machine
&m_sm
;
193 enum bounds m_has_bounds
;
196 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
   array index.  */
199 class tainted_array_index
: public taint_diagnostic
202 tainted_array_index (const taint_state_machine
&sm
, tree arg
,
203 enum bounds has_bounds
)
204 : taint_diagnostic (sm
, arg
, has_bounds
)
207 const char *get_kind () const final override
{ return "tainted_array_index"; }
209 int get_controlling_option () const final override
211 return OPT_Wanalyzer_tainted_array_index
;
214 bool emit (rich_location
*rich_loc
, logger
*) final override
216 diagnostic_metadata m
;
217 /* CWE-129: "Improper Validation of Array Index". */
220 switch (m_has_bounds
)
225 return warning_meta (rich_loc
, m
, get_controlling_option (),
226 "use of attacker-controlled value %qE"
227 " in array lookup without bounds checking",
231 return warning_meta (rich_loc
, m
, get_controlling_option (),
232 "use of attacker-controlled value %qE"
233 " in array lookup without checking for negative",
237 return warning_meta (rich_loc
, m
, get_controlling_option (),
238 "use of attacker-controlled value %qE"
239 " in array lookup without upper-bounds checking",
244 switch (m_has_bounds
)
249 return warning_meta (rich_loc
, m
, get_controlling_option (),
250 "use of attacker-controlled value"
251 " in array lookup without bounds checking");
254 return warning_meta (rich_loc
, m
, get_controlling_option (),
255 "use of attacker-controlled value"
256 " in array lookup without checking for"
260 return warning_meta (rich_loc
, m
, get_controlling_option (),
261 "use of attacker-controlled value"
262 " in array lookup without upper-bounds"
268 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
271 switch (m_has_bounds
)
276 return ev
.formatted_print
277 ("use of attacker-controlled value %qE in array lookup"
278 " without bounds checking",
281 return ev
.formatted_print
282 ("use of attacker-controlled value %qE"
283 " in array lookup without checking for negative",
286 return ev
.formatted_print
287 ("use of attacker-controlled value %qE"
288 " in array lookup without upper-bounds checking",
292 switch (m_has_bounds
)
297 return ev
.formatted_print
298 ("use of attacker-controlled value in array lookup"
299 " without bounds checking");
301 return ev
.formatted_print
302 ("use of attacker-controlled value"
303 " in array lookup without checking for negative");
305 return ev
.formatted_print
306 ("use of attacker-controlled value"
307 " in array lookup without upper-bounds checking");
312 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
   pointer offset.  */
315 class tainted_offset
: public taint_diagnostic
318 tainted_offset (const taint_state_machine
&sm
, tree arg
,
319 enum bounds has_bounds
)
320 : taint_diagnostic (sm
, arg
, has_bounds
)
323 const char *get_kind () const final override
{ return "tainted_offset"; }
325 int get_controlling_option () const final override
327 return OPT_Wanalyzer_tainted_offset
;
330 bool emit (rich_location
*rich_loc
, logger
*) final override
332 diagnostic_metadata m
;
333 /* CWE-823: "Use of Out-of-range Pointer Offset". */
336 switch (m_has_bounds
)
341 return warning_meta (rich_loc
, m
, get_controlling_option (),
342 "use of attacker-controlled value %qE as offset"
343 " without bounds checking",
347 return warning_meta (rich_loc
, m
, get_controlling_option (),
348 "use of attacker-controlled value %qE as offset"
349 " without lower-bounds checking",
353 return warning_meta (rich_loc
, m
, get_controlling_option (),
354 "use of attacker-controlled value %qE as offset"
355 " without upper-bounds checking",
360 switch (m_has_bounds
)
365 return warning_meta (rich_loc
, m
, get_controlling_option (),
366 "use of attacker-controlled value as offset"
367 " without bounds checking");
370 return warning_meta (rich_loc
, m
, get_controlling_option (),
371 "use of attacker-controlled value as offset"
372 " without lower-bounds checking");
375 return warning_meta (rich_loc
, m
, get_controlling_option (),
376 "use of attacker-controlled value as offset"
377 " without upper-bounds checking");
382 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
385 switch (m_has_bounds
)
390 return ev
.formatted_print ("use of attacker-controlled value %qE"
391 " as offset without bounds checking",
394 return ev
.formatted_print ("use of attacker-controlled value %qE"
395 " as offset without lower-bounds checking",
398 return ev
.formatted_print ("use of attacker-controlled value %qE"
399 " as offset without upper-bounds checking",
403 switch (m_has_bounds
)
408 return ev
.formatted_print ("use of attacker-controlled value"
409 " as offset without bounds checking");
411 return ev
.formatted_print ("use of attacker-controlled value"
412 " as offset without lower-bounds"
415 return ev
.formatted_print ("use of attacker-controlled value"
416 " as offset without upper-bounds"
422 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
   size.  */
425 class tainted_size
: public taint_diagnostic
428 tainted_size (const taint_state_machine
&sm
, tree arg
,
429 enum bounds has_bounds
)
430 : taint_diagnostic (sm
, arg
, has_bounds
)
433 const char *get_kind () const override
{ return "tainted_size"; }
435 int get_controlling_option () const final override
437 return OPT_Wanalyzer_tainted_size
;
440 bool emit (rich_location
*rich_loc
, logger
*) override
442 /* "CWE-129: Improper Validation of Array Index". */
443 diagnostic_metadata m
;
446 switch (m_has_bounds
)
451 return warning_meta (rich_loc
, m
, get_controlling_option (),
452 "use of attacker-controlled value %qE as size"
453 " without bounds checking",
457 return warning_meta (rich_loc
, m
, get_controlling_option (),
458 "use of attacker-controlled value %qE as size"
459 " without lower-bounds checking",
463 return warning_meta (rich_loc
, m
, get_controlling_option (),
464 "use of attacker-controlled value %qE as size"
465 " without upper-bounds checking",
470 switch (m_has_bounds
)
475 return warning_meta (rich_loc
, m
, get_controlling_option (),
476 "use of attacker-controlled value as size"
477 " without bounds checking");
480 return warning_meta (rich_loc
, m
, get_controlling_option (),
481 "use of attacker-controlled value as size"
482 " without lower-bounds checking");
485 return warning_meta (rich_loc
, m
, get_controlling_option (),
486 "use of attacker-controlled value as size"
487 " without upper-bounds checking");
492 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
495 switch (m_has_bounds
)
500 return ev
.formatted_print ("use of attacker-controlled value %qE"
501 " as size without bounds checking",
504 return ev
.formatted_print ("use of attacker-controlled value %qE"
505 " as size without lower-bounds checking",
508 return ev
.formatted_print ("use of attacker-controlled value %qE"
509 " as size without upper-bounds checking",
513 switch (m_has_bounds
)
518 return ev
.formatted_print ("use of attacker-controlled value"
519 " as size without bounds checking");
521 return ev
.formatted_print ("use of attacker-controlled value"
522 " as size without lower-bounds checking");
524 return ev
.formatted_print ("use of attacker-controlled value"
525 " as size without upper-bounds checking");
530 /* Subclass of tainted_size for reporting on tainted size values
531 passed to an external function annotated with attribute "access". */
533 class tainted_access_attrib_size
: public tainted_size
536 tainted_access_attrib_size (const taint_state_machine
&sm
, tree arg
,
537 enum bounds has_bounds
, tree callee_fndecl
,
538 unsigned size_argno
, const char *access_str
)
539 : tainted_size (sm
, arg
, has_bounds
),
540 m_callee_fndecl (callee_fndecl
),
541 m_size_argno (size_argno
), m_access_str (access_str
)
545 const char *get_kind () const override
547 return "tainted_access_attrib_size";
550 bool emit (rich_location
*rich_loc
, logger
*logger
) final override
552 bool warned
= tainted_size::emit (rich_loc
, logger
);
555 inform (DECL_SOURCE_LOCATION (m_callee_fndecl
),
556 "parameter %i of %qD marked as a size via attribute %qs",
557 m_size_argno
+ 1, m_callee_fndecl
, m_access_str
);
563 tree m_callee_fndecl
;
564 unsigned m_size_argno
;
565 const char *m_access_str
;
568 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
569 divisor (so that an attacker can trigger a divide by zero). */
571 class tainted_divisor
: public taint_diagnostic
574 tainted_divisor (const taint_state_machine
&sm
, tree arg
,
575 enum bounds has_bounds
)
576 : taint_diagnostic (sm
, arg
, has_bounds
)
579 const char *get_kind () const final override
{ return "tainted_divisor"; }
581 int get_controlling_option () const final override
583 return OPT_Wanalyzer_tainted_divisor
;
586 bool emit (rich_location
*rich_loc
, logger
*) final override
588 diagnostic_metadata m
;
589 /* CWE-369: "Divide By Zero". */
592 return warning_meta (rich_loc
, m
, get_controlling_option (),
593 "use of attacker-controlled value %qE as divisor"
594 " without checking for zero",
597 return warning_meta (rich_loc
, m
, get_controlling_option (),
598 "use of attacker-controlled value as divisor"
599 " without checking for zero");
602 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
605 return ev
.formatted_print
606 ("use of attacker-controlled value %qE as divisor"
607 " without checking for zero",
610 return ev
.formatted_print
611 ("use of attacker-controlled value as divisor"
612 " without checking for zero");
616 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
617 size of a dynamic allocation. */
619 class tainted_allocation_size
: public taint_diagnostic
622 tainted_allocation_size (const taint_state_machine
&sm
, tree arg
,
623 enum bounds has_bounds
, enum memory_space mem_space
)
624 : taint_diagnostic (sm
, arg
, has_bounds
),
625 m_mem_space (mem_space
)
629 const char *get_kind () const final override
631 return "tainted_allocation_size";
634 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
636 if (!taint_diagnostic::subclass_equal_p (base_other
))
638 const tainted_allocation_size
&other
639 = (const tainted_allocation_size
&)base_other
;
640 return m_mem_space
== other
.m_mem_space
;
643 int get_controlling_option () const final override
645 return OPT_Wanalyzer_tainted_allocation_size
;
648 bool emit (rich_location
*rich_loc
, logger
*) final override
650 diagnostic_metadata m
;
651 /* "CWE-789: Memory Allocation with Excessive Size Value". */
656 switch (m_has_bounds
)
661 warned
= warning_meta (rich_loc
, m
, get_controlling_option (),
662 "use of attacker-controlled value %qE as"
663 " allocation size without bounds checking",
667 warned
= warning_meta (rich_loc
, m
, get_controlling_option (),
668 "use of attacker-controlled value %qE as"
669 " allocation size without"
670 " lower-bounds checking",
674 warned
= warning_meta (rich_loc
, m
, get_controlling_option (),
675 "use of attacker-controlled value %qE as"
676 " allocation size without"
677 " upper-bounds checking",
682 switch (m_has_bounds
)
687 warned
= warning_meta (rich_loc
, m
, get_controlling_option (),
688 "use of attacker-controlled value as"
689 " allocation size without bounds"
693 warned
= warning_meta (rich_loc
, m
, get_controlling_option (),
694 "use of attacker-controlled value as"
695 " allocation size without"
696 " lower-bounds checking");
699 warned
= warning_meta (rich_loc
, m
, get_controlling_option (),
700 "use of attacker-controlled value as"
701 " allocation size without"
702 " upper-bounds checking");
707 location_t loc
= rich_loc
->get_loc ();
713 inform (loc
, "stack-based allocation");
716 inform (loc
, "heap-based allocation");
723 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
726 switch (m_has_bounds
)
731 return ev
.formatted_print
732 ("use of attacker-controlled value %qE as allocation size"
733 " without bounds checking",
736 return ev
.formatted_print
737 ("use of attacker-controlled value %qE as allocation size"
738 " without lower-bounds checking",
741 return ev
.formatted_print
742 ("use of attacker-controlled value %qE as allocation size"
743 " without upper-bounds checking",
747 switch (m_has_bounds
)
752 return ev
.formatted_print
753 ("use of attacker-controlled value as allocation size"
754 " without bounds checking");
756 return ev
.formatted_print
757 ("use of attacker-controlled value as allocation size"
758 " without lower-bounds checking");
760 return ev
.formatted_print
761 ("use of attacker-controlled value as allocation size"
762 " without upper-bounds checking");
767 enum memory_space m_mem_space
;
770 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
771 value being used as part of the condition of an assertion. */
773 class tainted_assertion
: public taint_diagnostic
776 tainted_assertion (const taint_state_machine
&sm
, tree arg
,
777 tree assert_failure_fndecl
)
778 : taint_diagnostic (sm
, arg
, BOUNDS_NONE
),
779 m_assert_failure_fndecl (assert_failure_fndecl
)
781 gcc_assert (m_assert_failure_fndecl
);
784 const char *get_kind () const final override
786 return "tainted_assertion";
789 bool subclass_equal_p (const pending_diagnostic
&base_other
) const override
791 if (!taint_diagnostic::subclass_equal_p (base_other
))
793 const tainted_assertion
&other
794 = (const tainted_assertion
&)base_other
;
795 return m_assert_failure_fndecl
== other
.m_assert_failure_fndecl
;
798 int get_controlling_option () const final override
800 return OPT_Wanalyzer_tainted_assertion
;
/* Emit the -Wanalyzer-tainted-assertion warning at RICH_LOC via
   warning_meta, under the option given by get_controlling_option (),
   and return warning_meta's result.  The logger argument is unnamed
   and unused.  The message says "attacker-controlled", matching the
   wording of the other taint diagnostics in this file.  */
803 bool emit (rich_location
*rich_loc
, logger
*) final override
805 diagnostic_metadata m
;
806 /* "CWE-617: Reachable Assertion". */
809 return warning_meta (rich_loc
, m
, get_controlling_option (),
810 "use of attacker-controlled value in"
811 " condition for assertion");
814 location_t
fixup_location (location_t loc
,
815 bool primary
) const final override
818 /* For the primary location we want to avoid being in e.g. the
819 <assert.h> system header, since this would suppress the
821 return expansion_point_location_if_in_system_header (loc
);
822 else if (in_system_header_at (loc
))
823 /* For events, we want to show the implementation of the assert
824 macro when we're describing them. */
825 return linemap_resolve_location (line_table
, loc
,
826 LRK_SPELLING_LOCATION
,
829 return pending_diagnostic::fixup_location (loc
, primary
);
832 label_text
describe_state_change (const evdesc::state_change
&change
) override
834 if (change
.m_new_state
== m_sm
.m_tainted_control_flow
)
835 return change
.formatted_print
836 ("use of attacker-controlled value for control flow");
837 return taint_diagnostic::describe_state_change (change
);
840 label_text
describe_final_event (const evdesc::final_event
&ev
) final override
842 if (mention_noreturn_attribute_p ())
843 return ev
.formatted_print
844 ("treating %qE as an assertion failure handler"
845 " due to %<__attribute__((__noreturn__))%>",
846 m_assert_failure_fndecl
);
848 return ev
.formatted_print
849 ("treating %qE as an assertion failure handler",
850 m_assert_failure_fndecl
);
854 bool mention_noreturn_attribute_p () const
856 if (fndecl_built_in_p (m_assert_failure_fndecl
, BUILT_IN_UNREACHABLE
))
861 tree m_assert_failure_fndecl
;
864 /* taint_state_machine's ctor.
   Names the machine "taint", forwards LOGGER to the state_machine base,
   and registers the five states below with add_state, storing each
   result in the corresponding member.  */
866 taint_state_machine::taint_state_machine (logger
*logger
)
867 : state_machine ("taint", logger
)
/* Per-value states: fully tainted, lower bound only, upper bound only,
   and the stop state.  */
869 m_tainted
= add_state ("tainted");
870 m_has_lb
= add_state ("has_lb");
871 m_has_ub
= add_state ("has_ub");
872 m_stop
= add_state ("stop");
/* Unlike the states above, this one is applied elsewhere in this file
   with set_global_state rather than to an individual value.  */
873 m_tainted_control_flow
= add_state ("tainted-control-flow");
876 state_machine::state_t
877 taint_state_machine::alt_get_inherited_state (const sm_state_map
&map
,
879 const extrinsic_state
&ext_state
)
882 switch (sval
->get_kind ())
888 const unaryop_svalue
*unaryop_sval
889 = as_a
<const unaryop_svalue
*> (sval
);
890 enum tree_code op
= unaryop_sval
->get_op ();
891 const svalue
*arg
= unaryop_sval
->get_arg ();
896 state_t arg_state
= map
.get_state (arg
, ext_state
);
906 const binop_svalue
*binop_sval
= as_a
<const binop_svalue
*> (sval
);
907 enum tree_code op
= binop_sval
->get_op ();
908 const svalue
*arg0
= binop_sval
->get_arg0 ();
909 const svalue
*arg1
= binop_sval
->get_arg1 ();
926 case POINTER_PLUS_EXPR
:
930 state_t arg0_state
= map
.get_state (arg0
, ext_state
);
931 state_t arg1_state
= map
.get_state (arg1
, ext_state
);
932 return combine_states (arg0_state
, arg1_state
);
946 /* Return true iff FNDECL should be considered to be an assertion failure
947 handler by -Wanalyzer-tainted-assertion. */
950 is_assertion_failure_handler_p (tree fndecl
)
953 if (TREE_THIS_VOLATILE (fndecl
))
959 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
962 taint_state_machine::on_stmt (sm_context
*sm_ctxt
,
963 const supernode
*node
,
964 const gimple
*stmt
) const
966 if (const gcall
*call
= dyn_cast
<const gcall
*> (stmt
))
967 if (tree callee_fndecl
= sm_ctxt
->get_fndecl_for_call (call
))
969 if (is_named_call_p (callee_fndecl
, "fread", call
, 4))
971 tree arg
= gimple_call_arg (call
, 0);
973 sm_ctxt
->on_transition (node
, stmt
, arg
, m_start
, m_tainted
);
975 /* Dereference an ADDR_EXPR. */
976 // TODO: should the engine do this?
977 if (TREE_CODE (arg
) == ADDR_EXPR
)
978 sm_ctxt
->on_transition (node
, stmt
, TREE_OPERAND (arg
, 0),
983 /* External function with "access" attribute. */
984 if (sm_ctxt
->unknown_side_effects_p ())
985 check_for_tainted_size_arg (sm_ctxt
, node
, call
, callee_fndecl
);
987 if (is_assertion_failure_handler_p (callee_fndecl
)
988 && sm_ctxt
->get_global_state () == m_tainted_control_flow
)
990 sm_ctxt
->warn (node
, call
, NULL_TREE
,
991 make_unique
<tainted_assertion
> (*this, NULL_TREE
,
995 // TODO: ...etc; many other sources of untrusted data
997 if (const gassign
*assign
= dyn_cast
<const gassign
*> (stmt
))
999 enum tree_code op
= gimple_assign_rhs_code (assign
);
1005 case TRUNC_DIV_EXPR
:
1007 case FLOOR_DIV_EXPR
:
1008 case ROUND_DIV_EXPR
:
1009 case TRUNC_MOD_EXPR
:
1011 case FLOOR_MOD_EXPR
:
1012 case ROUND_MOD_EXPR
:
1014 case EXACT_DIV_EXPR
:
1015 check_for_tainted_divisor (sm_ctxt
, node
, assign
);
1020 if (const gcond
*cond
= dyn_cast
<const gcond
*> (stmt
))
1022 /* Reset the state of "tainted-control-flow" before each
1023 control flow statement, so that only the last one before
1024 an assertion-failure-handler counts. */
1025 sm_ctxt
->set_global_state (m_start
);
1026 check_control_flow_arg_for_taint (sm_ctxt
, cond
, gimple_cond_lhs (cond
));
1027 check_control_flow_arg_for_taint (sm_ctxt
, cond
, gimple_cond_rhs (cond
));
1030 if (const gswitch
*switch_
= dyn_cast
<const gswitch
*> (stmt
))
1032 /* Reset the state of "tainted-control-flow" before each
1033 control flow statement, so that only the last one before
1034 an assertion-failure-handler counts. */
1035 sm_ctxt
->set_global_state (m_start
);
1036 check_control_flow_arg_for_taint (sm_ctxt
, switch_
,
1037 gimple_switch_index (switch_
));
1043 /* If EXPR is tainted, mark this execution path with the
1044 "tainted-control-flow" global state, in case we're about
1045 to call an assertion-failure-handler. */
1048 taint_state_machine::check_control_flow_arg_for_taint (sm_context
*sm_ctxt
,
1052 const region_model
*old_model
= sm_ctxt
->get_old_region_model ();
1053 const svalue
*sval
= old_model
->get_rvalue (expr
, NULL
);
1054 state_t state
= sm_ctxt
->get_state (stmt
, sval
);
1056 if (get_taint (state
, TREE_TYPE (expr
), &b
))
1057 sm_ctxt
->set_global_state (m_tainted_control_flow
);
1060 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
1061 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1062 and states 'has_ub' and 'has_lb' to 'stop'. */
1065 taint_state_machine::on_condition (sm_context
*sm_ctxt
,
1066 const supernode
*node
,
1070 const svalue
*rhs
) const
1083 /* (LHS >= RHS) or (LHS > RHS)
1084 LHS gains a lower bound
1085 RHS gains an upper bound. */
1086 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
1088 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_ub
,
1090 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_tainted
,
1092 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_has_lb
,
1099 /* Detect where build_range_check has optimized
1100 (c>=low) && (c<=high)
1102 (c-low>=0) && (c-low<=high-low)
1104 (unsigned)(c - low) <= (unsigned)(high-low). */
1105 if (const binop_svalue
*binop_sval
1106 = lhs
->dyn_cast_binop_svalue ())
1108 const svalue
*inner_lhs
= binop_sval
->get_arg0 ();
1109 enum tree_code inner_op
= binop_sval
->get_op ();
1110 const svalue
*inner_rhs
= binop_sval
->get_arg1 ();
1111 if (const svalue
*before_cast
= inner_lhs
->maybe_undo_cast ())
1112 inner_lhs
= before_cast
;
1113 if (tree outer_rhs_cst
= rhs
->maybe_get_constant ())
1114 if (tree inner_rhs_cst
= inner_rhs
->maybe_get_constant ())
1115 if (inner_op
== PLUS_EXPR
1116 && TREE_CODE (inner_rhs_cst
) == INTEGER_CST
1117 && TREE_CODE (outer_rhs_cst
) == INTEGER_CST
1118 && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst
))
1119 && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst
)))
1122 (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
1123 and thus an optimized test of INNER_LHS (before any
1124 cast to unsigned) against a range.
1125 Transition any of the tainted states to the stop state.
1126 We have to special-case this here rather than in
1127 region_model::on_condition since we can't apply
1128 both conditions simultaneously (we'd have a transition
1129 from the old state to has_lb, then a transition from
1130 the old state *again* to has_ub). */
1132 = sm_ctxt
->get_state (stmt
, inner_lhs
);
1133 if (old_state
== m_tainted
1134 || old_state
== m_has_lb
1135 || old_state
== m_has_ub
)
1136 sm_ctxt
->set_next_state (stmt
, inner_lhs
, m_stop
);
1141 /* (LHS <= RHS) or (LHS < RHS)
1142 LHS gains an upper bound
1143 RHS gains a lower bound. */
1144 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
1146 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_lb
,
1148 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_tainted
,
1150 sm_ctxt
->on_transition (node
, stmt
, rhs
, m_has_ub
,
1159 /* Implementation of state_machine::on_bounded_ranges vfunc for
1160 taint_state_machine, for handling switch statement cases.
1161 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1162 and states 'has_ub' and 'has_lb' to 'stop'. */
1165 taint_state_machine::on_bounded_ranges (sm_context
*sm_ctxt
,
1169 const bounded_ranges
&ranges
) const
1171 gcc_assert (!ranges
.empty_p ());
1172 gcc_assert (ranges
.get_count () > 0);
1174 /* We have one or more ranges; this could be a "default:", or one or
1175 more single or range cases.
1177 Look at the overall endpoints to see if the ranges impose any lower
1178 bounds or upper bounds beyond those of the underlying numeric type. */
1180 tree lowest_bound
= ranges
.get_range (0).m_lower
;
1181 tree highest_bound
= ranges
.get_range (ranges
.get_count () - 1).m_upper
;
1182 gcc_assert (lowest_bound
);
1183 gcc_assert (highest_bound
);
1186 = (lowest_bound
!= TYPE_MIN_VALUE (TREE_TYPE (lowest_bound
)));
1188 = (highest_bound
!= TYPE_MAX_VALUE (TREE_TYPE (highest_bound
)));
1190 if (!ranges_have_lb
&& !ranges_have_ub
)
1193 /* We have new bounds from the ranges; combine them with any
1194 existing bounds on SVAL. */
1195 state_t old_state
= sm_ctxt
->get_state (stmt
, &sval
);
1196 if (old_state
== m_tainted
)
1198 if (ranges_have_lb
&& ranges_have_ub
)
1199 sm_ctxt
->set_next_state (stmt
, &sval
, m_stop
);
1200 else if (ranges_have_lb
)
1201 sm_ctxt
->set_next_state (stmt
, &sval
, m_has_lb
);
1202 else if (ranges_have_ub
)
1203 sm_ctxt
->set_next_state (stmt
, &sval
, m_has_ub
);
1205 else if (old_state
== m_has_ub
&& ranges_have_lb
)
1206 sm_ctxt
->set_next_state (stmt
, &sval
, m_stop
);
1207 else if (old_state
== m_has_lb
&& ranges_have_ub
)
1208 sm_ctxt
->set_next_state (stmt
, &sval
, m_stop
);
1212 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED
) const
1217 /* If STATE is a tainted state, write the bounds to *OUT and return true.
1218 Otherwise return false.
1219 Use the signedness of TYPE to determine if "has_ub" is tainted. */
1222 taint_state_machine::get_taint (state_t state
, tree type
,
1223 enum bounds
*out
) const
1225 /* Unsigned types have an implicit lower bound. */
1226 bool is_unsigned
= false;
/* Only integral types are asked for their signedness; for any other
   TYPE is_unsigned stays false, i.e. no implicit lower bound.  */
1228 if (INTEGRAL_TYPE_P (type
))
1229 is_unsigned
= TYPE_UNSIGNED (type
);
1231 /* Can't use a switch as the states are non-const. */
/* Fully tainted: the only bound is the implicit lower bound of an
   unsigned type, if there is one.  */
1232 if (state
== m_tainted
)
1234 *out
= is_unsigned
? BOUNDS_LOWER
: BOUNDS_NONE
;
/* A lower bound has been checked, but no upper bound.  */
1237 else if (state
== m_has_lb
)
1239 *out
= BOUNDS_LOWER
;
/* An upper bound alone is only reported for a value that is not
   unsigned; with an unsigned TYPE this branch is not taken.  */
1242 else if (state
== m_has_ub
&& !is_unsigned
)
1244 /* Missing lower bound. */
1245 *out
= BOUNDS_UPPER
;
1251 /* Find the most tainted state of S0 and S1. */
1253 state_machine::state_t
1254 taint_state_machine::combine_states (state_t s0
, state_t s1
) const
1260 if (s0
== m_tainted
|| s1
== m_tainted
)
1270 /* The only remaining combinations are one of has_ub and has_lb
1271 (in either order). */
1272 gcc_assert ((s0
== m_has_lb
&& s1
== m_has_ub
)
1273 || (s0
== m_has_ub
&& s1
== m_has_lb
));
1277 /* Check for calls to external functions marked with
1278 __attribute__((access)) with a size-index: complain about
1279 tainted values passed as a size to such a function. */
1282 taint_state_machine::check_for_tainted_size_arg (sm_context
*sm_ctxt
,
1283 const supernode
*node
,
1285 tree callee_fndecl
) const
1287 tree fntype
= TREE_TYPE (callee_fndecl
);
1291 if (!TYPE_ATTRIBUTES (fntype
))
1294 /* Initialize a map of attribute access specifications for arguments
1295 to the function call. */
1297 init_attr_rdwr_indices (&rdwr_idx
, TYPE_ATTRIBUTES (fntype
));
1301 for (tree iter
= TYPE_ARG_TYPES (fntype
); iter
;
1302 iter
= TREE_CHAIN (iter
), ++argno
)
1304 const attr_access
* access
= rdwr_idx
.get (argno
);
1308 /* Ignore any duplicate entry in the map for the size argument. */
1309 if (access
->ptrarg
!= argno
)
1312 if (access
->sizarg
== UINT_MAX
)
1315 tree size_arg
= gimple_call_arg (call
, access
->sizarg
);
1317 state_t state
= sm_ctxt
->get_state (call
, size_arg
);
1319 if (get_taint (state
, TREE_TYPE (size_arg
), &b
))
1321 const char* const access_str
=
1322 TREE_STRING_POINTER (access
->to_external_string ());
1323 tree diag_size
= sm_ctxt
->get_diagnostic_tree (size_arg
);
1324 sm_ctxt
->warn (node
, call
, size_arg
,
1325 make_unique
<tainted_access_attrib_size
>
1326 (*this, diag_size
, b
,
1334 /* Complain if ASSIGN (a division operation) has a tainted divisor
1335 that could be zero. */
/* SM_CTXT supplies the region model and state lookups and receives the
   warning; NODE is forwarded to the warning.  */
1338 taint_state_machine::check_for_tainted_divisor (sm_context
*sm_ctxt
,
1339 const supernode
*node
,
1340 const gassign
*assign
) const
/* All value lookups below go through the model returned by
   get_old_region_model.  */
1342 const region_model
*old_model
= sm_ctxt
->get_old_region_model ();
/* The divisor is the second RHS operand of ASSIGN.
   NOTE(review): the doubled ';' leaves a harmless empty statement.  */
1346 tree divisor_expr
= gimple_assign_rhs2 (assign
);;
1348 /* Until we track conditions on floating point values, we can't check to
1349 see if they've been checked against zero. */
1350 if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr
)))
/* Symbolic value of the divisor; NULL is passed as the context
   argument of get_rvalue.  */
1353 const svalue
*divisor_sval
= old_model
->get_rvalue (divisor_expr
, NULL
);
/* State of the divisor's value at ASSIGN.  */
1355 state_t state
= sm_ctxt
->get_state (assign
, divisor_sval
);
/* get_taint receives B by address; B is then forwarded to the
   diagnostic below.  */
1357 if (get_taint (state
, TREE_TYPE (divisor_expr
), &b
))
/* Build a zero constant of the divisor's own type to compare against.  */
1359 const svalue
*zero_sval
1360 = old_model
->get_manager ()->get_or_create_int_cst
1361 (TREE_TYPE (divisor_expr
), 0);
/* Ask the model to evaluate "divisor != 0".  The declaration that
   receives this result is not visible in this excerpt.  */
1363 = old_model
->eval_condition (divisor_sval
, NE_EXPR
, zero_sval
);
1365 /* The divisor is known to not equal 0: don't warn. */
/* Tree used to describe the divisor in the diagnostic.  */
1368 tree diag_divisor
= sm_ctxt
->get_diagnostic_tree (divisor_expr
);
1369 sm_ctxt
->warn (node
, assign
, divisor_expr
,
1370 make_unique
<tainted_divisor
> (*this, diag_divisor
, b
));
/* After warning, move the divisor's value to the m_stop state
   (presumably so it is not reported again -- confirm).  */
1371 sm_ctxt
->set_next_state (assign
, divisor_sval
, m_stop
);
1375 } // anonymous namespace
1377 /* Internal interface to this file. */
/* Allocate a taint_state_machine with "new", forwarding LOGGER to its
   constructor, and return the resulting pointer.  Which code releases
   the object is not visible here.  */
1380 make_taint_state_machine (logger
*logger
)
1382 return new taint_state_machine (logger
);
1385 /* Complain to CTXT if accessing REG could lead to arbitrary
1386 memory access under an attacker's control (due to taint). */
/* The access_direction parameter is unnamed and so is not used.
   Warnings are issued through CTXT.  The region-kind labels selecting
   each group of statements inside the switch are not visible in this
   excerpt; the groups are described below by the region type each one
   casts to.  */
1389 region_model::check_region_for_taint (const region
*reg
,
1390 enum access_direction
,
1391 region_model_context
*ctxt
) const
1396 LOG_SCOPE (ctxt
->get_logger ());
1399 const state_machine
*sm
;
/* Ask CTXT for the taint state map, its state machine and its index.
   The statement guarded by a failed lookup is not visible here
   (presumably an early exit -- confirm).  */
1401 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* Unchecked C-style downcast: relies on get_taint_map having handed
   back the taint state machine.  */
1407 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1409 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* Start at REG itself; ITER_REGION is re-pointed further down.  */
1413 const region
*iter_region
= reg
;
/* Dispatch on the kind of the region currently being examined.  */
1416 switch (iter_region
->get_kind ())
/* Element region: look at the state of the index value.  */
1423 const element_region
*element_reg
1424 = (const element_region
*)iter_region
;
1425 const svalue
*index
= element_reg
->get_index ();
1426 const state_machine::state_t
1427 state
= smap
->get_state (index
, *ext_state
);
/* Warn with tainted_array_index when get_taint accepts the index's
   state for the index's own type.  */
1430 if (taint_sm
.get_taint (state
, index
->get_type (), &b
))
1432 tree arg
= get_representative_tree (index
);
1433 ctxt
->warn (make_unique
<tainted_array_index
> (taint_sm
, arg
, b
));
/* Offset region: look at the state of the byte offset.  */
1440 const offset_region
*offset_reg
1441 = (const offset_region
*)iter_region
;
1442 const svalue
*offset
= offset_reg
->get_byte_offset ();
1443 const state_machine::state_t
1444 state
= smap
->get_state (offset
, *ext_state
);
1446 /* Handle implicit cast to sizetype. */
/* Default to the offset's own type; if a cast can be undone and the
   uncast value has a non-null type, use that type instead.  */
1447 tree effective_type
= offset
->get_type ();
1448 if (const svalue
*cast
= offset
->maybe_undo_cast ())
1449 if (cast
->get_type ())
1450 effective_type
= cast
->get_type ();
/* Warn with tainted_offset, judging the state against EFFECTIVE_TYPE.  */
1452 if (taint_sm
.get_taint (state
, effective_type
, &b
))
1454 tree arg
= get_representative_tree (offset
);
1455 ctxt
->warn (make_unique
<tainted_offset
> (taint_sm
, arg
, b
));
/* Cast region: re-point ITER_REGION at the region that was cast.  */
1462 const cast_region
*cast_reg
1463 = as_a
<const cast_region
*> (iter_region
);
1464 iter_region
= cast_reg
->get_original_region ();
/* Sized region: look at the state of the byte-size value.  */
1470 const sized_region
*sized_reg
1471 = (const sized_region
*)iter_region
;
1472 const svalue
*size_sval
= sized_reg
->get_byte_size_sval (m_mgr
);
1473 const state_machine::state_t
1474 state
= smap
->get_state (size_sval
, *ext_state
);
/* Warn with tainted_size when get_taint accepts the size's state.  */
1477 if (taint_sm
.get_taint (state
, size_sval
->get_type (), &b
))
1479 tree arg
= get_representative_tree (size_sval
);
1480 ctxt
->warn (make_unique
<tainted_size
> (taint_sm
, arg
, b
));
/* Step outwards to the parent region.
   NOTE(review): the enclosing loop construct is not visible here --
   presumably this repeats until no parent remains; confirm.  */
1486 iter_region
= iter_region
->get_parent_region ();
1490 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1491 under an attacker's control (due to taint), where the allocation
1492 is happening within MEM_SPACE. */
/* SIZE_IN_BYTES must be non-null (asserted below).  MEM_SPACE is only
   forwarded to the diagnostic.  */
1495 region_model::check_dynamic_size_for_taint (enum memory_space mem_space
,
1496 const svalue
*size_in_bytes
,
1497 region_model_context
*ctxt
) const
1499 gcc_assert (size_in_bytes
);
1502 LOG_SCOPE (ctxt
->get_logger ());
1505 const state_machine
*sm
;
/* Ask CTXT for the taint state map, its state machine and its index.
   The statement guarded by a failed lookup is not visible here
   (presumably an early exit -- confirm).  */
1507 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* Unchecked C-style downcast: relies on get_taint_map having handed
   back the taint state machine.  */
1513 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1515 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* State currently recorded for the size value.  */
1519 const state_machine::state_t
1520 state
= smap
->get_state (size_in_bytes
, *ext_state
);
/* Warn with tainted_allocation_size when get_taint accepts the state
   for the size's own type; B is filled in through the pointer.  */
1523 if (taint_sm
.get_taint (state
, size_in_bytes
->get_type (), &b
))
1525 tree arg
= get_representative_tree (size_in_bytes
);
1526 ctxt
->warn (make_unique
<tainted_allocation_size
>
1527 (taint_sm
, arg
, b
, mem_space
));
1531 /* Mark SVAL as TAINTED. CTXT must be non-NULL. */
/* The state is written into the taint state map obtained from CTXT.  */
1534 region_model::mark_as_tainted (const svalue
*sval
,
1535 region_model_context
*ctxt
)
1541 const state_machine
*sm
;
/* Ask CTXT for the taint state map, its state machine and its index.
   The statement guarded by a failed lookup is not visible here
   (presumably an early exit -- confirm).  */
1543 if (!ctxt
->get_taint_map (&smap
, &sm
, &sm_idx
))
/* Unchecked C-style downcast: relies on get_taint_map having handed
   back the taint state machine.  */
1549 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1551 const extrinsic_state
*ext_state
= ctxt
->get_ext_state ();
/* Put SVAL into the machine's m_tainted state.  NULL is passed as the
   fourth argument; its meaning is not visible in this excerpt.  */
1555 smap
->set_state (this, sval
, taint_sm
.m_tainted
, NULL
, *ext_state
);
1558 /* Return true if SVAL could possibly be attacker-controlled. */
/* The answer is based on the state recorded for SVAL in this context's
   taint state map.  */
1561 region_model_context::possibly_tainted_p (const svalue
*sval
)
1564 const state_machine
*sm
;
/* Look up the taint state map, its state machine and its index.  The
   statement guarded by a failed lookup is not visible here (presumably
   it returns false -- confirm).  */
1566 if (!get_taint_map (&smap
, &sm
, &sm_idx
))
/* Unchecked C-style downcast: relies on get_taint_map having handed
   back the taint state machine.  */
1569 const taint_state_machine
&taint_sm
= (const taint_state_machine
&)*sm
;
1571 const extrinsic_state
*ext_state
= get_ext_state ();
/* State currently recorded for SVAL.  */
1575 const state_machine::state_t state
= smap
->get_state (sval
, *ext_state
);
/* True exactly when the state is one of m_tainted, m_has_lb or
   m_has_ub; any other state gives false.  */
1578 return (state
== taint_sm
.m_tainted
1579 || state
== taint_sm
.m_has_lb
1580 || state
== taint_sm
.m_has_ub
);
1585 #endif /* #if ENABLE_ANALYZER */