Daily bump.
[official-gcc.git] / gcc / analyzer / sm-taint.cc
blob0a51a1fe2ea97026b08c8d19835fc6c6b7587043
1 /* An experimental state machine, for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2021 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "gimple.h"
30 #include "options.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
33 #include "function.h"
34 #include "json.h"
35 #include "analyzer/analyzer.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "gimple-iterator.h"
38 #include "tristate.h"
39 #include "selftest.h"
40 #include "ordered-hash-map.h"
41 #include "cgraph.h"
42 #include "cfg.h"
43 #include "digraph.h"
44 #include "analyzer/supergraph.h"
45 #include "analyzer/call-string.h"
46 #include "analyzer/program-point.h"
47 #include "analyzer/store.h"
48 #include "analyzer/region-model.h"
49 #include "analyzer/sm.h"
50 #include "analyzer/program-state.h"
51 #include "analyzer/pending-diagnostic.h"
53 #if ENABLE_ANALYZER
55 namespace ana {
57 namespace {
59 /* An enum for describing tainted values. */
61 enum bounds
63 /* This tainted value has no upper or lower bound. */
64 BOUNDS_NONE,
66 /* This tainted value has an upper bound but not lower bound. */
67 BOUNDS_UPPER,
69 /* This tainted value has a lower bound but no upper bound. */
70 BOUNDS_LOWER
73 /* An experimental state machine, for tracking "taint": unsanitized uses
74 of data potentially under an attacker's control. */
76 class taint_state_machine : public state_machine
78 public:
79 taint_state_machine (logger *logger);
81 bool inherited_state_p () const FINAL OVERRIDE { return true; }
83 state_t alt_get_inherited_state (const sm_state_map &map,
84 const svalue *sval,
85 const extrinsic_state &ext_state)
86 const FINAL OVERRIDE;
88 bool on_stmt (sm_context *sm_ctxt,
89 const supernode *node,
90 const gimple *stmt) const FINAL OVERRIDE;
92 void on_condition (sm_context *sm_ctxt,
93 const supernode *node,
94 const gimple *stmt,
95 const svalue *lhs,
96 enum tree_code op,
97 const svalue *rhs) const FINAL OVERRIDE;
99 bool can_purge_p (state_t s) const FINAL OVERRIDE;
101 bool get_taint (state_t s, tree type, enum bounds *out) const;
103 state_t combine_states (state_t s0, state_t s1) const;
105 /* State for a "tainted" value: unsanitized data potentially under an
106 attacker's control. */
107 state_t m_tainted;
109 /* State for a "tainted" value that has a lower bound. */
110 state_t m_has_lb;
112 /* State for a "tainted" value that has an upper bound. */
113 state_t m_has_ub;
115 /* Stop state, for a value we don't want to track any more. */
116 state_t m_stop;
119 /* Class for diagnostics relating to taint_state_machine. */
121 class taint_diagnostic : public pending_diagnostic
123 public:
124 taint_diagnostic (const taint_state_machine &sm, tree arg,
125 enum bounds has_bounds)
126 : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
129 bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
131 return same_tree_p (m_arg, ((const taint_diagnostic &)base_other).m_arg);
134 label_text describe_state_change (const evdesc::state_change &change)
135 FINAL OVERRIDE
137 if (change.m_new_state == m_sm.m_tainted)
139 if (change.m_origin)
140 return change.formatted_print ("%qE has an unchecked value here"
141 " (from %qE)",
142 change.m_expr, change.m_origin);
143 else
144 return change.formatted_print ("%qE gets an unchecked value here",
145 change.m_expr);
147 else if (change.m_new_state == m_sm.m_has_lb)
148 return change.formatted_print ("%qE has its lower bound checked here",
149 change.m_expr);
150 else if (change.m_new_state == m_sm.m_has_ub)
151 return change.formatted_print ("%qE has its upper bound checked here",
152 change.m_expr);
153 return label_text ();
155 protected:
156 const taint_state_machine &m_sm;
157 tree m_arg;
158 enum bounds m_has_bounds;
161 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
162 array index. */
164 class tainted_array_index : public taint_diagnostic
166 public:
167 tainted_array_index (const taint_state_machine &sm, tree arg,
168 enum bounds has_bounds)
169 : taint_diagnostic (sm, arg, has_bounds)
172 const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
174 bool emit (rich_location *rich_loc) FINAL OVERRIDE
176 diagnostic_metadata m;
177 /* CWE-129: "Improper Validation of Array Index". */
178 m.add_cwe (129);
179 switch (m_has_bounds)
181 default:
182 gcc_unreachable ();
183 case BOUNDS_NONE:
184 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
185 "use of attacker-controlled value %qE"
186 " in array lookup without bounds checking",
187 m_arg);
188 break;
189 case BOUNDS_UPPER:
190 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
191 "use of attacker-controlled value %qE"
192 " in array lookup without checking for negative",
193 m_arg);
194 break;
195 case BOUNDS_LOWER:
196 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
197 "use of attacker-controlled value %qE"
198 " in array lookup without upper-bounds checking",
199 m_arg);
200 break;
204 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
206 switch (m_has_bounds)
208 default:
209 gcc_unreachable ();
210 case BOUNDS_NONE:
211 return ev.formatted_print
212 ("use of attacker-controlled value %qE in array lookup"
213 " without bounds checking",
214 m_arg);
215 case BOUNDS_UPPER:
216 return ev.formatted_print
217 ("use of attacker-controlled value %qE"
218 " in array lookup without checking for negative",
219 m_arg);
220 case BOUNDS_LOWER:
221 return ev.formatted_print
222 ("use of attacker-controlled value %qE"
223 " in array lookup without upper-bounds checking",
224 m_arg);
229 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
230 pointer offset. */
232 class tainted_offset : public taint_diagnostic
234 public:
235 tainted_offset (const taint_state_machine &sm, tree arg,
236 enum bounds has_bounds)
237 : taint_diagnostic (sm, arg, has_bounds)
240 const char *get_kind () const FINAL OVERRIDE { return "tainted_offset"; }
242 bool emit (rich_location *rich_loc) FINAL OVERRIDE
244 diagnostic_metadata m;
245 /* CWE-823: "Use of Out-of-range Pointer Offset". */
246 m.add_cwe (823);
247 if (m_arg)
248 switch (m_has_bounds)
250 default:
251 gcc_unreachable ();
252 case BOUNDS_NONE:
253 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
254 "use of attacker-controlled value %qE as offset"
255 " without bounds checking",
256 m_arg);
257 break;
258 case BOUNDS_UPPER:
259 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
260 "use of attacker-controlled value %qE as offset"
261 " without lower-bounds checking",
262 m_arg);
263 break;
264 case BOUNDS_LOWER:
265 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
266 "use of attacker-controlled value %qE as offset"
267 " without upper-bounds checking",
268 m_arg);
269 break;
271 else
272 switch (m_has_bounds)
274 default:
275 gcc_unreachable ();
276 case BOUNDS_NONE:
277 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
278 "use of attacker-controlled value as offset"
279 " without bounds checking");
280 break;
281 case BOUNDS_UPPER:
282 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
283 "use of attacker-controlled value as offset"
284 " without lower-bounds checking");
285 break;
286 case BOUNDS_LOWER:
287 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
288 "use of attacker-controlled value as offset"
289 " without upper-bounds checking");
290 break;
294 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
296 if (m_arg)
297 switch (m_has_bounds)
299 default:
300 gcc_unreachable ();
301 case BOUNDS_NONE:
302 return ev.formatted_print ("use of attacker-controlled value %qE"
303 " as offset without bounds checking",
304 m_arg);
305 case BOUNDS_UPPER:
306 return ev.formatted_print ("use of attacker-controlled value %qE"
307 " as offset without lower-bounds checking",
308 m_arg);
309 case BOUNDS_LOWER:
310 return ev.formatted_print ("use of attacker-controlled value %qE"
311 " as offset without upper-bounds checking",
312 m_arg);
314 else
315 switch (m_has_bounds)
317 default:
318 gcc_unreachable ();
319 case BOUNDS_NONE:
320 return ev.formatted_print ("use of attacker-controlled value"
321 " as offset without bounds checking");
322 case BOUNDS_UPPER:
323 return ev.formatted_print ("use of attacker-controlled value"
324 " as offset without lower-bounds"
325 " checking");
326 case BOUNDS_LOWER:
327 return ev.formatted_print ("use of attacker-controlled value"
328 " as offset without upper-bounds"
329 " checking");
334 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
335 size. */
337 class tainted_size : public taint_diagnostic
339 public:
340 tainted_size (const taint_state_machine &sm, tree arg,
341 enum bounds has_bounds,
342 enum access_direction dir)
343 : taint_diagnostic (sm, arg, has_bounds),
344 m_dir (dir)
347 const char *get_kind () const FINAL OVERRIDE { return "tainted_size"; }
349 bool emit (rich_location *rich_loc) FINAL OVERRIDE
351 diagnostic_metadata m;
352 m.add_cwe (129);
353 switch (m_has_bounds)
355 default:
356 gcc_unreachable ();
357 case BOUNDS_NONE:
358 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_size,
359 "use of attacker-controlled value %qE as size"
360 " without bounds checking",
361 m_arg);
362 break;
363 case BOUNDS_UPPER:
364 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_size,
365 "use of attacker-controlled value %qE as size"
366 " without lower-bounds checking",
367 m_arg);
368 break;
369 case BOUNDS_LOWER:
370 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_size,
371 "use of attacker-controlled value %qE as size"
372 " without upper-bounds checking",
373 m_arg);
374 break;
378 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
380 switch (m_has_bounds)
382 default:
383 gcc_unreachable ();
384 case BOUNDS_NONE:
385 return ev.formatted_print ("use of attacker-controlled value %qE"
386 " as size without bounds checking",
387 m_arg);
388 case BOUNDS_UPPER:
389 return ev.formatted_print ("use of attacker-controlled value %qE"
390 " as size without lower-bounds checking",
391 m_arg);
392 case BOUNDS_LOWER:
393 return ev.formatted_print ("use of attacker-controlled value %qE"
394 " as size without upper-bounds checking",
395 m_arg);
399 private:
400 enum access_direction m_dir;
403 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
404 divisor (so that an attacker can trigger a divide by zero). */
406 class tainted_divisor : public taint_diagnostic
408 public:
409 tainted_divisor (const taint_state_machine &sm, tree arg,
410 enum bounds has_bounds)
411 : taint_diagnostic (sm, arg, has_bounds)
414 const char *get_kind () const FINAL OVERRIDE { return "tainted_divisor"; }
416 bool emit (rich_location *rich_loc) FINAL OVERRIDE
418 diagnostic_metadata m;
419 /* CWE-369: "Divide By Zero". */
420 m.add_cwe (369);
421 if (m_arg)
422 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_divisor,
423 "use of attacker-controlled value %qE as divisor"
424 " without checking for zero",
425 m_arg);
426 else
427 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_divisor,
428 "use of attacker-controlled value as divisor"
429 " without checking for zero");
432 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
434 if (m_arg)
435 return ev.formatted_print
436 ("use of attacker-controlled value %qE as divisor"
437 " without checking for zero",
438 m_arg);
439 else
440 return ev.formatted_print
441 ("use of attacker-controlled value as divisor"
442 " without checking for zero");
446 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
447 size of a dynamic allocation. */
449 class tainted_allocation_size : public taint_diagnostic
451 public:
452 tainted_allocation_size (const taint_state_machine &sm, tree arg,
453 enum bounds has_bounds, enum memory_space mem_space)
454 : taint_diagnostic (sm, arg, has_bounds),
455 m_mem_space (mem_space)
457 gcc_assert (mem_space == MEMSPACE_STACK || mem_space == MEMSPACE_HEAP);
460 const char *get_kind () const FINAL OVERRIDE
462 return "tainted_allocation_size";
465 bool emit (rich_location *rich_loc) FINAL OVERRIDE
467 diagnostic_metadata m;
468 /* "CWE-789: Memory Allocation with Excessive Size Value". */
469 m.add_cwe (789);
470 gcc_assert (m_mem_space == MEMSPACE_STACK || m_mem_space == MEMSPACE_HEAP);
471 // TODO: make use of m_mem_space
472 if (m_arg)
473 switch (m_has_bounds)
475 default:
476 gcc_unreachable ();
477 case BOUNDS_NONE:
478 return warning_meta (rich_loc, m,
479 OPT_Wanalyzer_tainted_allocation_size,
480 "use of attacker-controlled value %qE as"
481 " allocation size without bounds checking",
482 m_arg);
483 break;
484 case BOUNDS_UPPER:
485 return warning_meta (rich_loc, m,
486 OPT_Wanalyzer_tainted_allocation_size,
487 "use of attacker-controlled value %qE as"
488 " allocation size without lower-bounds checking",
489 m_arg);
490 break;
491 case BOUNDS_LOWER:
492 return warning_meta (rich_loc, m,
493 OPT_Wanalyzer_tainted_allocation_size,
494 "use of attacker-controlled value %qE as"
495 " allocation size without upper-bounds checking",
496 m_arg);
497 break;
499 else
500 switch (m_has_bounds)
502 default:
503 gcc_unreachable ();
504 case BOUNDS_NONE:
505 return warning_meta (rich_loc, m,
506 OPT_Wanalyzer_tainted_allocation_size,
507 "use of attacker-controlled value as"
508 " allocation size without bounds"
509 " checking");
510 break;
511 case BOUNDS_UPPER:
512 return warning_meta (rich_loc, m,
513 OPT_Wanalyzer_tainted_allocation_size,
514 "use of attacker-controlled value as"
515 " allocation size without lower-bounds"
516 " checking");
517 break;
518 case BOUNDS_LOWER:
519 return warning_meta (rich_loc, m,
520 OPT_Wanalyzer_tainted_allocation_size,
521 "use of attacker-controlled value as"
522 " allocation size without upper-bounds"
523 " checking");
524 break;
528 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
530 if (m_arg)
531 switch (m_has_bounds)
533 default:
534 gcc_unreachable ();
535 case BOUNDS_NONE:
536 return ev.formatted_print
537 ("use of attacker-controlled value %qE as allocation size"
538 " without bounds checking",
539 m_arg);
540 case BOUNDS_UPPER:
541 return ev.formatted_print
542 ("use of attacker-controlled value %qE as allocation size"
543 " without lower-bounds checking",
544 m_arg);
545 case BOUNDS_LOWER:
546 return ev.formatted_print
547 ("use of attacker-controlled value %qE as allocation size"
548 " without upper-bounds checking",
549 m_arg);
551 else
552 switch (m_has_bounds)
554 default:
555 gcc_unreachable ();
556 case BOUNDS_NONE:
557 return ev.formatted_print
558 ("use of attacker-controlled value as allocation size"
559 " without bounds checking");
560 case BOUNDS_UPPER:
561 return ev.formatted_print
562 ("use of attacker-controlled value as allocation size"
563 " without lower-bounds checking");
564 case BOUNDS_LOWER:
565 return ev.formatted_print
566 ("use of attacker-controlled value as allocation size"
567 " without upper-bounds checking");
571 private:
572 enum memory_space m_mem_space;
575 /* taint_state_machine's ctor. */
577 taint_state_machine::taint_state_machine (logger *logger)
578 : state_machine ("taint", logger)
580 m_tainted = add_state ("tainted");
581 m_has_lb = add_state ("has_lb");
582 m_has_ub = add_state ("has_ub");
583 m_stop = add_state ("stop");
586 state_machine::state_t
587 taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
588 const svalue *sval,
589 const extrinsic_state &ext_state)
590 const
592 switch (sval->get_kind ())
594 default:
595 break;
596 case SK_UNARYOP:
598 const unaryop_svalue *unaryop_sval
599 = as_a <const unaryop_svalue *> (sval);
600 enum tree_code op = unaryop_sval->get_op ();
601 const svalue *arg = unaryop_sval->get_arg ();
602 switch (op)
604 case NOP_EXPR:
606 state_t arg_state = map.get_state (arg, ext_state);
607 return arg_state;
609 default:
610 gcc_unreachable ();
611 break;
614 break;
615 case SK_BINOP:
617 const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
618 enum tree_code op = binop_sval->get_op ();
619 const svalue *arg0 = binop_sval->get_arg0 ();
620 const svalue *arg1 = binop_sval->get_arg1 ();
621 switch (op)
623 default:
624 break;
625 case PLUS_EXPR:
626 case MINUS_EXPR:
627 case MULT_EXPR:
628 case POINTER_PLUS_EXPR:
629 case TRUNC_DIV_EXPR:
630 case TRUNC_MOD_EXPR:
632 state_t arg0_state = map.get_state (arg0, ext_state);
633 state_t arg1_state = map.get_state (arg1, ext_state);
634 return combine_states (arg0_state, arg1_state);
636 break;
638 case EQ_EXPR:
639 case GE_EXPR:
640 case LE_EXPR:
641 case NE_EXPR:
642 case GT_EXPR:
643 case LT_EXPR:
644 case UNORDERED_EXPR:
645 case ORDERED_EXPR:
646 /* Comparisons are just booleans. */
647 return m_start;
649 case BIT_AND_EXPR:
650 case RSHIFT_EXPR:
651 return NULL;
654 break;
656 return NULL;
659 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
661 bool
662 taint_state_machine::on_stmt (sm_context *sm_ctxt,
663 const supernode *node,
664 const gimple *stmt) const
666 if (const gcall *call = dyn_cast <const gcall *> (stmt))
667 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
669 if (is_named_call_p (callee_fndecl, "fread", call, 4))
671 tree arg = gimple_call_arg (call, 0);
673 sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
675 /* Dereference an ADDR_EXPR. */
676 // TODO: should the engine do this?
677 if (TREE_CODE (arg) == ADDR_EXPR)
678 sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
679 m_start, m_tainted);
680 return true;
683 // TODO: ...etc; many other sources of untrusted data
685 if (const gassign *assign = dyn_cast <const gassign *> (stmt))
687 enum tree_code op = gimple_assign_rhs_code (assign);
689 switch (op)
691 default:
692 break;
693 case TRUNC_DIV_EXPR:
694 case CEIL_DIV_EXPR:
695 case FLOOR_DIV_EXPR:
696 case ROUND_DIV_EXPR:
697 case TRUNC_MOD_EXPR:
698 case CEIL_MOD_EXPR:
699 case FLOOR_MOD_EXPR:
700 case ROUND_MOD_EXPR:
701 case RDIV_EXPR:
702 case EXACT_DIV_EXPR:
704 tree divisor = gimple_assign_rhs2 (assign);;
705 state_t state = sm_ctxt->get_state (stmt, divisor);
706 enum bounds b;
707 if (get_taint (state, TREE_TYPE (divisor), &b))
709 tree diag_divisor = sm_ctxt->get_diagnostic_tree (divisor);
710 sm_ctxt->warn (node, stmt, divisor,
711 new tainted_divisor (*this, diag_divisor, b));
712 sm_ctxt->set_next_state (stmt, divisor, m_stop);
715 break;
719 return false;
722 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
723 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
724 and states 'has_ub' and 'has_lb' to 'stop'. */
726 void
727 taint_state_machine::on_condition (sm_context *sm_ctxt,
728 const supernode *node,
729 const gimple *stmt,
730 const svalue *lhs,
731 enum tree_code op,
732 const svalue *rhs ATTRIBUTE_UNUSED) const
734 if (stmt == NULL)
735 return;
737 // TODO: this doesn't use the RHS; should we make it symmetric?
739 // TODO
740 switch (op)
742 //case NE_EXPR:
743 //case EQ_EXPR:
744 case GE_EXPR:
745 case GT_EXPR:
747 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
748 m_has_lb);
749 sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
750 m_stop);
752 break;
753 case LE_EXPR:
754 case LT_EXPR:
756 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
757 m_has_ub);
758 sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
759 m_stop);
761 break;
762 default:
763 break;
767 bool
768 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
770 return true;
773 /* If STATE is a tainted state, write the bounds to *OUT and return true.
774 Otherwise return false.
775 Use the signedness of TYPE to determine if "has_ub" is tainted. */
777 bool
778 taint_state_machine::get_taint (state_t state, tree type,
779 enum bounds *out) const
781 /* Unsigned types have an implicit lower bound. */
782 bool is_unsigned = false;
783 if (type)
784 if (INTEGRAL_TYPE_P (type))
785 is_unsigned = TYPE_UNSIGNED (type);
787 /* Can't use a switch as the states are non-const. */
788 if (state == m_tainted)
790 *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
791 return true;
793 else if (state == m_has_lb)
795 *out = BOUNDS_LOWER;
796 return true;
798 else if (state == m_has_ub && !is_unsigned)
800 /* Missing lower bound. */
801 *out = BOUNDS_UPPER;
802 return true;
804 return false;
807 /* Find the most tainted state of S0 and S1. */
809 state_machine::state_t
810 taint_state_machine::combine_states (state_t s0, state_t s1) const
812 gcc_assert (s0);
813 gcc_assert (s1);
814 if (s0 == s1)
815 return s0;
816 if (s0 == m_tainted || s1 == m_tainted)
817 return m_tainted;
818 if (s0 == m_stop)
819 return s1;
820 if (s1 == m_stop)
821 return s0;
822 if (s0 == m_start)
823 return s1;
824 if (s1 == m_start)
825 return s0;
826 gcc_unreachable ();
829 } // anonymous namespace
831 /* Internal interface to this file. */
833 state_machine *
834 make_taint_state_machine (logger *logger)
836 return new taint_state_machine (logger);
839 /* Complain to CTXT if accessing REG leads could lead to arbitrary
840 memory access under an attacker's control (due to taint). */
842 void
843 region_model::check_region_for_taint (const region *reg,
844 enum access_direction dir,
845 region_model_context *ctxt) const
847 gcc_assert (reg);
848 gcc_assert (ctxt);
850 LOG_SCOPE (ctxt->get_logger ());
852 sm_state_map *smap;
853 const state_machine *sm;
854 unsigned sm_idx;
855 if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
856 return;
858 gcc_assert (smap);
859 gcc_assert (sm);
861 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
863 const extrinsic_state *ext_state = ctxt->get_ext_state ();
864 if (!ext_state)
865 return;
867 const region *iter_region = reg;
868 while (iter_region)
870 switch (iter_region->get_kind ())
872 default:
873 break;
875 case RK_ELEMENT:
877 const element_region *element_reg
878 = (const element_region *)iter_region;
879 const svalue *index = element_reg->get_index ();
880 const state_machine::state_t
881 state = smap->get_state (index, *ext_state);
882 gcc_assert (state);
883 enum bounds b;
884 if (taint_sm.get_taint (state, index->get_type (), &b))
886 tree arg = get_representative_tree (index);
887 ctxt->warn (new tainted_array_index (taint_sm, arg, b));
890 break;
892 case RK_OFFSET:
894 const offset_region *offset_reg
895 = (const offset_region *)iter_region;
896 const svalue *offset = offset_reg->get_byte_offset ();
897 const state_machine::state_t
898 state = smap->get_state (offset, *ext_state);
899 gcc_assert (state);
900 /* Handle implicit cast to sizetype. */
901 tree effective_type = offset->get_type ();
902 if (const svalue *cast = offset->maybe_undo_cast ())
903 if (cast->get_type ())
904 effective_type = cast->get_type ();
905 enum bounds b;
906 if (taint_sm.get_taint (state, effective_type, &b))
908 tree arg = get_representative_tree (offset);
909 ctxt->warn (new tainted_offset (taint_sm, arg, b));
912 break;
914 case RK_CAST:
916 const cast_region *cast_reg
917 = as_a <const cast_region *> (iter_region);
918 iter_region = cast_reg->get_original_region ();
919 continue;
922 case RK_SIZED:
924 const sized_region *sized_reg
925 = (const sized_region *)iter_region;
926 const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
927 const state_machine::state_t
928 state = smap->get_state (size_sval, *ext_state);
929 gcc_assert (state);
930 enum bounds b;
931 if (taint_sm.get_taint (state, size_sval->get_type (), &b))
933 tree arg = get_representative_tree (size_sval);
934 ctxt->warn (new tainted_size (taint_sm, arg, b, dir));
937 break;
940 iter_region = iter_region->get_parent_region ();
944 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
945 under an attacker's control (due to taint), where the allocation
946 is happening within MEM_SPACE. */
948 void
949 region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
950 const svalue *size_in_bytes,
951 region_model_context *ctxt) const
953 gcc_assert (mem_space == MEMSPACE_STACK || mem_space == MEMSPACE_HEAP);
954 gcc_assert (size_in_bytes);
955 gcc_assert (ctxt);
957 LOG_SCOPE (ctxt->get_logger ());
959 sm_state_map *smap;
960 const state_machine *sm;
961 unsigned sm_idx;
962 if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
963 return;
965 gcc_assert (smap);
966 gcc_assert (sm);
968 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
970 const extrinsic_state *ext_state = ctxt->get_ext_state ();
971 if (!ext_state)
972 return;
974 const state_machine::state_t
975 state = smap->get_state (size_in_bytes, *ext_state);
976 gcc_assert (state);
977 enum bounds b;
978 if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
980 tree arg = get_representative_tree (size_in_bytes);
981 ctxt->warn (new tainted_allocation_size (taint_sm, arg, b, mem_space));
985 } // namespace ana
987 #endif /* #if ENABLE_ANALYZER */