hppa: Fix pr110279-1.c on hppa
[official-gcc.git] / gcc / analyzer / sm-taint.cc
blobce18957b56b84adee3c8b45b66e32ec0e88ea139
1 /* A state machine for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2023 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #define INCLUDE_MEMORY
25 #include "system.h"
26 #include "coretypes.h"
27 #include "make-unique.h"
28 #include "tree.h"
29 #include "function.h"
30 #include "basic-block.h"
31 #include "gimple.h"
32 #include "options.h"
33 #include "diagnostic-path.h"
34 #include "analyzer/analyzer.h"
35 #include "analyzer/analyzer-logging.h"
36 #include "gimple-iterator.h"
37 #include "ordered-hash-map.h"
38 #include "cgraph.h"
39 #include "cfg.h"
40 #include "digraph.h"
41 #include "stringpool.h"
42 #include "attribs.h"
43 #include "analyzer/supergraph.h"
44 #include "analyzer/call-string.h"
45 #include "analyzer/program-point.h"
46 #include "analyzer/store.h"
47 #include "analyzer/region-model.h"
48 #include "analyzer/sm.h"
49 #include "analyzer/program-state.h"
50 #include "analyzer/pending-diagnostic.h"
51 #include "analyzer/constraint-manager.h"
53 #if ENABLE_ANALYZER
55 namespace ana {
57 namespace {
/* Describes which bounds, if any, are known for a tainted value.  */

enum bounds
{
  /* Neither an upper nor a lower bound is known.  */
  BOUNDS_NONE,

  /* An upper bound is known, but no lower bound.  */
  BOUNDS_UPPER,

  /* A lower bound is known, but no upper bound.  */
  BOUNDS_LOWER
};
73 /* An experimental state machine, for tracking "taint": unsanitized uses
74 of data potentially under an attacker's control. */
76 class taint_state_machine : public state_machine
78 public:
79 taint_state_machine (logger *logger);
81 bool inherited_state_p () const final override { return true; }
83 state_t alt_get_inherited_state (const sm_state_map &map,
84 const svalue *sval,
85 const extrinsic_state &ext_state)
86 const final override;
88 bool on_stmt (sm_context *sm_ctxt,
89 const supernode *node,
90 const gimple *stmt) const final override;
92 void on_condition (sm_context *sm_ctxt,
93 const supernode *node,
94 const gimple *stmt,
95 const svalue *lhs,
96 enum tree_code op,
97 const svalue *rhs) const final override;
98 void on_bounded_ranges (sm_context *sm_ctxt,
99 const supernode *node,
100 const gimple *stmt,
101 const svalue &sval,
102 const bounded_ranges &ranges) const final override;
104 bool can_purge_p (state_t s) const final override;
106 bool get_taint (state_t s, tree type, enum bounds *out) const;
108 state_t combine_states (state_t s0, state_t s1) const;
110 private:
111 void check_control_flow_arg_for_taint (sm_context *sm_ctxt,
112 const gimple *stmt,
113 tree expr) const;
115 void check_for_tainted_size_arg (sm_context *sm_ctxt,
116 const supernode *node,
117 const gcall *call,
118 tree callee_fndecl) const;
119 void check_for_tainted_divisor (sm_context *sm_ctxt,
120 const supernode *node,
121 const gassign *assign) const;
123 public:
124 /* State for a "tainted" value: unsanitized data potentially under an
125 attacker's control. */
126 state_t m_tainted;
128 /* State for a "tainted" value that has a lower bound. */
129 state_t m_has_lb;
131 /* State for a "tainted" value that has an upper bound. */
132 state_t m_has_ub;
134 /* Stop state, for a value we don't want to track any more. */
135 state_t m_stop;
137 /* Global state, for when the last condition had tainted arguments. */
138 state_t m_tainted_control_flow;
141 /* Class for diagnostics relating to taint_state_machine. */
143 class taint_diagnostic : public pending_diagnostic
145 public:
146 taint_diagnostic (const taint_state_machine &sm, tree arg,
147 enum bounds has_bounds)
148 : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
151 bool subclass_equal_p (const pending_diagnostic &base_other) const override
153 const taint_diagnostic &other = (const taint_diagnostic &)base_other;
154 return (same_tree_p (m_arg, other.m_arg)
155 && m_has_bounds == other.m_has_bounds);
158 label_text describe_state_change (const evdesc::state_change &change) override
160 if (change.m_new_state == m_sm.m_tainted)
162 if (change.m_origin)
163 return change.formatted_print ("%qE has an unchecked value here"
164 " (from %qE)",
165 change.m_expr, change.m_origin);
166 else
167 return change.formatted_print ("%qE gets an unchecked value here",
168 change.m_expr);
170 else if (change.m_new_state == m_sm.m_has_lb)
171 return change.formatted_print ("%qE has its lower bound checked here",
172 change.m_expr);
173 else if (change.m_new_state == m_sm.m_has_ub)
174 return change.formatted_print ("%qE has its upper bound checked here",
175 change.m_expr);
176 return label_text ();
179 diagnostic_event::meaning
180 get_meaning_for_state_change (const evdesc::state_change &change)
181 const final override
183 if (change.m_new_state == m_sm.m_tainted)
184 return diagnostic_event::meaning (diagnostic_event::VERB_acquire,
185 diagnostic_event::NOUN_taint);
186 return diagnostic_event::meaning ();
189 protected:
190 const taint_state_machine &m_sm;
191 tree m_arg;
192 enum bounds m_has_bounds;
195 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
196 array index. */
198 class tainted_array_index : public taint_diagnostic
200 public:
201 tainted_array_index (const taint_state_machine &sm, tree arg,
202 enum bounds has_bounds)
203 : taint_diagnostic (sm, arg, has_bounds)
206 const char *get_kind () const final override { return "tainted_array_index"; }
208 int get_controlling_option () const final override
210 return OPT_Wanalyzer_tainted_array_index;
213 bool emit (diagnostic_emission_context &ctxt) final override
215 /* CWE-129: "Improper Validation of Array Index". */
216 ctxt.add_cwe (129);
217 if (m_arg)
218 switch (m_has_bounds)
220 default:
221 gcc_unreachable ();
222 case BOUNDS_NONE:
223 return ctxt.warn ("use of attacker-controlled value %qE"
224 " in array lookup without bounds checking",
225 m_arg);
226 break;
227 case BOUNDS_UPPER:
228 return ctxt.warn ("use of attacker-controlled value %qE"
229 " in array lookup without checking for negative",
230 m_arg);
231 break;
232 case BOUNDS_LOWER:
233 return ctxt.warn ("use of attacker-controlled value %qE"
234 " in array lookup without upper-bounds checking",
235 m_arg);
236 break;
238 else
239 switch (m_has_bounds)
241 default:
242 gcc_unreachable ();
243 case BOUNDS_NONE:
244 return ctxt.warn ("use of attacker-controlled value"
245 " in array lookup without bounds checking");
246 break;
247 case BOUNDS_UPPER:
248 return ctxt.warn ("use of attacker-controlled value"
249 " in array lookup without checking for"
250 " negative");
251 break;
252 case BOUNDS_LOWER:
253 return ctxt.warn ("use of attacker-controlled value"
254 " in array lookup without upper-bounds"
255 " checking");
256 break;
260 label_text describe_final_event (const evdesc::final_event &ev) final override
262 if (m_arg)
263 switch (m_has_bounds)
265 default:
266 gcc_unreachable ();
267 case BOUNDS_NONE:
268 return ev.formatted_print
269 ("use of attacker-controlled value %qE in array lookup"
270 " without bounds checking",
271 m_arg);
272 case BOUNDS_UPPER:
273 return ev.formatted_print
274 ("use of attacker-controlled value %qE"
275 " in array lookup without checking for negative",
276 m_arg);
277 case BOUNDS_LOWER:
278 return ev.formatted_print
279 ("use of attacker-controlled value %qE"
280 " in array lookup without upper-bounds checking",
281 m_arg);
283 else
284 switch (m_has_bounds)
286 default:
287 gcc_unreachable ();
288 case BOUNDS_NONE:
289 return ev.formatted_print
290 ("use of attacker-controlled value in array lookup"
291 " without bounds checking");
292 case BOUNDS_UPPER:
293 return ev.formatted_print
294 ("use of attacker-controlled value"
295 " in array lookup without checking for negative");
296 case BOUNDS_LOWER:
297 return ev.formatted_print
298 ("use of attacker-controlled value"
299 " in array lookup without upper-bounds checking");
304 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
305 pointer offset. */
307 class tainted_offset : public taint_diagnostic
309 public:
310 tainted_offset (const taint_state_machine &sm, tree arg,
311 enum bounds has_bounds)
312 : taint_diagnostic (sm, arg, has_bounds)
315 const char *get_kind () const final override { return "tainted_offset"; }
317 int get_controlling_option () const final override
319 return OPT_Wanalyzer_tainted_offset;
322 bool emit (diagnostic_emission_context &ctxt) final override
324 /* CWE-823: "Use of Out-of-range Pointer Offset". */
325 ctxt.add_cwe (823);
326 if (m_arg)
327 switch (m_has_bounds)
329 default:
330 gcc_unreachable ();
331 case BOUNDS_NONE:
332 return ctxt.warn ("use of attacker-controlled value %qE as offset"
333 " without bounds checking",
334 m_arg);
335 break;
336 case BOUNDS_UPPER:
337 return ctxt.warn ("use of attacker-controlled value %qE as offset"
338 " without lower-bounds checking",
339 m_arg);
340 break;
341 case BOUNDS_LOWER:
342 return ctxt.warn ("use of attacker-controlled value %qE as offset"
343 " without upper-bounds checking",
344 m_arg);
345 break;
347 else
348 switch (m_has_bounds)
350 default:
351 gcc_unreachable ();
352 case BOUNDS_NONE:
353 return ctxt.warn ("use of attacker-controlled value as offset"
354 " without bounds checking");
355 break;
356 case BOUNDS_UPPER:
357 return ctxt.warn ("use of attacker-controlled value as offset"
358 " without lower-bounds checking");
359 break;
360 case BOUNDS_LOWER:
361 return ctxt.warn ("use of attacker-controlled value as offset"
362 " without upper-bounds checking");
363 break;
367 label_text describe_final_event (const evdesc::final_event &ev) final override
369 if (m_arg)
370 switch (m_has_bounds)
372 default:
373 gcc_unreachable ();
374 case BOUNDS_NONE:
375 return ev.formatted_print ("use of attacker-controlled value %qE"
376 " as offset without bounds checking",
377 m_arg);
378 case BOUNDS_UPPER:
379 return ev.formatted_print ("use of attacker-controlled value %qE"
380 " as offset without lower-bounds checking",
381 m_arg);
382 case BOUNDS_LOWER:
383 return ev.formatted_print ("use of attacker-controlled value %qE"
384 " as offset without upper-bounds checking",
385 m_arg);
387 else
388 switch (m_has_bounds)
390 default:
391 gcc_unreachable ();
392 case BOUNDS_NONE:
393 return ev.formatted_print ("use of attacker-controlled value"
394 " as offset without bounds checking");
395 case BOUNDS_UPPER:
396 return ev.formatted_print ("use of attacker-controlled value"
397 " as offset without lower-bounds"
398 " checking");
399 case BOUNDS_LOWER:
400 return ev.formatted_print ("use of attacker-controlled value"
401 " as offset without upper-bounds"
402 " checking");
407 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
408 size. */
410 class tainted_size : public taint_diagnostic
412 public:
413 tainted_size (const taint_state_machine &sm, tree arg,
414 enum bounds has_bounds)
415 : taint_diagnostic (sm, arg, has_bounds)
418 const char *get_kind () const override { return "tainted_size"; }
420 int get_controlling_option () const final override
422 return OPT_Wanalyzer_tainted_size;
425 bool emit (diagnostic_emission_context &ctxt) override
427 /* "CWE-129: Improper Validation of Array Index". */
428 ctxt.add_cwe (129);
429 if (m_arg)
430 switch (m_has_bounds)
432 default:
433 gcc_unreachable ();
434 case BOUNDS_NONE:
435 return ctxt.warn ("use of attacker-controlled value %qE as size"
436 " without bounds checking",
437 m_arg);
438 break;
439 case BOUNDS_UPPER:
440 return ctxt.warn ("use of attacker-controlled value %qE as size"
441 " without lower-bounds checking",
442 m_arg);
443 break;
444 case BOUNDS_LOWER:
445 return ctxt.warn ("use of attacker-controlled value %qE as size"
446 " without upper-bounds checking",
447 m_arg);
448 break;
450 else
451 switch (m_has_bounds)
453 default:
454 gcc_unreachable ();
455 case BOUNDS_NONE:
456 return ctxt.warn ("use of attacker-controlled value as size"
457 " without bounds checking");
458 break;
459 case BOUNDS_UPPER:
460 return ctxt.warn ("use of attacker-controlled value as size"
461 " without lower-bounds checking");
462 break;
463 case BOUNDS_LOWER:
464 return ctxt.warn ("use of attacker-controlled value as size"
465 " without upper-bounds checking");
466 break;
470 label_text describe_final_event (const evdesc::final_event &ev) final override
472 if (m_arg)
473 switch (m_has_bounds)
475 default:
476 gcc_unreachable ();
477 case BOUNDS_NONE:
478 return ev.formatted_print ("use of attacker-controlled value %qE"
479 " as size without bounds checking",
480 m_arg);
481 case BOUNDS_UPPER:
482 return ev.formatted_print ("use of attacker-controlled value %qE"
483 " as size without lower-bounds checking",
484 m_arg);
485 case BOUNDS_LOWER:
486 return ev.formatted_print ("use of attacker-controlled value %qE"
487 " as size without upper-bounds checking",
488 m_arg);
490 else
491 switch (m_has_bounds)
493 default:
494 gcc_unreachable ();
495 case BOUNDS_NONE:
496 return ev.formatted_print ("use of attacker-controlled value"
497 " as size without bounds checking");
498 case BOUNDS_UPPER:
499 return ev.formatted_print ("use of attacker-controlled value"
500 " as size without lower-bounds checking");
501 case BOUNDS_LOWER:
502 return ev.formatted_print ("use of attacker-controlled value"
503 " as size without upper-bounds checking");
508 /* Subclass of tainted_size for reporting on tainted size values
509 passed to an external function annotated with attribute "access". */
511 class tainted_access_attrib_size : public tainted_size
513 public:
514 tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
515 enum bounds has_bounds, tree callee_fndecl,
516 unsigned size_argno, const char *access_str)
517 : tainted_size (sm, arg, has_bounds),
518 m_callee_fndecl (callee_fndecl),
519 m_size_argno (size_argno), m_access_str (access_str)
523 const char *get_kind () const override
525 return "tainted_access_attrib_size";
528 bool emit (diagnostic_emission_context &ctxt) final override
530 bool warned = tainted_size::emit (ctxt);
531 if (warned)
533 inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
534 "parameter %i of %qD marked as a size via attribute %qs",
535 m_size_argno + 1, m_callee_fndecl, m_access_str);
537 return warned;
540 private:
541 tree m_callee_fndecl;
542 unsigned m_size_argno;
543 const char *m_access_str;
546 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
547 divisor (so that an attacker can trigger a divide by zero). */
549 class tainted_divisor : public taint_diagnostic
551 public:
552 tainted_divisor (const taint_state_machine &sm, tree arg,
553 enum bounds has_bounds)
554 : taint_diagnostic (sm, arg, has_bounds)
557 const char *get_kind () const final override { return "tainted_divisor"; }
559 int get_controlling_option () const final override
561 return OPT_Wanalyzer_tainted_divisor;
564 bool emit (diagnostic_emission_context &ctxt) final override
566 /* CWE-369: "Divide By Zero". */
567 ctxt.add_cwe (369);
568 if (m_arg)
569 return ctxt.warn ("use of attacker-controlled value %qE as divisor"
570 " without checking for zero",
571 m_arg);
572 else
573 return ctxt.warn ("use of attacker-controlled value as divisor"
574 " without checking for zero");
577 label_text describe_final_event (const evdesc::final_event &ev) final override
579 if (m_arg)
580 return ev.formatted_print
581 ("use of attacker-controlled value %qE as divisor"
582 " without checking for zero",
583 m_arg);
584 else
585 return ev.formatted_print
586 ("use of attacker-controlled value as divisor"
587 " without checking for zero");
591 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
592 size of a dynamic allocation. */
594 class tainted_allocation_size : public taint_diagnostic
596 public:
597 tainted_allocation_size (const taint_state_machine &sm, tree arg,
598 enum bounds has_bounds, enum memory_space mem_space)
599 : taint_diagnostic (sm, arg, has_bounds),
600 m_mem_space (mem_space)
604 const char *get_kind () const final override
606 return "tainted_allocation_size";
609 bool subclass_equal_p (const pending_diagnostic &base_other) const override
611 if (!taint_diagnostic::subclass_equal_p (base_other))
612 return false;
613 const tainted_allocation_size &other
614 = (const tainted_allocation_size &)base_other;
615 return m_mem_space == other.m_mem_space;
618 int get_controlling_option () const final override
620 return OPT_Wanalyzer_tainted_allocation_size;
623 bool emit (diagnostic_emission_context &ctxt) final override
625 /* "CWE-789: Memory Allocation with Excessive Size Value". */
626 ctxt.add_cwe (789);
628 bool warned;
629 if (m_arg)
630 switch (m_has_bounds)
632 default:
633 gcc_unreachable ();
634 case BOUNDS_NONE:
635 warned = ctxt.warn ("use of attacker-controlled value %qE as"
636 " allocation size without bounds checking",
637 m_arg);
638 break;
639 case BOUNDS_UPPER:
640 warned = ctxt.warn ("use of attacker-controlled value %qE as"
641 " allocation size without"
642 " lower-bounds checking",
643 m_arg);
644 break;
645 case BOUNDS_LOWER:
646 warned = ctxt.warn ("use of attacker-controlled value %qE as"
647 " allocation size without"
648 " upper-bounds checking",
649 m_arg);
650 break;
652 else
653 switch (m_has_bounds)
655 default:
656 gcc_unreachable ();
657 case BOUNDS_NONE:
658 warned = ctxt.warn ("use of attacker-controlled value as"
659 " allocation size without bounds"
660 " checking");
661 break;
662 case BOUNDS_UPPER:
663 warned = ctxt.warn ("use of attacker-controlled value as"
664 " allocation size without"
665 " lower-bounds checking");
666 break;
667 case BOUNDS_LOWER:
668 warned = ctxt.warn ("use of attacker-controlled value as"
669 " allocation size without"
670 " upper-bounds checking");
671 break;
673 if (warned)
675 const location_t loc = ctxt.get_location ();
676 switch (m_mem_space)
678 default:
679 break;
680 case MEMSPACE_STACK:
681 inform (loc, "stack-based allocation");
682 break;
683 case MEMSPACE_HEAP:
684 inform (loc, "heap-based allocation");
685 break;
688 return warned;
691 label_text describe_final_event (const evdesc::final_event &ev) final override
693 if (m_arg)
694 switch (m_has_bounds)
696 default:
697 gcc_unreachable ();
698 case BOUNDS_NONE:
699 return ev.formatted_print
700 ("use of attacker-controlled value %qE as allocation size"
701 " without bounds checking",
702 m_arg);
703 case BOUNDS_UPPER:
704 return ev.formatted_print
705 ("use of attacker-controlled value %qE as allocation size"
706 " without lower-bounds checking",
707 m_arg);
708 case BOUNDS_LOWER:
709 return ev.formatted_print
710 ("use of attacker-controlled value %qE as allocation size"
711 " without upper-bounds checking",
712 m_arg);
714 else
715 switch (m_has_bounds)
717 default:
718 gcc_unreachable ();
719 case BOUNDS_NONE:
720 return ev.formatted_print
721 ("use of attacker-controlled value as allocation size"
722 " without bounds checking");
723 case BOUNDS_UPPER:
724 return ev.formatted_print
725 ("use of attacker-controlled value as allocation size"
726 " without lower-bounds checking");
727 case BOUNDS_LOWER:
728 return ev.formatted_print
729 ("use of attacker-controlled value as allocation size"
730 " without upper-bounds checking");
734 private:
735 enum memory_space m_mem_space;
738 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
739 value being used as part of the condition of an assertion. */
741 class tainted_assertion : public taint_diagnostic
743 public:
744 tainted_assertion (const taint_state_machine &sm, tree arg,
745 tree assert_failure_fndecl)
746 : taint_diagnostic (sm, arg, BOUNDS_NONE),
747 m_assert_failure_fndecl (assert_failure_fndecl)
749 gcc_assert (m_assert_failure_fndecl);
752 const char *get_kind () const final override
754 return "tainted_assertion";
757 bool subclass_equal_p (const pending_diagnostic &base_other) const override
759 if (!taint_diagnostic::subclass_equal_p (base_other))
760 return false;
761 const tainted_assertion &other
762 = (const tainted_assertion &)base_other;
763 return m_assert_failure_fndecl == other.m_assert_failure_fndecl;
766 int get_controlling_option () const final override
768 return OPT_Wanalyzer_tainted_assertion;
771 bool emit (diagnostic_emission_context &ctxt) final override
773 /* "CWE-617: Reachable Assertion". */
774 ctxt.add_cwe (617);
776 return ctxt.warn ("use of attacked-controlled value in"
777 " condition for assertion");
780 location_t fixup_location (location_t loc,
781 bool primary) const final override
783 if (primary)
784 /* For the primary location we want to avoid being in e.g. the
785 <assert.h> system header, since this would suppress the
786 diagnostic. */
787 return expansion_point_location_if_in_system_header (loc);
788 else if (in_system_header_at (loc))
789 /* For events, we want to show the implemenation of the assert
790 macro when we're describing them. */
791 return linemap_resolve_location (line_table, loc,
792 LRK_SPELLING_LOCATION,
793 NULL);
794 else
795 return pending_diagnostic::fixup_location (loc, primary);
798 label_text describe_state_change (const evdesc::state_change &change) override
800 if (change.m_new_state == m_sm.m_tainted_control_flow)
801 return change.formatted_print
802 ("use of attacker-controlled value for control flow");
803 return taint_diagnostic::describe_state_change (change);
806 label_text describe_final_event (const evdesc::final_event &ev) final override
808 if (mention_noreturn_attribute_p ())
809 return ev.formatted_print
810 ("treating %qE as an assertion failure handler"
811 " due to %<__attribute__((__noreturn__))%>",
812 m_assert_failure_fndecl);
813 else
814 return ev.formatted_print
815 ("treating %qE as an assertion failure handler",
816 m_assert_failure_fndecl);
819 private:
820 bool mention_noreturn_attribute_p () const
822 if (fndecl_built_in_p (m_assert_failure_fndecl, BUILT_IN_UNREACHABLE))
823 return false;
824 return true;
827 tree m_assert_failure_fndecl;
830 /* taint_state_machine's ctor. */
832 taint_state_machine::taint_state_machine (logger *logger)
833 : state_machine ("taint", logger),
834 m_tainted (add_state ("tainted")),
835 m_has_lb (add_state ("has_lb")),
836 m_has_ub (add_state ("has_ub")),
837 m_stop (add_state ("stop")),
838 m_tainted_control_flow (add_state ("tainted-control-flow"))
842 state_machine::state_t
843 taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
844 const svalue *sval,
845 const extrinsic_state &ext_state)
846 const
848 switch (sval->get_kind ())
850 default:
851 break;
852 case SK_UNARYOP:
854 const unaryop_svalue *unaryop_sval
855 = as_a <const unaryop_svalue *> (sval);
856 enum tree_code op = unaryop_sval->get_op ();
857 const svalue *arg = unaryop_sval->get_arg ();
858 switch (op)
860 case NOP_EXPR:
862 state_t arg_state = map.get_state (arg, ext_state);
863 return arg_state;
865 default:
866 break;
869 break;
870 case SK_BINOP:
872 const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
873 enum tree_code op = binop_sval->get_op ();
874 const svalue *arg0 = binop_sval->get_arg0 ();
875 const svalue *arg1 = binop_sval->get_arg1 ();
876 switch (op)
878 default:
879 break;
881 case EQ_EXPR:
882 case GE_EXPR:
883 case LE_EXPR:
884 case NE_EXPR:
885 case GT_EXPR:
886 case LT_EXPR:
887 case UNORDERED_EXPR:
888 case ORDERED_EXPR:
889 case PLUS_EXPR:
890 case MINUS_EXPR:
891 case MULT_EXPR:
892 case POINTER_PLUS_EXPR:
893 case TRUNC_DIV_EXPR:
895 state_t arg0_state = map.get_state (arg0, ext_state);
896 state_t arg1_state = map.get_state (arg1, ext_state);
897 return combine_states (arg0_state, arg1_state);
899 break;
901 case TRUNC_MOD_EXPR:
903 /* The left-hand side of X % Y can be sanitized by
904 the operation. */
905 return map.get_state (arg1, ext_state);
907 break;
909 case BIT_AND_EXPR:
910 case RSHIFT_EXPR:
911 return NULL;
914 break;
916 return NULL;
919 /* Return true iff FNDECL should be considered to be an assertion failure
920 handler by -Wanalyzer-tainted-assertion. */
922 static bool
923 is_assertion_failure_handler_p (tree fndecl)
925 // i.e. "noreturn"
926 if (TREE_THIS_VOLATILE (fndecl))
927 return true;
929 return false;
932 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
934 bool
935 taint_state_machine::on_stmt (sm_context *sm_ctxt,
936 const supernode *node,
937 const gimple *stmt) const
939 if (const gcall *call = dyn_cast <const gcall *> (stmt))
940 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
942 if (is_named_call_p (callee_fndecl, "fread", call, 4))
944 tree arg = gimple_call_arg (call, 0);
946 sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
948 /* Dereference an ADDR_EXPR. */
949 // TODO: should the engine do this?
950 if (TREE_CODE (arg) == ADDR_EXPR)
951 sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
952 m_start, m_tainted);
953 return true;
956 /* External function with "access" attribute. */
957 if (sm_ctxt->unknown_side_effects_p ())
958 check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);
960 if (is_assertion_failure_handler_p (callee_fndecl)
961 && sm_ctxt->get_global_state () == m_tainted_control_flow)
963 sm_ctxt->warn (node, call, NULL_TREE,
964 make_unique<tainted_assertion> (*this, NULL_TREE,
965 callee_fndecl));
968 // TODO: ...etc; many other sources of untrusted data
970 if (const gassign *assign = dyn_cast <const gassign *> (stmt))
972 enum tree_code op = gimple_assign_rhs_code (assign);
974 switch (op)
976 default:
977 break;
978 case TRUNC_DIV_EXPR:
979 case CEIL_DIV_EXPR:
980 case FLOOR_DIV_EXPR:
981 case ROUND_DIV_EXPR:
982 case TRUNC_MOD_EXPR:
983 case CEIL_MOD_EXPR:
984 case FLOOR_MOD_EXPR:
985 case ROUND_MOD_EXPR:
986 case RDIV_EXPR:
987 case EXACT_DIV_EXPR:
988 check_for_tainted_divisor (sm_ctxt, node, assign);
989 break;
993 if (const gcond *cond = dyn_cast <const gcond *> (stmt))
995 /* Reset the state of "tainted-control-flow" before each
996 control flow statement, so that only the last one before
997 an assertion-failure-handler counts. */
998 sm_ctxt->set_global_state (m_start);
999 check_control_flow_arg_for_taint (sm_ctxt, cond, gimple_cond_lhs (cond));
1000 check_control_flow_arg_for_taint (sm_ctxt, cond, gimple_cond_rhs (cond));
1003 if (const gswitch *switch_ = dyn_cast <const gswitch *> (stmt))
1005 /* Reset the state of "tainted-control-flow" before each
1006 control flow statement, so that only the last one before
1007 an assertion-failure-handler counts. */
1008 sm_ctxt->set_global_state (m_start);
1009 check_control_flow_arg_for_taint (sm_ctxt, switch_,
1010 gimple_switch_index (switch_));
1013 return false;
1016 /* If EXPR is tainted, mark this execution path with the
1017 "tainted-control-flow" global state, in case we're about
1018 to call an assertion-failure-handler. */
1020 void
1021 taint_state_machine::check_control_flow_arg_for_taint (sm_context *sm_ctxt,
1022 const gimple *stmt,
1023 tree expr) const
1025 const region_model *old_model = sm_ctxt->get_old_region_model ();
1026 const svalue *sval = old_model->get_rvalue (expr, NULL);
1027 state_t state = sm_ctxt->get_state (stmt, sval);
1028 enum bounds b;
1029 if (get_taint (state, TREE_TYPE (expr), &b))
1030 sm_ctxt->set_global_state (m_tainted_control_flow);
1033 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
1034 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1035 and states 'has_ub' and 'has_lb' to 'stop'. */
1037 void
1038 taint_state_machine::on_condition (sm_context *sm_ctxt,
1039 const supernode *node,
1040 const gimple *stmt,
1041 const svalue *lhs,
1042 enum tree_code op,
1043 const svalue *rhs) const
1045 if (stmt == NULL)
1046 return;
1048 if (lhs->get_kind () == SK_UNKNOWN
1049 || rhs->get_kind () == SK_UNKNOWN)
1051 /* If we have a comparison against UNKNOWN, then
1052 we've presumably hit the svalue complexity limit,
1053 and we don't know what is being sanitized.
1054 Give up on any taint already found on this execution path. */
1055 // TODO: warn about this
1056 if (get_logger ())
1057 get_logger ()->log ("comparison against UNKNOWN; removing all taint");
1058 sm_ctxt->clear_all_per_svalue_state ();
1059 return;
1062 // TODO
1063 switch (op)
1065 //case NE_EXPR:
1066 //case EQ_EXPR:
1067 case GE_EXPR:
1068 case GT_EXPR:
1070 /* (LHS >= RHS) or (LHS > RHS)
1071 LHS gains a lower bound
1072 RHS gains an upper bound. */
1073 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
1074 m_has_lb);
1075 sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
1076 m_stop);
1077 sm_ctxt->on_transition (node, stmt, rhs, m_tainted,
1078 m_has_ub);
1079 sm_ctxt->on_transition (node, stmt, rhs, m_has_lb,
1080 m_stop);
1082 break;
1083 case LE_EXPR:
1084 case LT_EXPR:
1086 /* Detect where build_range_check has optimized
1087 (c>=low) && (c<=high)
1088 into
1089 (c-low>=0) && (c-low<=high-low)
1090 and thus into:
1091 (unsigned)(c - low) <= (unsigned)(high-low). */
1092 if (const binop_svalue *binop_sval
1093 = lhs->dyn_cast_binop_svalue ())
1095 const svalue *inner_lhs = binop_sval->get_arg0 ();
1096 enum tree_code inner_op = binop_sval->get_op ();
1097 const svalue *inner_rhs = binop_sval->get_arg1 ();
1098 if (const svalue *before_cast = inner_lhs->maybe_undo_cast ())
1099 inner_lhs = before_cast;
1100 if (tree outer_rhs_cst = rhs->maybe_get_constant ())
1101 if (tree inner_rhs_cst = inner_rhs->maybe_get_constant ())
1102 if (inner_op == PLUS_EXPR
1103 && TREE_CODE (inner_rhs_cst) == INTEGER_CST
1104 && TREE_CODE (outer_rhs_cst) == INTEGER_CST
1105 && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst))
1106 && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst)))
1108 /* We have
1109 (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
1110 and thus an optimized test of INNER_LHS (before any
1111 cast to unsigned) against a range.
1112 Transition any of the tainted states to the stop state.
1113 We have to special-case this here rather than in
1114 region_model::on_condition since we can't apply
1115 both conditions simultaneously (we'd have a transition
1116 from the old state to has_lb, then a transition from
1117 the old state *again* to has_ub). */
1118 state_t old_state
1119 = sm_ctxt->get_state (stmt, inner_lhs);
1120 if (old_state == m_tainted
1121 || old_state == m_has_lb
1122 || old_state == m_has_ub)
1123 sm_ctxt->set_next_state (stmt, inner_lhs, m_stop);
1124 return;
1128 /* (LHS <= RHS) or (LHS < RHS)
1129 LHS gains an upper bound
1130 RHS gains a lower bound. */
1131 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
1132 m_has_ub);
1133 sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
1134 m_stop);
1135 sm_ctxt->on_transition (node, stmt, rhs, m_tainted,
1136 m_has_lb);
1137 sm_ctxt->on_transition (node, stmt, rhs, m_has_ub,
1138 m_stop);
1140 break;
1141 default:
1142 break;
1146 /* Implementation of state_machine::on_bounded_ranges vfunc for
1147 taint_state_machine, for handling switch statement cases.
1148 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1149 and states 'has_ub' and 'has_lb' to 'stop'. */
1151 void
1152 taint_state_machine::on_bounded_ranges (sm_context *sm_ctxt,
1153 const supernode *,
1154 const gimple *stmt,
1155 const svalue &sval,
1156 const bounded_ranges &ranges) const
1158 gcc_assert (!ranges.empty_p ());
1159 gcc_assert (ranges.get_count () > 0);
1161 /* We have one or more ranges; this could be a "default:", or one or
1162 more single or range cases.
1164 Look at the overall endpoints to see if the ranges impose any lower
1165 bounds or upper bounds beyond those of the underlying numeric type. */
1167 tree lowest_bound = ranges.get_range (0).m_lower;
1168 tree highest_bound = ranges.get_range (ranges.get_count () - 1).m_upper;
1169 gcc_assert (lowest_bound);
1170 gcc_assert (highest_bound);
1172 bool ranges_have_lb
1173 = (lowest_bound != TYPE_MIN_VALUE (TREE_TYPE (lowest_bound)));
1174 bool ranges_have_ub
1175 = (highest_bound != TYPE_MAX_VALUE (TREE_TYPE (highest_bound)));
1177 if (!ranges_have_lb && !ranges_have_ub)
1178 return;
1180 /* We have new bounds from the ranges; combine them with any
1181 existing bounds on SVAL. */
1182 state_t old_state = sm_ctxt->get_state (stmt, &sval);
1183 if (old_state == m_tainted)
1185 if (ranges_have_lb && ranges_have_ub)
1186 sm_ctxt->set_next_state (stmt, &sval, m_stop);
1187 else if (ranges_have_lb)
1188 sm_ctxt->set_next_state (stmt, &sval, m_has_lb);
1189 else if (ranges_have_ub)
1190 sm_ctxt->set_next_state (stmt, &sval, m_has_ub);
1192 else if (old_state == m_has_ub && ranges_have_lb)
1193 sm_ctxt->set_next_state (stmt, &sval, m_stop);
1194 else if (old_state == m_has_lb && ranges_have_ub)
1195 sm_ctxt->set_next_state (stmt, &sval, m_stop);
1198 bool
1199 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
1201 return true;
1204 /* If STATE is a tainted state, write the bounds to *OUT and return true.
1205 Otherwise return false.
1206 Use the signedness of TYPE to determine if "has_ub" is tainted. */
1208 bool
1209 taint_state_machine::get_taint (state_t state, tree type,
1210 enum bounds *out) const
1212 /* Unsigned types have an implicit lower bound. */
1213 bool is_unsigned = false;
1214 if (type)
1215 if (INTEGRAL_TYPE_P (type))
1216 is_unsigned = TYPE_UNSIGNED (type);
1218 /* Can't use a switch as the states are non-const. */
1219 if (state == m_tainted)
1221 *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
1222 return true;
1224 else if (state == m_has_lb)
1226 *out = BOUNDS_LOWER;
1227 return true;
1229 else if (state == m_has_ub && !is_unsigned)
1231 /* Missing lower bound. */
1232 *out = BOUNDS_UPPER;
1233 return true;
1235 return false;
1238 /* Find the most tainted state of S0 and S1. */
1240 state_machine::state_t
1241 taint_state_machine::combine_states (state_t s0, state_t s1) const
1243 gcc_assert (s0);
1244 gcc_assert (s1);
1245 if (s0 == s1)
1246 return s0;
1247 if (s0 == m_tainted || s1 == m_tainted)
1248 return m_tainted;
1249 if (s0 == m_start)
1250 return s1;
1251 if (s1 == m_start)
1252 return s0;
1253 if (s0 == m_stop)
1254 return s1;
1255 if (s1 == m_stop)
1256 return s0;
1257 /* The only remaining combinations are one of has_ub and has_lb
1258 (in either order). */
1259 gcc_assert ((s0 == m_has_lb && s1 == m_has_ub)
1260 || (s0 == m_has_ub && s1 == m_has_lb));
1261 return m_tainted;
1264 /* Check for calls to external functions marked with
1265 __attribute__((access)) with a size-index: complain about
1266 tainted values passed as a size to such a function. */
1268 void
1269 taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
1270 const supernode *node,
1271 const gcall *call,
1272 tree callee_fndecl) const
1274 tree fntype = TREE_TYPE (callee_fndecl);
1275 if (!fntype)
1276 return;
1278 if (!TYPE_ATTRIBUTES (fntype))
1279 return;
1281 /* Initialize a map of attribute access specifications for arguments
1282 to the function call. */
1283 rdwr_map rdwr_idx;
1284 init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
1286 unsigned argno = 0;
1288 for (tree iter = TYPE_ARG_TYPES (fntype); iter;
1289 iter = TREE_CHAIN (iter), ++argno)
1291 const attr_access* access = rdwr_idx.get (argno);
1292 if (!access)
1293 continue;
1295 /* Ignore any duplicate entry in the map for the size argument. */
1296 if (access->ptrarg != argno)
1297 continue;
1299 if (access->sizarg == UINT_MAX)
1300 continue;
1302 tree size_arg = gimple_call_arg (call, access->sizarg);
1304 state_t state = sm_ctxt->get_state (call, size_arg);
1305 enum bounds b;
1306 if (get_taint (state, TREE_TYPE (size_arg), &b))
1308 const char* const access_str =
1309 TREE_STRING_POINTER (access->to_external_string ());
1310 tree diag_size = sm_ctxt->get_diagnostic_tree (size_arg);
1311 sm_ctxt->warn (node, call, size_arg,
1312 make_unique<tainted_access_attrib_size>
1313 (*this, diag_size, b,
1314 callee_fndecl,
1315 access->sizarg,
1316 access_str));
1321 /* Complain if ASSIGN (a division operation) has a tainted divisor
1322 that could be zero. */
1324 void
1325 taint_state_machine::check_for_tainted_divisor (sm_context *sm_ctxt,
1326 const supernode *node,
1327 const gassign *assign) const
1329 const region_model *old_model = sm_ctxt->get_old_region_model ();
1330 if (!old_model)
1331 return;
1333 tree divisor_expr = gimple_assign_rhs2 (assign);;
1335 /* Until we track conditions on floating point values, we can't check to
1336 see if they've been checked against zero. */
1337 if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr)))
1338 return;
1340 const svalue *divisor_sval = old_model->get_rvalue (divisor_expr, NULL);
1342 state_t state = sm_ctxt->get_state (assign, divisor_sval);
1343 enum bounds b;
1344 if (get_taint (state, TREE_TYPE (divisor_expr), &b))
1346 const svalue *zero_sval
1347 = old_model->get_manager ()->get_or_create_int_cst
1348 (TREE_TYPE (divisor_expr), 0);
1349 tristate ts
1350 = old_model->eval_condition (divisor_sval, NE_EXPR, zero_sval);
1351 if (ts.is_true ())
1352 /* The divisor is known to not equal 0: don't warn. */
1353 return;
1355 tree diag_divisor = sm_ctxt->get_diagnostic_tree (divisor_expr);
1356 sm_ctxt->warn (node, assign, divisor_expr,
1357 make_unique <tainted_divisor> (*this, diag_divisor, b));
1358 sm_ctxt->set_next_state (assign, divisor_sval, m_stop);
1362 } // anonymous namespace
1364 /* Internal interface to this file. */
1366 state_machine *
1367 make_taint_state_machine (logger *logger)
1369 return new taint_state_machine (logger);
1372 /* Complain to CTXT if accessing REG leads could lead to arbitrary
1373 memory access under an attacker's control (due to taint). */
1375 void
1376 region_model::check_region_for_taint (const region *reg,
1377 enum access_direction,
1378 region_model_context *ctxt) const
1380 gcc_assert (reg);
1381 gcc_assert (ctxt);
1383 LOG_SCOPE (ctxt->get_logger ());
1385 sm_state_map *smap;
1386 const state_machine *sm;
1387 unsigned sm_idx;
1388 if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1389 return;
1391 gcc_assert (smap);
1392 gcc_assert (sm);
1394 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1396 const extrinsic_state *ext_state = ctxt->get_ext_state ();
1397 if (!ext_state)
1398 return;
1400 const region *iter_region = reg;
1401 while (iter_region)
1403 switch (iter_region->get_kind ())
1405 default:
1406 break;
1408 case RK_ELEMENT:
1410 const element_region *element_reg
1411 = (const element_region *)iter_region;
1412 const svalue *index = element_reg->get_index ();
1413 const state_machine::state_t
1414 state = smap->get_state (index, *ext_state);
1415 gcc_assert (state);
1416 enum bounds b;
1417 if (taint_sm.get_taint (state, index->get_type (), &b))
1419 tree arg = get_representative_tree (index);
1420 ctxt->warn (make_unique<tainted_array_index> (taint_sm, arg, b));
1423 break;
1425 case RK_OFFSET:
1427 const offset_region *offset_reg
1428 = (const offset_region *)iter_region;
1429 const svalue *offset = offset_reg->get_byte_offset ();
1430 const state_machine::state_t
1431 state = smap->get_state (offset, *ext_state);
1432 gcc_assert (state);
1433 /* Handle implicit cast to sizetype. */
1434 tree effective_type = offset->get_type ();
1435 if (const svalue *cast = offset->maybe_undo_cast ())
1436 if (cast->get_type ())
1437 effective_type = cast->get_type ();
1438 enum bounds b;
1439 if (taint_sm.get_taint (state, effective_type, &b))
1441 tree arg = get_representative_tree (offset);
1442 ctxt->warn (make_unique<tainted_offset> (taint_sm, arg, b));
1445 break;
1447 case RK_CAST:
1449 const cast_region *cast_reg
1450 = as_a <const cast_region *> (iter_region);
1451 iter_region = cast_reg->get_original_region ();
1452 continue;
1455 case RK_SIZED:
1457 const sized_region *sized_reg
1458 = (const sized_region *)iter_region;
1459 const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
1460 const state_machine::state_t
1461 state = smap->get_state (size_sval, *ext_state);
1462 gcc_assert (state);
1463 enum bounds b;
1464 if (taint_sm.get_taint (state, size_sval->get_type (), &b))
1466 tree arg = get_representative_tree (size_sval);
1467 ctxt->warn (make_unique<tainted_size> (taint_sm, arg, b));
1470 break;
1473 iter_region = iter_region->get_parent_region ();
1477 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1478 under an attacker's control (due to taint), where the allocation
1479 is happening within MEM_SPACE. */
1481 void
1482 region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
1483 const svalue *size_in_bytes,
1484 region_model_context *ctxt) const
1486 gcc_assert (size_in_bytes);
1487 gcc_assert (ctxt);
1489 LOG_SCOPE (ctxt->get_logger ());
1491 sm_state_map *smap;
1492 const state_machine *sm;
1493 unsigned sm_idx;
1494 if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1495 return;
1497 gcc_assert (smap);
1498 gcc_assert (sm);
1500 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1502 const extrinsic_state *ext_state = ctxt->get_ext_state ();
1503 if (!ext_state)
1504 return;
1506 const state_machine::state_t
1507 state = smap->get_state (size_in_bytes, *ext_state);
1508 gcc_assert (state);
1509 enum bounds b;
1510 if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
1512 tree arg = get_representative_tree (size_in_bytes);
1513 ctxt->warn (make_unique<tainted_allocation_size>
1514 (taint_sm, arg, b, mem_space));
1518 /* Mark SVAL as TAINTED. CTXT must be non-NULL. */
1520 void
1521 region_model::mark_as_tainted (const svalue *sval,
1522 region_model_context *ctxt)
1524 gcc_assert (sval);
1525 gcc_assert (ctxt);
1527 sm_state_map *smap;
1528 const state_machine *sm;
1529 unsigned sm_idx;
1530 if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1531 return;
1533 gcc_assert (smap);
1534 gcc_assert (sm);
1536 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1538 const extrinsic_state *ext_state = ctxt->get_ext_state ();
1539 if (!ext_state)
1540 return;
1542 smap->set_state (this, sval, taint_sm.m_tainted, NULL, *ext_state);
1545 /* Return true if SVAL could possibly be attacker-controlled. */
1547 bool
1548 region_model_context::possibly_tainted_p (const svalue *sval)
1550 sm_state_map *smap;
1551 const state_machine *sm;
1552 unsigned sm_idx;
1553 if (!get_taint_map (&smap, &sm, &sm_idx))
1554 return false;
1556 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1558 const extrinsic_state *ext_state = get_ext_state ();
1559 if (!ext_state)
1560 return false;
1562 const state_machine::state_t state = smap->get_state (sval, *ext_state);
1563 gcc_assert (state);
1565 return (state == taint_sm.m_tainted
1566 || state == taint_sm.m_has_lb
1567 || state == taint_sm.m_has_ub);
1570 } // namespace ana
1572 #endif /* #if ENABLE_ANALYZER */