function_hooks: introduce early add_function_hook_early()
[smatch.git] / check_host_input.c
blob80bcb5817c3ef42dd09da6621941357f3612c4f6
1 /*
2 * Smatch pattern to facilitate the hardening of the Linux guest kernel
3 * for Confidential Cloud Computing threat model.
4 * In this model the Linux guest kernel cannot trust the values
5 * it obtains using low level IO functions because they can be provided
6 * by a potentially malicious host or VMM. Instead it needs to make
7 * sure the code that handles processing of such values is hardened,
8 * free of memory safety issues and other potential security issues.
10 * This smatch pattern helps to identify such places.
11 * Currently it covers most of MSR, portIO, MMIO, PCI config space
12 * and cpuid reading primitives.
13 * The full list of covered functions is stored in host_input_funcs array.
14 * The output of the pattern can be used to facilitate code audit, as
15 * well as to verify that utilized fuzzing strategy can reach all the
16 * code paths that can take a low-level input from a potentially malicious host.
18 * When run, the pattern produces two types of findings: errors and warnings.
19 * This is done to help prioritizing the issues for the manual code audit.
20 * However, if time permits, all locations reported by the pattern should be checked.
22 * Written based on existing smatch patterns.
24 * Author: Elena Reshetova <elena.reshetova@intel.com>
25 * Copyright (c) 2022, Intel Corporation
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License version 2, as
29 * published by the Free Software Foundation.
32 #include "smatch.h"
33 #include "smatch_slist.h"
34 #include "smatch_extra.h"
35 #include <math.h>
/* State attached to each function definition by match_function_def(). */
STATE(called_funcs);

/* Check id handed to check_host_input(); owner of this check's states. */
static int my_id;

/*
 * Name of this pattern, printed in every finding. The pointer is never
 * reassigned, so both the pointer and the pointee are const.
 */
static const char *const pattern_name = "check_host_input";
41 /* Obtain the line number where a current function
42 * starts. Used to calculate a relative offset for
43 * the pattern findings. */
44 static int get_func_start_lineno(char* func_name)
46 struct sm_state *sm;
48 if (!func_name)
49 return -1;
51 FOR_EACH_MY_SM(my_id, __get_cur_stree(), sm) {
52 if ( (sm->sym) && (strstr(func_name, sm->name) != NULL)
53 && (slist_has_state(sm->possible, &called_funcs)))
54 return sm->sym->pos.line;
55 } END_FOR_EACH_SM(sm);
56 return -1;
/*
 * Calculate the djb2 hash (hash * 33 + byte) of a string and mix in one
 * extra integer as if it were a final input element.
 *
 * str: NUL-terminated string to hash; NULL is hashed like the empty string.
 * num: value folded into the hash after the last byte of str.
 *
 * Bytes are read as unsigned char so that the result does not depend on
 * whether plain char is signed on the build platform. For 7-bit ASCII
 * input the result is the same as when reading through plain char.
 */
unsigned long djb2_hash(const char *str, int num)
{
	unsigned long hash = 5381;

	if (str) {
		const unsigned char *p = (const unsigned char *)str;

		while (*p)
			hash = hash * 33 + *p++;
	}

	return hash * 33 + num;
}
/*
 * Produce a djb2 hash for the given expression.
 * The hash serves as an identifier for a reported issue, so that
 * previously seen results can be transferred automatically.
 *
 * The hashed text is the string form of the expression combined with the
 * line offset from the start of the current function. For expressions that
 * cannot be turned into a string, or that contain temporary names
 * (__UNIQUE_ID_*, $expr_*), the fixed string "complex" is hashed instead:
 * this is more stable between audits of different code versions.
 *
 * NOTE(review): the string returned by expr_to_str() is not released here;
 * confirm who owns it.
 */
unsigned long produce_expression_hash(struct expression *expr)
{
	int line_offset = get_lineno() - get_func_start_lineno(get_function());
	const char *text = expr_to_str(expr);
	const char *key = "complex";

	if (text && !strstr(text, "__UNIQUE_ID_") && !strstr(text, "$expr_"))
		key = text;

	return djb2_hash(key, line_offset);
}
94 /* Helper utility to remove various operands
95 * to get a clean expression */
96 static struct expression* strip_pre_post_ops(struct expression *expr)
98 while (expr) {
99 if((expr->type == EXPR_PREOP) || (expr->type == EXPR_POSTOP)) {
100 expr = expr->unop;
101 } else if ((expr->type == EXPR_CAST) || (expr->type == EXPR_FORCE_CAST)
102 || (expr->type == EXPR_IMPLIED_CAST)) {
103 expr = expr->cast_expression;
104 } else {
105 // Done if we can't strip anything more
106 break;
109 return expr;
112 /* Helper to store the info on called functions.
113 * Used to calculate the line number in get_func_start_lineno() */
114 static void match_function_def(struct symbol *sym)
116 set_state(my_id, sym->ident->name, sym, &called_funcs);
119 /* Checks all return expressions for tainted values */
120 static void match_return(struct expression *ret_value)
122 unsigned long hash;
124 if (!ret_value)
125 return;
127 if (is_host_rl(ret_value)) {
128 hash = produce_expression_hash(ret_value);
129 sm_warning("{%lu}\n\t'%s' return an expression containing a propagated value from the host '%s';",
130 hash, pattern_name, expr_to_str(ret_value));
135 /* Checks all STMT_ITERATOR/IF/SWITCH expressions for tainted values */
136 static void match_statement(struct statement *stmt)
138 unsigned long hash;
139 struct expression *expr = NULL;
141 if (!stmt)
142 return;
144 if (stmt->type == STMT_ITERATOR) {
145 if ((stmt->iterator_pre_statement) && (stmt->iterator_pre_statement->type == STMT_EXPRESSION)
146 && (stmt->iterator_pre_statement->expression)
147 && (is_host_rl(stmt->iterator_pre_statement->expression)))
148 expr = stmt->iterator_pre_statement->expression;
150 if ((stmt->iterator_post_statement) && (stmt->iterator_post_statement->type == STMT_EXPRESSION)
151 && (stmt->iterator_post_statement->expression)
152 && (is_host_rl(stmt->iterator_post_statement->expression)))
153 expr = stmt->iterator_post_statement->expression;
155 if ((stmt->iterator_pre_condition) && (is_host_rl(stmt->iterator_pre_condition)))
156 expr = stmt->iterator_pre_condition;
158 if ((stmt->iterator_post_condition) && (is_host_rl(stmt->iterator_post_condition)))
159 expr = stmt->iterator_post_condition;
161 /* The above logic only stores the latest tainted expr.
162 * This is ok since one warning per line is enough */
163 if (expr) {
164 hash = produce_expression_hash(expr);
165 sm_error("{%lu}\n\t'%s' an expression containing a propagated value from the host '%s' used in iterator;",
166 hash, pattern_name, expr_to_str(expr));
167 return;
169 } else if (stmt->type == STMT_IF) {
170 expr = stmt->if_conditional;
171 } else if (stmt->type == STMT_SWITCH) {
172 expr = stmt->switch_expression;
173 } else if (stmt->type == STMT_RETURN){
174 return; /* returns are handled by match_return */
177 if (!expr)
178 return;
180 hash = produce_expression_hash(expr);
181 if (is_host_rl(expr)){
182 sm_warning("{%lu}\n\t'%s' an expression containing a propagated value from the host '%s' used in if/switch statement;",
183 hash, pattern_name, expr_to_str(expr));
184 return;
/*
 * Tell whether an expression is a temporary one, i.e. whether its string
 * form starts with "__fake_", "__UNIQUE_ID" or "$expr_".
 *
 * The string form is obtained once and compared against each prefix; the
 * compared length is taken from the prefix itself rather than hard-coded.
 *
 * Returns false when the expression has no string form.
 */
bool is_tmp_expression(struct expression *expr)
{
	static const char *const tmp_prefixes[] = {
		"__fake_",
		"__UNIQUE_ID",
		"$expr_",
	};
	const char *name = expr_to_str(expr);
	size_t i;

	if (!name)
		return false;

	for (i = 0; i < sizeof(tmp_prefixes) / sizeof(tmp_prefixes[0]); i++) {
		if (strncmp(name, tmp_prefixes[i], strlen(tmp_prefixes[i])) == 0)
			return true;
	}

	return false;
}
199 /* Checks assigment expressions */
200 static void match_assign(struct expression *expr)
202 struct expression *current = expr;
203 struct expression *left = NULL;
204 unsigned long hash = 0;
206 if (!current)
207 return;
209 if (is_fake_var_assign(current))
210 return;
212 if (__in_fake_parameter_assign)
213 return;
215 if (current->type != EXPR_ASSIGNMENT) {
216 sm_error("'%s' Strange EXPR in assigment;", pattern_name);
217 return;
220 hash = produce_expression_hash(expr);
221 left = current->left;
222 left = strip_pre_post_ops(left);
223 current = strip_expr(current->right);
225 if (is_tmp_expression(current) || is_tmp_expression(left))
226 return;
228 if (current->type == EXPR_CALL) {
229 int param = get_host_data_fn_param(expr_to_str(current->fn));
230 if (param == -1) {
231 sm_warning("{%lu}\n\t'%s' read from the host using function '%s' into a variable '%s';",
232 hash, pattern_name, expr_to_str(current->fn), expr_to_str(left));
234 /* rest of the cases are handled in match_after_call */
235 return;
238 if (!is_host_rl(current))
239 return;
241 sm_warning("{%lu}\n\t'%s' propagating read value from the host '%s' into a different variable '%s';",
242 hash, pattern_name, expr_to_str(current), expr_to_str(left));
243 return;
246 /* Checks function calls */
247 static void match_after_call(struct expression *expr)
249 struct expression *arg;
250 unsigned long hash;
251 const char *message, *function_name;
252 int param = get_host_data_fn_param(expr_to_str(expr->fn));
254 if ((!expr) || (!expr->fn))
255 return;
257 if (parse_error)
258 return;
260 if (is_impossible_path())
261 return;
263 if (!expr->fn->symbol_name)
264 function_name = expr_to_str(expr);
265 else
266 function_name = expr->fn->symbol_name->name;
268 hash = produce_expression_hash(expr);
270 FOR_EACH_PTR(expr->args, arg) {
271 if (!is_host_rl(arg) && !points_to_host_data(arg))
272 continue;
274 /* the case when param = -1 is handled in match_assign */
275 if (param > 0)
276 sm_warning("{%lu}\n\t'%s' read from the host using function '%s' into a non-local variable '%s';",
277 hash, pattern_name, expr_to_str(expr->fn), expr_to_str(arg));
278 else {
279 if (arg->type == EXPR_BINOP)
280 message = "{%lu}\n\t'%s' an expression containing a tainted value from the host '%s' used in function '%s';";
281 else
282 message = "{%lu}\n\t'%s' a tainted value from the host '%s' used in function '%s';";
283 sm_warning(message, hash, pattern_name, expr_to_str(arg), function_name);
285 } END_FOR_EACH_PTR(arg);
288 /* Checks if the array offset has
289 * been influenced by a value supplied by host */
290 static void array_offset_check(struct expression *expr)
292 struct expression *offset;
294 expr = strip_expr(expr);
295 if (!is_array(expr))
296 return;
298 if (is_impossible_path())
299 return;
301 offset = get_array_offset(expr);
302 if (!is_host_rl(offset))
303 return;
305 sm_error("'%s' a tainted value from the host '%s' used as array offset in expression '%s';",
306 pattern_name, expr_to_str(offset), expr_to_str(expr));
307 return;
310 void check_host_input(int id)
312 my_id = id;
313 add_hook(&match_assign, ASSIGNMENT_HOOK);
314 add_hook(&match_return, RETURN_HOOK);
315 add_hook(&match_statement, STMT_HOOK);
316 add_hook(&match_function_def, AFTER_DEF_HOOK);
317 add_hook(&match_after_call, FUNCTION_CALL_HOOK_AFTER_DB);
318 add_hook(&array_offset_check, OP_HOOK);