2 * Smatch pattern to facilitate the hardening of the Linux guest kernel
3 * for Confidential Cloud Computing threat model.
4 * In this model the Linux guest kernel cannot trust the values
5 * it obtains using low level IO functions because they can be provided
6 * by a potentially malicious host or VMM. Instead it needs to make
7 * sure the code that handles processing of such values is hardened,
8 * free of memory safety issues and other potential security issues.
10 * This smatch pattern helps to identify such places.
11 * Currently it covers most of MSR, portIO, MMIO, PCI config space
12 * and cpuid reading primitives.
13 * The full list of covered functions is stored in host_input_funcs array.
14 * The output of the pattern can be used to facilitate code audit, as
15 * well as to verify that utilized fuzzing strategy can reach all the
16 * code paths that can take a low-level input from a potentially malicious host.
18 * When run, the pattern produces two types of findings: errors and warnings.
19 * This is done to help prioritize the issues for the manual code audit.
20 * However, if time permits, all locations reported by the pattern should be checked.
22 * Written based on existing smatch patterns.
24 * Author: Elena Reshetova <elena.reshetova@intel.com>
25 * Copyright (c) 2022, Intel Corporation
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License version 2, as
29 * published by the Free Software Foundation.
33 #include "smatch_slist.h"
34 #include "smatch_extra.h"
/* Name of this check; passed as the first '%s' of the messages it reports. */
static const char *pattern_name = "check_host_input";
41 /* Obtain the line number where the current function starts.
 * Used to calculate a relative offset for the pattern findings.
 *
 * Walks this check's states in the current stree and returns
 * sym->pos.line of the first entry that has a symbol, whose name occurs
 * inside func_name, and whose possible states include called_funcs (the
 * state recorded by match_function_def()).
 *
 * NOTE(review): the match is strstr(func_name, sm->name), i.e. an entry
 * whose name is only a substring of func_name is accepted as well --
 * confirm this is intended rather than an exact comparison. */
44 static int get_func_start_lineno(char* func_name
)
51 FOR_EACH_MY_SM(my_id
, __get_cur_stree(), sm
) {
52 if ( (sm
->sym
) && (strstr(func_name
, sm
->name
) != NULL
)
53 && (slist_has_state(sm
->possible
, &called_funcs
)))
54 return sm
->sym
->pos
.line
;
55 } END_FOR_EACH_SM(sm
);
/*
 * djb2_hash() - djb2 string hash with a trailing integer mixed in.
 * @str: NUL-terminated string to hash; NULL is treated as an empty string.
 * @num: extra value folded in after the last character.
 *
 * Starts from 5381 and applies hash = hash * 33 + c for every byte of @str,
 * then one more round with @num in place of a character. Arithmetic wraps
 * modulo the width of unsigned long.
 *
 * Returns the resulting hash.
 */
unsigned long djb2_hash(const char *str, int num)
{
	unsigned long hash = 5381;

	if (str) {
		for (; *str != '\0'; str++) {
			/*
			 * Read the byte as unsigned char: the signedness of plain
			 * char is implementation-defined and the hash must not
			 * differ between hosts.
			 */
			hash = ((hash << 5) + hash) + (unsigned char)*str; /* hash * 33 + c */
		}
	}

	return ((hash << 5) + hash) + (unsigned long)num;
}
/*
 * produce_expression_hash() - identifier for a reported finding.
 * @expr: expression the finding is reported against.
 *
 * Hashes the textual form of @expr together with the distance between the
 * current line and the start line looked up by get_func_start_lineno().
 * The identifiers are used to automatically transfer previously seen
 * results.
 *
 * Returns the djb2 hash of the chosen string and the line offset.
 */
unsigned long produce_expression_hash(struct expression *expr)
{
	unsigned long hash;
	int line_offset = get_lineno() - get_func_start_lineno(get_function());
	char *str = expr_to_str(expr);

	/*
	 * For non-parsable expressions and expressions containing temp
	 * variables (like __UNIQUE_ID_*, $expr_), it is more stable to hash a
	 * fixed string together with the line offset; otherwise many results
	 * do not transfer automatically between audits.
	 */
	if (str && !strstr(str, "__UNIQUE_ID_") && !strstr(str, "$expr_"))
		hash = djb2_hash(str, line_offset);
	else
		hash = djb2_hash("complex", line_offset);

	/* The string from expr_to_str() is only needed for hashing; release it. */
	free_string(str);

	return hash;
}
94 /* Helper utility to remove various operands
95 * to get a clean expression */
96 static struct expression
* strip_pre_post_ops(struct expression
*expr
)
99 if((expr
->type
== EXPR_PREOP
) || (expr
->type
== EXPR_POSTOP
)) {
101 } else if ((expr
->type
== EXPR_CAST
) || (expr
->type
== EXPR_FORCE_CAST
)
102 || (expr
->type
== EXPR_IMPLIED_CAST
)) {
103 expr
= expr
->cast_expression
;
105 // Done if we can't strip anything more
112 /* Helper to store the info on called functions.
113 * Used to calculate the line number in get_func_start_lineno() */
114 static void match_function_def(struct symbol
*sym
)
116 set_state(my_id
, sym
->ident
->name
, sym
, &called_funcs
);
119 /* Checks all return expressions for tainted values.
 * Registered for RETURN_HOOK in check_host_input(). If is_host_rl() is
 * true for the returned expression, a warning carrying the identifier
 * from produce_expression_hash() is reported. */
120 static void match_return(struct expression
*ret_value
)
127 if (is_host_rl(ret_value
)) {
128 hash
= produce_expression_hash(ret_value
);
/* NOTE(review): the expr_to_str() result is passed straight to
 * sm_warning() and not kept, so it cannot be released afterwards --
 * confirm whether it needs free_string(). */
129 sm_warning("{%lu}\n\t'%s' return an expression containing a propagated value from the host '%s';",
130 hash
, pattern_name
, expr_to_str(ret_value
));
135 /* Checks all STMT_ITERATOR/IF/SWITCH expressions for tainted values.
 * Registered for STMT_HOOK in check_host_input().
 *
 * Iterators: the pre/post statements (when they are STMT_EXPRESSION) and
 * the pre/post conditions are each tested with is_host_rl(); the finding
 * is reported with sm_error().
 * if/switch: the condition or the switch expression is tested with
 * is_host_rl() and reported with sm_warning().
 * STMT_RETURN is left to match_return(). */
136 static void match_statement(struct statement
*stmt
)
139 struct expression
*expr
= NULL
;
144 if (stmt
->type
== STMT_ITERATOR
) {
/* Each of the four tests below overwrites expr when it matches. */
145 if ((stmt
->iterator_pre_statement
) && (stmt
->iterator_pre_statement
->type
== STMT_EXPRESSION
)
146 && (stmt
->iterator_pre_statement
->expression
)
147 && (is_host_rl(stmt
->iterator_pre_statement
->expression
)))
148 expr
= stmt
->iterator_pre_statement
->expression
;
150 if ((stmt
->iterator_post_statement
) && (stmt
->iterator_post_statement
->type
== STMT_EXPRESSION
)
151 && (stmt
->iterator_post_statement
->expression
)
152 && (is_host_rl(stmt
->iterator_post_statement
->expression
)))
153 expr
= stmt
->iterator_post_statement
->expression
;
155 if ((stmt
->iterator_pre_condition
) && (is_host_rl(stmt
->iterator_pre_condition
)))
156 expr
= stmt
->iterator_pre_condition
;
158 if ((stmt
->iterator_post_condition
) && (is_host_rl(stmt
->iterator_post_condition
)))
159 expr
= stmt
->iterator_post_condition
;
161 /* The above logic only stores the latest tainted expr.
162 * This is ok since one warning per line is enough */
164 hash
= produce_expression_hash(expr
);
165 sm_error("{%lu}\n\t'%s' an expression containing a propagated value from the host '%s' used in iterator;",
166 hash
, pattern_name
, expr_to_str(expr
));
169 } else if (stmt
->type
== STMT_IF
) {
170 expr
= stmt
->if_conditional
;
171 } else if (stmt
->type
== STMT_SWITCH
) {
172 expr
= stmt
->switch_expression
;
173 } else if (stmt
->type
== STMT_RETURN
){
174 return; /* returns are handled by match_return */
/* NOTE(review): the hash is produced before is_host_rl() is consulted,
 * so it is also computed for expressions that are not reported. */
180 hash
= produce_expression_hash(expr
);
181 if (is_host_rl(expr
)){
182 sm_warning("{%lu}\n\t'%s' an expression containing a propagated value from the host '%s' used in if/switch statement;",
183 hash
, pattern_name
, expr_to_str(expr
));
/*
 * is_tmp_expression() - helper to rule out the temp expressions.
 * @expr: expression to test.
 *
 * Returns true when the textual form of @expr starts with "__fake_",
 * "__UNIQUE_ID" or "$expr_"; false otherwise, including when @expr has no
 * textual form.
 */
bool is_tmp_expression(struct expression *expr)
{
	static const char *const tmp_prefixes[] = {
		"__fake_",
		"__UNIQUE_ID",
		"$expr_",
	};
	/* Convert once instead of once per comparison, so there is one string to release. */
	char *str = expr_to_str(expr);
	bool is_tmp = false;
	size_t i;

	if (!str)
		return false;

	for (i = 0; i < sizeof(tmp_prefixes) / sizeof(tmp_prefixes[0]); i++) {
		if (strncmp(str, tmp_prefixes[i], strlen(tmp_prefixes[i])) == 0) {
			is_tmp = true;
			break;
		}
	}

	free_string(str);
	return is_tmp;
}
199 /* Checks assignment expressions.
 * Registered for ASSIGNMENT_HOOK in check_host_input().
 *
 * The left side is examined after strip_pre_post_ops(), the right side
 * after strip_expr(). Two reports are issued from here: one when the
 * right side is a call (EXPR_CALL), naming the called function and the
 * destination, and one when is_host_rl() is true for the right side,
 * naming source and destination. */
200 static void match_assign(struct expression
*expr
)
202 struct expression
*current
= expr
;
203 struct expression
*left
= NULL
;
204 unsigned long hash
= 0;
209 if (is_fake_var_assign(current
))
212 if (__in_fake_parameter_assign
)
215 if (current
->type
!= EXPR_ASSIGNMENT
) {
216 sm_error("'%s' Strange EXPR in assigment;", pattern_name
);
/* The identifier is derived from the whole assignment, not from one side. */
220 hash
= produce_expression_hash(expr
);
221 left
= current
->left
;
222 left
= strip_pre_post_ops(left
);
/* From here on, current refers to the stripped right-hand side. */
223 current
= strip_expr(current
->right
);
225 if (is_tmp_expression(current
) || is_tmp_expression(left
))
228 if (current
->type
== EXPR_CALL
) {
229 int param
= get_host_data_fn_param(expr_to_str(current
->fn
));
231 sm_warning("{%lu}\n\t'%s' read from the host using function '%s' into a variable '%s';",
232 hash
, pattern_name
, expr_to_str(current
->fn
), expr_to_str(left
));
234 /* rest of the cases are handled in match_after_call */
238 if (!is_host_rl(current
))
241 sm_warning("{%lu}\n\t'%s' propagating read value from the host '%s' into a different variable '%s';",
242 hash
, pattern_name
, expr_to_str(current
), expr_to_str(left
));
246 /* Checks function calls.
 * Registered for FUNCTION_CALL_HOOK_AFTER_DB in check_host_input().
 *
 * Walks the call's arguments and only considers those for which
 * is_host_rl() or points_to_host_data() is true. Reports either a read
 * from the host into the argument, or the use of a tainted argument in
 * the called function (with a distinct message for EXPR_BINOP
 * arguments). */
247 static void match_after_call(struct expression
*expr
)
249 struct expression
*arg
;
251 const char *message
, *function_name
;
/* NOTE(review): this initializer dereferences expr->fn before the
 * (!expr) || (!expr->fn) test that follows, so that test cannot protect
 * it -- the lookup should move below the check. */
252 int param
= get_host_data_fn_param(expr_to_str(expr
->fn
));
254 if ((!expr
) || (!expr
->fn
))
260 if (is_impossible_path())
/* Name used in the messages: the callee's symbol name when there is one,
 * otherwise the text of the whole call expression. */
263 if (!expr
->fn
->symbol_name
)
264 function_name
= expr_to_str(expr
);
266 function_name
= expr
->fn
->symbol_name
->name
;
/* One identifier per call; every report for this call shares it. */
268 hash
= produce_expression_hash(expr
);
270 FOR_EACH_PTR(expr
->args
, arg
) {
271 if (!is_host_rl(arg
) && !points_to_host_data(arg
))
274 /* the case when param = -1 is handled in match_assign */
276 sm_warning("{%lu}\n\t'%s' read from the host using function '%s' into a non-local variable '%s';",
277 hash
, pattern_name
, expr_to_str(expr
->fn
), expr_to_str(arg
));
279 if (arg
->type
== EXPR_BINOP
)
280 message
= "{%lu}\n\t'%s' an expression containing a tainted value from the host '%s' used in function '%s';";
282 message
= "{%lu}\n\t'%s' a tainted value from the host '%s' used in function '%s';";
283 sm_warning(message
, hash
, pattern_name
, expr_to_str(arg
), function_name
);
285 } END_FOR_EACH_PTR(arg
);
288 /* Checks if the array offset has
 * been influenced by a value supplied by host.
 * Registered for OP_HOOK in check_host_input(). The offset is taken with
 * get_array_offset() from the stripped expression and tested with
 * is_host_rl().
 *
 * NOTE(review): unlike the other reports of this check, the message
 * below carries no "{%lu}" identifier -- confirm this is intended. */
290 static void array_offset_check(struct expression
*expr
)
292 struct expression
*offset
;
294 expr
= strip_expr(expr
);
298 if (is_impossible_path())
301 offset
= get_array_offset(expr
);
302 if (!is_host_rl(offset
))
305 sm_error("'%s' a tainted value from the host '%s' used as array offset in expression '%s';",
306 pattern_name
, expr_to_str(offset
), expr_to_str(expr
));
/* Entry point of the check: registers the handlers defined in this file.
 *   match_assign        - ASSIGNMENT_HOOK
 *   match_return        - RETURN_HOOK
 *   match_statement     - STMT_HOOK
 *   match_function_def  - AFTER_DEF_HOOK
 *   match_after_call    - FUNCTION_CALL_HOOK_AFTER_DB
 *   array_offset_check  - OP_HOOK */
310 void check_host_input(int id
)
313 add_hook(&match_assign
, ASSIGNMENT_HOOK
);
314 add_hook(&match_return
, RETURN_HOOK
);
315 add_hook(&match_statement
, STMT_HOOK
);
316 add_hook(&match_function_def
, AFTER_DEF_HOOK
);
317 add_hook(&match_after_call
, FUNCTION_CALL_HOOK_AFTER_DB
);
318 add_hook(&array_offset_check
, OP_HOOK
);