From e16063ef865513039265ea70711c0d848a43605d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Sep 2012 15:35:02 +0300 Subject: [PATCH] db, buf_size, parse_call_math: track the size of returned buffers With this change, we try to be a bit clever about how we track the size of returned buffers. There are lots of functions like alloc_foo() where you pass the number of foos you want and it allocates nr_foo * sizeof(foo) bytes and returns a buffer of that size. This patch tries to calculate that. First we record in the database how we allocate the buffer, so if we take 24 bytes * parameter number 2 then we store that in the database. BUF_SIZE = "24 * <2>" Then on the second pass, if we can calculate the size, we use that. I had to fix some bugs in smatch_function_hooks.c to make this work and those fixes got merged into this patch. Originally the return_implies hooks and the return_states hooks were mixed together, but now I've separated them so they are called separately. Also there was a problem because the return_states hooks and the implied_states hooks conflict. They both try to set SMATCH_EXTRA. In the original code, we avoided this by only calling the implied_states hooks because the implied_states are more important. In this patch we call them all, but if there are implied_states they overwrite the return_states. 
Signed-off-by: Dan Carpenter --- Makefile | 2 +- check_list.h | 1 + check_overflow.c | 25 ++ smatch.h | 1 + smatch_buf_size.c | 15 + smatch_function_hooks.c | 33 +- smatch_parse_call_math.c | 507 +++++++++++++++++++++++++++++ smatch_scripts/db/fill_db_return_states.pl | 7 + 8 files changed, 582 insertions(+), 9 deletions(-) create mode 100644 smatch_parse_call_math.c diff --git a/Makefile b/Makefile index 426ec9e4..cea7ac3f 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ SMATCH_FILES=smatch_flow.o smatch_conditions.o smatch_slist.o smatch_states.o \ smatch_ranges.o smatch_implied.o smatch_ignore.o smatch_project.o \ smatch_tracker.o smatch_files.o smatch_expression_stacks.o \ smatch_constraints.o smatch_buf_size.o smatch_capped.o smatch_db.o \ - smatch_expressions.o smatch_returns.o + smatch_expressions.o smatch_returns.o smatch_parse_call_math.o SMATCH_CHECKS=$(shell ls check_*.c | sed -e 's/\.c/.o/') SMATCH_DATA=smatch_data/kernel.allocation_funcs smatch_data/kernel.balanced_funcs \ diff --git a/check_list.h b/check_list.h index f44ab5ef..a4c891a0 100644 --- a/check_list.h +++ b/check_list.h @@ -12,6 +12,7 @@ CK(register_smatch_ignore) CK(register_buf_size) CK(register_strlen) CK(register_capped) +CK(register_parse_call_math) CK(check_debug) CK(check_assigned_expr) diff --git a/check_overflow.c b/check_overflow.c index 77bfdc9d..50f4e88e 100644 --- a/check_overflow.c +++ b/check_overflow.c @@ -302,6 +302,30 @@ static void match_limited(const char *fn, struct expression *expr, void *_limite free_string(dest_name); } +static void db_returns_buf_size(struct expression *expr, int param, char *unused, char *math) +{ + struct expression *call; + struct symbol *type; + int bytes; + long long val; + + if (expr->type != EXPR_ASSIGNMENT) + return; + call = strip_expr(expr->right); + type = get_pointer_type(expr->left); + + if (!parse_call_math(call, math, &val) || val == 0) + return; + if (!type) + return; + bytes = bits_to_bytes(type->bit_size); + if (!bytes) + 
return; + if (val >= bytes) + return; + sm_msg("error: not allocating enough data %d vs %lld", bytes, val); +} + static void register_funcs_from_file(void) { char name[256]; @@ -356,6 +380,7 @@ void check_overflow(int id) add_function_hook("sprintf", &match_sprintf, NULL); add_function_hook("memcmp", &match_limited, &b0_l2); add_function_hook("memcmp", &match_limited, &b1_l2); + add_db_return_states_callback(BUF_SIZE, &db_returns_buf_size); add_modification_hook(my_used_id, &delete); if (option_project == PROJ_KERNEL) { add_function_hook("copy_to_user", &match_limited, &b0_l2); diff --git a/smatch.h b/smatch.h index 502def57..29ebf3d5 100644 --- a/smatch.h +++ b/smatch.h @@ -123,6 +123,7 @@ void add_macro_assign_hook_extra(const char *look_for, func_hook *call_back, void return_implies_state(const char *look_for, long long start, long long end, implication_hook *call_back, void *info); void add_db_return_implies_callback(int type, return_implies_hook *callback); +void add_db_return_states_callback(int type, return_implies_hook *callback); int get_implied_return(struct expression *expr, struct range_list **rl); typedef void (modification_hook)(struct sm_state *sm); diff --git a/smatch_buf_size.c b/smatch_buf_size.c index ee03fdce..ac4d2c86 100644 --- a/smatch_buf_size.c +++ b/smatch_buf_size.c @@ -173,6 +173,20 @@ static int size_from_db(struct expression *expr) return db_size; } +static void db_returns_buf_size(struct expression *expr, int param, char *unused, char *math) +{ + struct expression *call; + long long val; + + if (expr->type != EXPR_ASSIGNMENT) + return; + call = strip_expr(expr->right); + + if (!parse_call_math(call, math, &val)) + return; + set_state_expr(my_size_id, expr->left, alloc_state_num(val)); +} + static int get_real_array_size(struct expression *expr) { struct symbol *type; @@ -623,6 +637,7 @@ void register_buf_size(int id) my_size_id = id; add_definition_db_callback(set_param_buf_size, BUF_SIZE); + add_db_return_states_callback(BUF_SIZE, 
&db_returns_buf_size); add_function_assign_hook("malloc", &match_alloc, INT_PTR(0)); add_function_assign_hook("calloc", &match_calloc, NULL); diff --git a/smatch_function_hooks.c b/smatch_function_hooks.c index cdc7ca21..ddc42c4c 100644 --- a/smatch_function_hooks.c +++ b/smatch_function_hooks.c @@ -56,6 +56,7 @@ struct return_implies_callback { ALLOCATOR(return_implies_callback, "return_implies callbacks"); DECLARE_PTR_LIST(db_implies_list, struct return_implies_callback); static struct db_implies_list *db_implies_list; +static struct db_implies_list *db_return_states_list; static struct fcall_back *alloc_fcall_back(int type, void *call_back, void *info) @@ -133,6 +134,15 @@ void add_db_return_implies_callback(int type, return_implies_hook *callback) add_ptr_list(&db_implies_list, cb); } +void add_db_return_states_callback(int type, return_implies_hook *callback) +{ + struct return_implies_callback *cb = __alloc_return_implies_callback(0); + + cb->type = type; + cb->callback = callback; + add_ptr_list(&db_return_states_list, cb); +} + static int call_call_backs(struct call_back_list *list, int type, const char *fn, struct expression *expr) { @@ -291,6 +301,7 @@ struct db_callback_info { struct range_list *rl; int left; struct state_list *slist; + struct db_implies_list *callbacks; }; static struct db_callback_info db_info; static int db_compare_callback(void *unused, int argc, char **argv, char **azColName) @@ -321,7 +332,7 @@ static int db_compare_callback(void *unused, int argc, char **argv, char **azCol return 0; } - FOR_EACH_PTR(db_implies_list, tmp) { + FOR_EACH_PTR(db_info.callbacks, tmp) { if (tmp->type == type) tmp->callback(db_info.expr, param, key, value); } END_FOR_EACH_PTR(tmp); @@ -356,6 +367,7 @@ void compare_db_implies_callbacks(int comparison, struct expression *expr, long db_info.expr = expr; db_info.rl = alloc_range_list(value, value); db_info.left = left; + db_info.callbacks = db_implies_list; db_info.true_side = 1; __push_fake_cur_slist(); @@ 
-410,6 +422,7 @@ void compare_db_return_states_callbacks(int comparison, struct expression *expr, db_info.expr = expr; db_info.rl = alloc_range_list(value, value); db_info.left = left; + db_info.callbacks = db_return_states_list; db_info.true_side = 1; __push_fake_cur_slist(); @@ -543,7 +556,7 @@ static int db_assign_return_states_callback(void *unused, int argc, char **argv, } prev_return_id = return_id; - FOR_EACH_PTR(db_implies_list, tmp) { + FOR_EACH_PTR(db_return_states_list, tmp) { if (tmp->type == type) tmp->callback(db_info.expr, param, key, value); } END_FOR_EACH_PTR(tmp); @@ -619,13 +632,17 @@ static void match_assign_call(struct expression *expr) fn = right->fn->symbol->ident->name; call_backs = search_callback(func_hash, (char *)fn); + /* + * some of these conflict (they try to set smatch extra twice), so we + * call them in order from least important to most important. + */ + call_call_backs(call_backs, ASSIGN_CALL, fn, expr); - handled |= handle_implied_return(expr); - handled |= assign_ranged_funcs(fn, expr, call_backs); - if (handled) - return; - handled |= db_return_implies_assign(expr); handled |= db_return_states_assign(expr); + handled |= db_return_implies_assign(expr); + handled |= assign_ranged_funcs(fn, expr, call_backs); + handled |= handle_implied_return(expr); + if (!handled) set_extra_expr_mod(expr->left, extra_undefined()); } @@ -656,7 +673,7 @@ static int db_return_states_callback(void *unused, int argc, char **argv, char * } prev_return_id = return_id; - FOR_EACH_PTR(db_implies_list, tmp) { + FOR_EACH_PTR(db_return_states_list, tmp) { if (tmp->type == type) tmp->callback(db_info.expr, param, key, value); } END_FOR_EACH_PTR(tmp); diff --git a/smatch_parse_call_math.c b/smatch_parse_call_math.c new file mode 100644 index 00000000..6328f78c --- /dev/null +++ b/smatch_parse_call_math.c @@ -0,0 +1,507 @@ +/* + * smatch/smatch_parse_call_math.c + * + * Copyright (C) 2012 Oracle. 
+ * + * Licensed under the Open Software License version 1.1 + * + */ + +#include "smatch.h" +#include "smatch_slist.h" +#include "smatch_extra.h" + +static int my_id; + +struct { + const char *func; + int param; +} alloc_functions[] = { + {"kmalloc", 0}, + {"__kmalloc", 0}, + {"vmalloc", 0}, + {"__vmalloc", 0}, + {"__vmalloc_node", 0}, +}; + +DECLARE_PTR_LIST(llong_list, long long); + +static struct llong_list *num_list; +static struct string_list *op_list; + +static void push_val(long long val) +{ + long long *p; + + p = malloc(sizeof(*p)); + *p = val; + add_ptr_list(&num_list, p); +} + +static long long pop_val() +{ + long long *p; + long long val; + + if (!num_list) + return 0; + p = last_ptr_list((struct ptr_list *)num_list); + delete_ptr_list_last((struct ptr_list **)&num_list); + val = *p; + free(p); + + return val; +} + +static void push_op(char c) +{ + char *p; + + p = malloc(1); + p[0] = c; + add_ptr_list(&op_list, p); +} + +static char pop_op() +{ + char *p; + char c; + + if (!op_list) { + sm_msg("internal smatch error %s", __func__); + return '\0'; + } + + p = last_ptr_list((struct ptr_list *)op_list); + + delete_ptr_list_last((struct ptr_list **)&op_list); + c = p[0]; + free(p); + + return c; +} + +static int op_precedence(char c) +{ + switch (c) { + case '+': + case '-': + return 1; + case '*': + case '/': + return 2; + default: + return 0; + } +} + +static int top_op_precedence() +{ + char *p; + + if (!op_list) + return 0; + + p = last_ptr_list((struct ptr_list *)op_list); + return op_precedence(p[0]); +} + +static long long do_op(long long left, char op, long long right) +{ + switch (op) { + case '+': + return left + right; + case '-': + return left - right; + case '*': + return left * right; + case '/': + if (right == 0) + return 0; + return left / right; + } + return 0; +} + +static void pop_until(char c) +{ + char op; + long long left, right, res; + + while (top_op_precedence() && op_precedence(c) <= top_op_precedence()) { + op = pop_op(); + 
right = pop_val(); + left = pop_val(); + res = do_op(left, op, right); + push_val(res); + } +} + +static int get_implied_param(struct expression *call, int param, long long *val) +{ + struct expression *arg; + + arg = get_argument_from_call_expr(call->args, param); + return get_implied_value(arg, val); +} + +static int read_number(struct expression *call, char *p, char **end, long long *val) +{ + long param; + + while (*p == ' ') + p++; + + if (*p == '<') { + p++; + param = strtol(p, &p, 10); + if (!get_implied_param(call, param, val)) + return 0; + *end = p + 1; + } else { + *val = strtoll(p, end, 10); + if (*end == p) + return 0; + } + return 1; +} + +static char *read_op(char *p) +{ + while (*p == ' ') + p++; + + switch (*p) { + case '+': + case '-': + case '*': + case '/': + return p; + default: + return NULL; + } +} + +int parse_call_math(struct expression *call, char *math, long long *val) +{ + long long tmp; + char *c; + + /* try to implement shunting yard algorithm. */ + + c = (char *)math; + while (1) { + if (option_debug) + sm_msg("parsing %s", c); + + /* read a number and push it onto the number stack */ + if (!read_number(call, c, &c, &tmp)) + goto fail; + push_val(tmp); + + if (option_debug) + sm_msg("val = %lld remaining = %s", tmp, c); + + if (!*c) + break; + + c = read_op(c); + if (!c) + goto fail; + + if (option_debug) + sm_msg("op = %c remaining = %s", *c, c); + + pop_until(*c); + push_op(*c); + c++; + } + + pop_until(0); + *val = pop_val(); + return 1; +fail: + pop_until(0); /* discard stack */ + return 0; +} + +static struct smatch_state *alloc_state_sname(char *sname) +{ + struct smatch_state *state; + + state = __alloc_smatch_state(0); + state->name = sname; + state->data = INT_PTR(1); + return state; +} + +static int get_arg_number(struct expression *expr) +{ + struct symbol *sym; + struct symbol *arg; + int i; + + expr = strip_expr(expr); + if (expr->type != EXPR_SYMBOL) + return -1; + sym = expr->symbol; + + i = 0; + 
FOR_EACH_PTR(cur_func_sym->ctype.base_type->arguments, arg) { + if (arg == sym) + return i; + i++; + } END_FOR_EACH_PTR(arg); + + return -1; +} + +static int format_expr_helper(char *buf, int remaining, struct expression *expr) +{ + long long val; + int ret; + char *cur; + + cur = buf; + + if (expr->type == EXPR_BINOP) { + ret = format_expr_helper(cur, remaining, expr->left); + if (ret == 0) + return 0; + remaining -= ret; + if (remaining <= 0) + return 0; + cur += ret; + + ret = snprintf(cur, remaining, " %s ", show_special(expr->op)); + remaining -= ret; + if (remaining <= 0) + return 0; + cur += ret; + + ret = format_expr_helper(cur, remaining, expr->right); + if (ret == 0) + return 0; + remaining -= ret; + if (remaining <= 0) + return 0; + cur += ret; + return cur - buf; + } + + val = get_arg_number(expr); + if (val >= 0) { + ret = snprintf(cur, remaining, "<%lld>", val); + remaining -= ret; + if (remaining <= 0) + return 0; + return ret; + } + + if (get_implied_value(expr, &val)) { + ret = snprintf(cur, remaining, "%lld", val); + remaining -= ret; + if (remaining <= 0) + return 0; + return ret; + } + + return 0; +} + +static char *format_expr(struct expression *expr) +{ + char buf[256]; + int ret; + + ret = format_expr_helper(buf, sizeof(buf), expr); + if (ret == 0) + return NULL; + + return alloc_sname(buf); +} + +static void match_alloc(const char *fn, struct expression *expr, void *_size_arg) +{ + int size_arg = PTR_INT(_size_arg); + struct expression *right; + struct expression *size_expr; + char *sname; + + right = strip_expr(expr->right); + size_expr = get_argument_from_call_expr(right->args, size_arg); + + sname = format_expr(size_expr); + if (!sname) + return; + set_state_expr(my_id, expr->left, alloc_state_sname(sname)); +} + +static char *swap_format(struct expression *call, char *format) +{ + static char buf[256]; + long long val; + long param; + struct expression *arg; + char *p; + char *out; + int ret; + + if (format[0] == '<' && format[2] == '>' 
&& format[3] == '\0') { + param = strtol(format + 1, NULL, 10); + arg = get_argument_from_call_expr(call->args, param); + if (!arg) + return NULL; + return format_expr(arg); + } + + buf[0] = '\0'; + p = format; + out = buf; + while (*p) { + if (*p == '<') { + p++; + param = strtol(p, &p, 10); + if (*p != '>') + return NULL; + p++; + arg = get_argument_from_call_expr(call->args, param); + if (!arg) + return NULL; + param = get_arg_number(arg); + if (param >= 0) { + ret = snprintf(out, buf + sizeof(buf) - out, "<%ld>", param); + out += ret; + if (out >= buf + sizeof(buf)) + return NULL; + } else if (get_implied_value(arg, &val)) { + ret = snprintf(out, buf + sizeof(buf) - out, "%lld", val); + out += ret; + if (out >= buf + sizeof(buf)) + return NULL; + } else { + return NULL; + } + } + *out = *p; + p++; + out++; + } + if (buf[0] == '\0') + return NULL; + return alloc_sname(buf); +} + +static char *buf_size_recipe; +static int db_buf_size_callback(void *unused, int argc, char **argv, char **azColName) +{ + if (argc != 1) + return 0; + + if (!buf_size_recipe) + buf_size_recipe = alloc_sname(argv[0]); + else if (strcmp(buf_size_recipe, argv[0]) != 0) + buf_size_recipe = alloc_sname("invalid"); + return 0; +} + +static char *get_allocation_recipe_from_call(struct expression *expr) +{ + struct symbol *sym; + static char sql_filter[1024]; + int i; + + expr = strip_expr(expr); + if (expr->fn->type != EXPR_SYMBOL) + return NULL; + sym = expr->fn->symbol; + if (!sym) + return NULL; + + for (i = 0; i < ARRAY_SIZE(alloc_functions); i++) { + if (strcmp(sym->ident->name, alloc_functions[i].func) == 0) { + char buf[32]; + + snprintf(buf, sizeof(buf), "<%d>", alloc_functions[i].param); + buf_size_recipe = alloc_sname(buf); + return swap_format(expr, buf_size_recipe); + } + } + + if (sym->ctype.modifiers & MOD_STATIC) { + snprintf(sql_filter, 1024, "file = '%s' and function = '%s';", + get_filename(), sym->ident->name); + } else { + snprintf(sql_filter, 1024, "function = '%s' and 
static = 0;", + sym->ident->name); + } + + buf_size_recipe = NULL; + run_sql(db_buf_size_callback, "select value from return_states where type=%d and %s", + BUF_SIZE, sql_filter); + if (!buf_size_recipe || strcmp(buf_size_recipe, "invalid") == 0) + return NULL; + return swap_format(expr, buf_size_recipe); +} + +static void match_call_assignment(struct expression *expr) +{ + char *sname; + + sname = get_allocation_recipe_from_call(expr->right); + if (!sname) + return; + set_state_expr(my_id, expr->left, alloc_state_sname(sname)); +} + +static void match_returns_call(struct expression *call) +{ + char *sname; + struct range_list *rl; + + sname = get_allocation_recipe_from_call(call); + if (option_debug) + sm_msg("sname = %s", sname); + if (!sname) + return; + get_implied_range_list(call, &rl); + sm_msg("info: return_allocation %d '%s' '%s' %s", + get_return_id(), show_ranges(rl), sname, global_static()); +} + +static void match_return(struct expression *expr) +{ + struct smatch_state *state; + struct range_list *rl; + + expr = strip_expr(expr); + if (!expr) + return; + + if (expr->type == EXPR_CALL) { + match_returns_call(expr); + return; + } + + state = get_state_expr(my_id, expr); + if (!state || !state->data) + return; + get_implied_range_list(expr, &rl); + sm_msg("info: return_allocation %d '%s' '%s' %s", + get_return_id(), show_ranges(rl), state->name, global_static()); +} + +void register_parse_call_math(int id) +{ + int i; + if (!option_info) + return; + + my_id = id; + for (i = 0; i < ARRAY_SIZE(alloc_functions); i++) + add_function_assign_hook(alloc_functions[i].func, &match_alloc, + INT_PTR(alloc_functions[i].param)); + add_hook(&match_call_assignment, CALL_ASSIGNMENT_HOOK); + add_hook(&match_return, RETURN_HOOK); +} + diff --git a/smatch_scripts/db/fill_db_return_states.pl b/smatch_scripts/db/fill_db_return_states.pl index 371621a8..380390b2 100755 --- a/smatch_scripts/db/fill_db_return_states.pl +++ b/smatch_scripts/db/fill_db_return_states.pl @@ -51,6 
+51,13 @@ while () { $value = ''; ($file_and_line, $func, $dummy, $dummy, $return_id, $return_value, $gs) = split(/ /, $_); $param = -1; + } elsif ($_ =~ /info: return_allocation /) { + # drivers/net/usb/hso.c:2374 hso_create_device() info: return_allocation 2 'min-max' '456' static + $type = 2; # BUF_SIZE + $key = ''; + ($file_and_line, $func, $dummy, $dummy, $return_id, $dummy) = split(/ /, $_); + ($dummy, $return_value, $dummy, $value, $gs) = split(/'/, $_); + $param = -1; } else { next; } -- 2.11.4.GIT