From 5b2bb66de6adc7f7f1dc4e8676ec3a4b9443db55 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Oct 2022 08:05:14 +0300 Subject: [PATCH] db: use hashes instead of file names It turns out that probably 40% of the database is just file names. On my system the DB is 40GB so that's a lot of data which can be saved. It takes me about 5 hours to create the DB so it can end up taking until 9am to do a test. This change makes the DB slightly more complicated to work with but it's definitely worth it because of all the time it saves. Signed-off-by: Dan Carpenter --- smatch.h | 2 + smatch_data/db/build_early_index.sh | 1 + smatch_data/db/call_implies.schema | 2 +- smatch_data/db/caller_info.schema | 2 +- smatch_data/db/common_caller_info.schema | 2 +- smatch_data/db/data_info.schema | 2 +- smatch_data/db/fn_data_link.schema | 2 +- smatch_data/db/function_ptr.schema | 2 +- smatch_data/db/function_type.schema | 2 +- smatch_data/db/function_type_info.schema | 2 +- smatch_data/db/function_type_size.schema | 2 +- smatch_data/db/function_type_value.schema | 2 +- smatch_data/db/hash_string.schema | 6 +++ smatch_data/db/local_values.schema | 2 +- smatch_data/db/mtag_about.schema | 2 +- smatch_data/db/mtag_info.schema | 2 +- smatch_data/db/param_map.schema | 2 +- smatch_data/db/parameter_name.schema | 2 +- smatch_data/db/return_implies.schema | 2 +- smatch_data/db/return_states.schema | 2 +- smatch_data/db/sink_info.schema | 2 +- smatch_data/db/smdb.py | 29 ++++++++---- smatch_data/db/type_info.schema | 2 +- smatch_db.c | 76 +++++++++++++++++-------------- smatch_mtag.c | 17 ++++++- smatch_type_val.c | 4 +- 26 files changed, 107 insertions(+), 66 deletions(-) create mode 100644 smatch_data/db/hash_string.schema diff --git a/smatch.h b/smatch.h index 421cc308..f21e3c96 100644 --- a/smatch.h +++ b/smatch.h @@ -974,6 +974,7 @@ extern struct sqlite3 *cache_db; bool db_incomplete(void); void db_ignore_states(int id); +unsigned long long get_base_file_id(void); typedef bool 
(delete_hook)(struct expression *expr); void add_delete_return_hook(delete_hook *hook); void select_caller_info_hook(void (*callback)(const char *name, struct symbol *sym, char *key, char *value), int type); @@ -1073,6 +1074,7 @@ do { \ #define sql_insert_or_ignore(table, values...) sql_insert_helper(table, 0, 1, 0, values); #define sql_insert_late(table, values...) sql_insert_helper(table, 0, 0, 1, values); #define sql_insert_cache(table, values...) sql_insert_helper(table, cache_db, 1, 0, values); +#define sql_insert_cache_or_ignore(table, values...) sql_insert_helper(table, cache_db, 1, 0, values); char *get_static_filter(struct symbol *sym); diff --git a/smatch_data/db/build_early_index.sh b/smatch_data/db/build_early_index.sh index 7e3d5735..6f80522c 100755 --- a/smatch_data/db/build_early_index.sh +++ b/smatch_data/db/build_early_index.sh @@ -41,6 +41,7 @@ CREATE INDEX mtag_data_idx on mtag_data (tag); CREATE INDEX mtag_map_idx1 on mtag_map (tag); CREATE INDEX mtag_map_idx2 on mtag_map (container); CREATE INDEX sink_index on sink_info (file, sink_name); +CREATE INDEX hash_index on hash_string (hash); EOF diff --git a/smatch_data/db/call_implies.schema b/smatch_data/db/call_implies.schema index 6133061b..898e99de 100644 --- a/smatch_data/db/call_implies.schema +++ b/smatch_data/db/call_implies.schema @@ -1,5 +1,5 @@ CREATE TABLE call_implies ( - file varchar(128), + file big int, function varchar(64), call_id integer, static boolean, diff --git a/smatch_data/db/caller_info.schema b/smatch_data/db/caller_info.schema index 31a029e8..e6d93fb9 100644 --- a/smatch_data/db/caller_info.schema +++ b/smatch_data/db/caller_info.schema @@ -1 +1 @@ -CREATE TABLE caller_info (file varchar(128), caller varchar(64), function varchar(64), call_id integer, static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); +CREATE TABLE caller_info (file big int, caller varchar(64), function varchar(64), call_id integer, static boolean, type integer, 
parameter integer, key varchar(256), value varchar(256)); diff --git a/smatch_data/db/common_caller_info.schema b/smatch_data/db/common_caller_info.schema index 9d344d02..9eccc7c9 100644 --- a/smatch_data/db/common_caller_info.schema +++ b/smatch_data/db/common_caller_info.schema @@ -1 +1 @@ -CREATE TABLE common_caller_info (file varchar(128), caller varchar(64), function varchar(64), call_id integer, static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); +CREATE TABLE common_caller_info (file big int, caller varchar(64), function varchar(64), call_id integer, static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); diff --git a/smatch_data/db/data_info.schema b/smatch_data/db/data_info.schema index 651a07c4..c43463b1 100644 --- a/smatch_data/db/data_info.schema +++ b/smatch_data/db/data_info.schema @@ -1 +1 @@ -CREATE TABLE data_info (file varchar(80), data varchar(80), type integer, value varchar(80)); +CREATE TABLE data_info (file big int, data varchar(80), type integer, value varchar(80)); diff --git a/smatch_data/db/fn_data_link.schema b/smatch_data/db/fn_data_link.schema index ba7170a9..fd2f8710 100644 --- a/smatch_data/db/fn_data_link.schema +++ b/smatch_data/db/fn_data_link.schema @@ -1 +1 @@ -CREATE TABLE fn_data_link (file varchar(128), function varchar(64), static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); +CREATE TABLE fn_data_link (file big int, function varchar(64), static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); diff --git a/smatch_data/db/function_ptr.schema b/smatch_data/db/function_ptr.schema index 3ad777de..c1050d1c 100644 --- a/smatch_data/db/function_ptr.schema +++ b/smatch_data/db/function_ptr.schema @@ -1,5 +1,5 @@ CREATE TABLE function_ptr ( - file varchar(128), + file big int, function varchar(64), ptr varchar(256), searchable integer, diff --git a/smatch_data/db/function_type.schema 
b/smatch_data/db/function_type.schema index 0c10c61e..d6a03198 100644 --- a/smatch_data/db/function_type.schema +++ b/smatch_data/db/function_type.schema @@ -1 +1 @@ -CREATE TABLE function_type (file varchar(80), function varchar(80), static boolean, parameter integer, value varchar(80)); +CREATE TABLE function_type (file big int, function varchar(80), static boolean, parameter integer, value varchar(80)); diff --git a/smatch_data/db/function_type_info.schema b/smatch_data/db/function_type_info.schema index fe065847..f86d8065 100644 --- a/smatch_data/db/function_type_info.schema +++ b/smatch_data/db/function_type_info.schema @@ -1 +1 @@ -CREATE TABLE function_type_info (file varchar(128), function varchar(80), type integer, struct varchar(80), member varchar(80), value varchar(80)); +CREATE TABLE function_type_info (file big int, function varchar(80), type integer, struct varchar(80), member varchar(80), value varchar(80)); diff --git a/smatch_data/db/function_type_size.schema b/smatch_data/db/function_type_size.schema index b9574fbb..1bcedc8f 100644 --- a/smatch_data/db/function_type_size.schema +++ b/smatch_data/db/function_type_size.schema @@ -1 +1 @@ -CREATE TABLE function_type_size (file varchar(128), function varchar(80), type varchar(80), size varchar(80)); +CREATE TABLE function_type_size (file big int, function varchar(80), type varchar(80), size varchar(80)); diff --git a/smatch_data/db/function_type_value.schema b/smatch_data/db/function_type_value.schema index 079fa545..36141f79 100644 --- a/smatch_data/db/function_type_value.schema +++ b/smatch_data/db/function_type_value.schema @@ -1 +1 @@ -CREATE TABLE function_type_value (file varchar(128), function varchar(80), type varchar(80), value varchar(80)); +CREATE TABLE function_type_value (file big int, function varchar(80), type varchar(80), value varchar(80)); diff --git a/smatch_data/db/hash_string.schema b/smatch_data/db/hash_string.schema new file mode 100644 index 00000000..9192d238 --- /dev/null 
+++ b/smatch_data/db/hash_string.schema @@ -0,0 +1,6 @@ +CREATE TABLE hash_string ( + hash big int, + value varchar(80), + + CONSTRAINT hash_string_row UNIQUE (hash, value) +); diff --git a/smatch_data/db/local_values.schema b/smatch_data/db/local_values.schema index 6c78efc5..84e0f190 100644 --- a/smatch_data/db/local_values.schema +++ b/smatch_data/db/local_values.schema @@ -1 +1 @@ -CREATE TABLE local_values (file varchar(128), variable varchar(64), value varchar(256)); +CREATE TABLE local_values (file big int, variable varchar(64), value varchar(256)); diff --git a/smatch_data/db/mtag_about.schema b/smatch_data/db/mtag_about.schema index 74939582..35a193d8 100644 --- a/smatch_data/db/mtag_about.schema +++ b/smatch_data/db/mtag_about.schema @@ -1,6 +1,6 @@ CREATE TABLE mtag_about ( tag big int, - file varchar(80), + file big int, function varchar(80), line integer, left_name varchar(80), diff --git a/smatch_data/db/mtag_info.schema b/smatch_data/db/mtag_info.schema index f7816832..c445cb13 100644 --- a/smatch_data/db/mtag_info.schema +++ b/smatch_data/db/mtag_info.schema @@ -1,5 +1,5 @@ CREATE TABLE mtag_info ( - file varchar(80), + file big int, tag big int, type integer, value varchar(80), diff --git a/smatch_data/db/param_map.schema b/smatch_data/db/param_map.schema index 88853134..a33b8ce9 100644 --- a/smatch_data/db/param_map.schema +++ b/smatch_data/db/param_map.schema @@ -1 +1 @@ -CREATE TABLE param_map (file varchar(256), to_from bool, fn_ptr varchar(80), parameter integer, source varchar(80)); +CREATE TABLE param_map (file big int, to_from bool, fn_ptr varchar(80), parameter integer, source varchar(80)); diff --git a/smatch_data/db/parameter_name.schema b/smatch_data/db/parameter_name.schema index 14da987e..737da0d8 100644 --- a/smatch_data/db/parameter_name.schema +++ b/smatch_data/db/parameter_name.schema @@ -1 +1 @@ -CREATE TABLE parameter_name (file varchar(80), function varchar(80), static boolean, parameter integer, value varchar(80)); +CREATE TABLE 
parameter_name (file big int, function varchar(80), static boolean, parameter integer, value varchar(80)); diff --git a/smatch_data/db/return_implies.schema b/smatch_data/db/return_implies.schema index 065d0f6d..07e4513e 100644 --- a/smatch_data/db/return_implies.schema +++ b/smatch_data/db/return_implies.schema @@ -1,5 +1,5 @@ CREATE TABLE return_implies ( - file varchar(128), + file big int, function varchar(64), call_id integer, static boolean, diff --git a/smatch_data/db/return_states.schema b/smatch_data/db/return_states.schema index d6f0bbe3..e38bfb3d 100644 --- a/smatch_data/db/return_states.schema +++ b/smatch_data/db/return_states.schema @@ -1 +1 @@ -CREATE TABLE return_states (file varchar(128), function varchar(64), call_id big int, return_id integer, return varchar(256), static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); +CREATE TABLE return_states (file big int, function varchar(64), call_id big int, return_id integer, return varchar(256), static boolean, type integer, parameter integer, key varchar(256), value varchar(256)); diff --git a/smatch_data/db/sink_info.schema b/smatch_data/db/sink_info.schema index dc2a285a..07769028 100644 --- a/smatch_data/db/sink_info.schema +++ b/smatch_data/db/sink_info.schema @@ -1 +1 @@ -CREATE TABLE sink_info (file varchar(128), static boolean, sink_name varchar(64), type integer, key varchar(256), value varchar(256)); +CREATE TABLE sink_info (file big int, static boolean, sink_name varchar(64), type integer, key varchar(256), value varchar(256)); diff --git a/smatch_data/db/smdb.py b/smatch_data/db/smdb.py index c982848f..92beda64 100755 --- a/smatch_data/db/smdb.py +++ b/smatch_data/db/smdb.py @@ -267,6 +267,19 @@ def val_to_txt(val): else: return "%d" %(val) +hash_strings = {} +def hash_to_string(sha): + if sha in hash_strings: + return hash_strings[sha] + + cur = con.cursor() + cur.execute("select value from hash_string where hash = '%d';" %(sha)) + for txt in cur: + 
hash_strings[sha] = txt[0] + if not sha in hash_strings: + hash_strings[sha] = "%x" %(sha) + return hash_strings[sha] + def get_next_str(txt): val = "" parsed = 0 @@ -355,7 +368,7 @@ def display_caller_info(printed, cur, param_names): if len(param_names) and parameter in param_names: key = key.replace("$", param_names[parameter]) - print("%20s | %20s | %20s |" %(txt[0], txt[1], txt[2]), end = '') + print("%20s | %20s | %20s |" %(hash_to_string(txt[0]), txt[1], txt[2]), end = '') print(" %18s |" %(type_to_str(txt[5])), end = '') print(" %2d | %15s | %s" %(parameter, key, txt[8])) return printed @@ -474,7 +487,7 @@ def print_return_states(func): if count == 0: print("file | function | return_id | return_value | type | param | key | value |") count += 1 - print("%s | %s | %2s | %13s" %(txt[0], txt[1], txt[3], txt[4]), end = '') + print("%s | %s | %2s | %13s" %(hash_to_string(txt[0]), txt[1], txt[3], txt[4]), end = '') print("| %15s |" %(type_to_str(txt[6])), end = '') print(" %2d | %20s | %20s |" %(txt[7], txt[8], txt[9])) except: @@ -488,7 +501,7 @@ def print_return_implies(func): if not count: print("file | function | type | param | key | value |") count += 1 - print("%15s | %15s" %(txt[0], txt[1]), end = '') + print("%15s | %15s" %(hash_to_string(txt[0]), txt[1]), end = '') print("| %15s" %(type_to_str(txt[4])), end = '') print("| %3d | %15s | %15s |" %(txt[5], txt[6], txt[7])) @@ -502,14 +515,14 @@ def print_type_size(struct_type, member): cur.execute("select * from function_type_size where type like '(struct %s)->%s';" %(struct_type, member)) print("file | function | type | size") for txt in cur: - print("%-15s | %-15s | %-15s | %s" %(txt[0], txt[1], txt[2], txt[3])) + print("%-15s | %-15s | %-15s | %s" %(hash_to_string(txt[0]), txt[1], txt[2], txt[3])) def print_data_info(struct_type, member): cur = con.cursor() cur.execute("select * from data_info where data like '(struct %s)->%s';" %(struct_type, member)) print("file | data | type | value") for txt in cur: - 
print("%-15s | %-15s | %-15s | %s" %(txt[0], txt[1], type_to_str(txt[2]), txt[3])) + print("%-15s | %-15s | %-15s | %s" %(hash_to_string(txt[0]), txt[1], type_to_str(txt[2]), txt[3])) def print_fn_ptrs(func): ptrs = get_function_pointers(func) @@ -528,7 +541,7 @@ def print_functions(struct, member): cur.execute("select * from function_ptr where ptr like '%%->%s';" %(member)) print("File | Pointer | Function | Static") for txt in cur: - print("%-15s | %-15s | %-15s | %s" %(txt[0], txt[2], txt[1], txt[3])) + print("%-15s | %-15s | %-15s | %s" %(hash_to_string(txt[0]), txt[2], txt[1], txt[3])) class CallTree: def __init__(self, func, printed = ""): @@ -642,7 +655,7 @@ def function_type_value(struct_type, member): cur = con.cursor() cur.execute("select * from function_type_value where type like '(struct %s)->%s';" %(struct_type, member)) for txt in cur: - print("%-30s | %-30s | %s | %s" %(txt[0], txt[1], txt[2], txt[3])) + print("%-30s | %-30s | %s | %s" %(hash_to_string(txt[0]), txt[1], txt[2], txt[3])) def rl_too_big(txt): rl = txt_to_rl(txt) @@ -795,7 +808,7 @@ def print_locals(filename): cur = con.cursor() cur.execute("select file,data,value from data_info where file = '%s' and type = 8029 and value != 0;" %(filename)) for txt in cur: - print("%s | %s | %s" %(txt[0], txt[1], txt[2])) + print("%s | %s | %s" %(hash_to_string(txt[0]), txt[1], txt[2])) def constraint(struct_type, member): cur = con.cursor() diff --git a/smatch_data/db/type_info.schema b/smatch_data/db/type_info.schema index cf07a641..590fe9a4 100644 --- a/smatch_data/db/type_info.schema +++ b/smatch_data/db/type_info.schema @@ -1,5 +1,5 @@ CREATE TABLE type_info ( - file varchar(80), + file big int, type integer, key varchar(80), value varchar(80), diff --git a/smatch_db.c b/smatch_db.c index 7f363890..1f4804d3 100644 --- a/smatch_db.c +++ b/smatch_db.c @@ -284,6 +284,11 @@ static void set_fn_mtag(struct symbol *sym) __fn_mtag = str_to_mtag(buf); } +unsigned long long get_base_file_id(void) +{ + return 
str_to_llu_hash(get_base_file()); +} + void sql_insert_return_states(int return_id, const char *return_ranges, int type, int param, const char *key, const char *value) { @@ -297,8 +302,8 @@ void sql_insert_return_states(int return_id, const char *return_ranges, else id = __fn_mtag; - sql_insert(return_states, "'%s', '%s', %llu, %d, '%s', %d, %d, %d, '%s', '%s'", - get_base_file(), get_function(), id, return_id, + sql_insert(return_states, "0x%llx, '%s', %llu, %d, '%s', %d, %d, %d, '%s', '%s'", + get_base_file_id(), get_function(), id, return_id, return_ranges, is_local(cur_func_sym), type, param, key, value); } @@ -346,8 +351,8 @@ void sql_insert_caller_info(struct expression *call, int type, if (__inline_call) { mem_sql(NULL, NULL, - "insert into caller_info values ('%s', '%s', '%s', %lu, %d, %d, %d, '%s', '%s');", - get_base_file(), get_function(), fn, (unsigned long)call, + "insert into caller_info values (0x%llx, '%s', '%s', %lu, %d, %d, %d, '%s', '%s');", + get_base_file_id(), get_function(), fn, (unsigned long)call, is_static(call->fn), type, param, key, value); } @@ -361,8 +366,8 @@ void sql_insert_caller_info(struct expression *call, int type, sm_outfd = caller_info_fd; sm_msg("SQL_caller_info: insert into caller_info values (" - "'%s', '%s', '%s', %%CALL_ID%%, %d, %d, %d, '%s', '%s');", - get_base_file(), get_function(), fn, is_static(call->fn), + "0x%llx, '%s', '%s', %%CALL_ID%%, %d, %d, %d, '%s', '%s');", + get_base_file_id(), get_function(), fn, is_static(call->fn), type, param, key, value); sm_outfd = tmp_fd; @@ -371,59 +376,59 @@ void sql_insert_caller_info(struct expression *call, int type, void sql_insert_function_ptr(const char *fn, const char *struct_name) { - sql_insert_or_ignore(function_ptr, "'%s', '%s', '%s', 0", - get_base_file(), fn, struct_name); + sql_insert_or_ignore(function_ptr, "0x%llx, '%s', '%s', 0", + get_base_file_id(), fn, struct_name); } void sql_insert_return_implies(int type, int param, const char *key, const char *value) { - 
sql_insert_or_ignore(return_implies, "'%s', '%s', %lu, %d, %d, %d, '%s', '%s'", - get_base_file(), get_function(), (unsigned long)__inline_fn, + sql_insert_or_ignore(return_implies, "0x%llx, '%s', %lu, %d, %d, %d, '%s', '%s'", + get_base_file_id(), get_function(), (unsigned long)__inline_fn, fn_static(), type, param, key, value); } void sql_insert_call_implies(int type, int param, const char *key, const char *value) { - sql_insert_or_ignore(call_implies, "'%s', '%s', %lu, %d, %d, %d, '%s', '%s'", - get_base_file(), get_function(), (unsigned long)__inline_fn, + sql_insert_or_ignore(call_implies, "0x%llx, '%s', %lu, %d, %d, %d, '%s', '%s'", + get_base_file_id(), get_function(), (unsigned long)__inline_fn, fn_static(), type, param, key, value); } void sql_insert_function_type_size(const char *member, const char *ranges) { - sql_insert(function_type_size, "'%s', '%s', '%s', '%s'", get_base_file(), get_function(), member, ranges); + sql_insert(function_type_size, "0x%llx, '%s', '%s', '%s'", get_base_file_id(), get_function(), member, ranges); } void sql_insert_function_type_info(int type, const char *struct_type, const char *member, const char *value) { - sql_insert(function_type_info, "'%s', '%s', %d, '%s', '%s', '%s'", get_base_file(), get_function(), type, struct_type, member, value); + sql_insert(function_type_info, "0x%llx, '%s', %d, '%s', '%s', '%s'", get_base_file_id(), get_function(), type, struct_type, member, value); } void sql_insert_type_info(int type, const char *member, const char *value) { - sql_insert_cache(type_info, "'%s', %d, '%s', '%s'", get_base_file(), type, member, value); + sql_insert_cache(type_info, "0x%llx, %d, '%s', '%s'", get_base_file_id(), type, member, value); } void sql_insert_local_values(const char *name, const char *value) { - sql_insert(local_values, "'%s', '%s', '%s'", get_base_file(), name, value); + sql_insert(local_values, "0x%llx, '%s', '%s'", get_base_file_id(), name, value); } void sql_insert_function_type_value(const char 
*type, const char *value) { - sql_insert(function_type_value, "'%s', '%s', '%s', '%s'", get_base_file(), get_function(), type, value); + sql_insert(function_type_value, "0x%llx, '%s', '%s', '%s'", get_base_file_id(), get_function(), type, value); } void sql_insert_function_type(int param, const char *value) { - sql_insert(function_type, "'%s', '%s', %d, %d, '%s'", - get_base_file(), get_function(), fn_static(), param, value); + sql_insert(function_type, "0x%llx, '%s', %d, %d, '%s'", + get_base_file_id(), get_function(), fn_static(), param, value); } void sql_insert_parameter_name(int param, const char *value) { - sql_insert(parameter_name, "'%s', '%s', %d, %d, '%s'", - get_base_file(), get_function(), fn_static(), param, value); + sql_insert(parameter_name, "0x%llx, '%s', %d, %d, '%s'", + get_base_file_id(), get_function(), fn_static(), param, value); } void sql_insert_data_info(struct expression *data, int type, const char *value) @@ -433,15 +438,15 @@ void sql_insert_data_info(struct expression *data, int type, const char *value) data_name = get_data_info_name(data); if (!data_name) return; - sql_insert(data_info, "'%s', '%s', %d, '%s'", - is_static(data) ? get_base_file() : "extern", + sql_insert(data_info, "0x%llx, '%s', %d, '%s'", + is_static(data) ? get_base_file_id() : 0, data_name, type, value); } void sql_insert_data_info_var_sym(const char *var, struct symbol *sym, int type, const char *value) { - sql_insert(data_info, "'%s', '%s', %d, '%s'", - (sym->ctype.modifiers & MOD_STATIC) ? get_base_file() : "extern", + sql_insert(data_info, "0x%llx, '%s', %d, '%s'", + (sym->ctype.modifiers & MOD_STATIC) ? get_base_file_id() : 0, var, type, value); } @@ -478,8 +483,8 @@ void sql_insert_fn_data_link(struct expression *fn, int type, int param, const c if (fn->type != EXPR_SYMBOL || !fn->symbol->ident) return; - sql_insert(fn_data_link, "'%s', '%s', %d, %d, %d, '%s', '%s'", - is_local(fn->symbol) ? 
get_base_file() : "extern", + sql_insert(fn_data_link, "0x%llx, '%s', %d, %d, %d, '%s', '%s'", + is_local(fn->symbol) ? get_base_file_id() : 0, fn->symbol->ident->name, is_local(fn->symbol), type, param, key, value); @@ -562,8 +567,8 @@ char *get_static_filter(struct symbol *sym) if (is_local(sym)) { snprintf(sql_filter, sizeof(sql_filter), - "file = '%s' and function = '%s' and static = '1'", - get_base_file(), sym->ident->name); + "file = 0x%llx and function = '%s' and static = '1'", + get_base_file_id(), sym->ident->name); } else { snprintf(sql_filter, sizeof(sql_filter), "function = '%s' and static = '0'", sym->ident->name); @@ -1373,13 +1378,13 @@ static char *get_next_ptr_name(void) return NULL; } -static void get_ptr_names(const char *file, const char *name) +static void get_ptr_names(unsigned long long file, const char *name) { char sql_filter[1024]; int before, after; if (file) { - snprintf(sql_filter, 1024, "file = '%s' and function = '%s';", + snprintf(sql_filter, 1024, "file = 0x%llx and function = '%s';", file, name); } else { snprintf(sql_filter, 1024, "function = '%s';", name); @@ -1396,7 +1401,7 @@ static void get_ptr_names(const char *file, const char *name) return; while ((name = get_next_ptr_name())) - get_ptr_names(NULL, name); + get_ptr_names(0, name); } static void match_data_from_db(struct symbol *sym) @@ -1419,9 +1424,9 @@ static void match_data_from_db(struct symbol *sym) char *ptr; if (sym->ctype.modifiers & MOD_STATIC) - get_ptr_names(get_base_file(), sym->ident->name); + get_ptr_names(get_base_file_id(), sym->ident->name); else - get_ptr_names(NULL, sym->ident->name); + get_ptr_names(0, sym->ident->name); if (ptr_list_size((struct ptr_list *)ptr_names) > 20) { __free_ptr_list((struct ptr_list **)&ptr_names); @@ -2798,6 +2803,7 @@ static void init_cachedb(void) "db/mtag_data.schema", "db/mtag_info.schema", "db/sink_info.schema", + "db/hash_string.schema", }; static char buf[4096]; int fd; @@ -2863,7 +2869,7 @@ static void 
dump_cache(struct symbol_list *sym_list) { const char *cache_tables[] = { "type_info", "return_implies", "call_implies", "mtag_data", - "mtag_info", "mtag_about", "sink_info", + "mtag_info", "mtag_about", "sink_info", "hash_string", }; char buf[64]; int i; diff --git a/smatch_mtag.c b/smatch_mtag.c index 127b64ef..d329d7c4 100644 --- a/smatch_mtag.c +++ b/smatch_mtag.c @@ -50,7 +50,12 @@ static int my_id; -unsigned long long str_to_llu_hash(const char *str) +static void store_hash(const char *str, unsigned long long hash) +{ + sql_insert_cache_or_ignore(hash_string, "0x%llx, '%s'", hash, str); +} + +static unsigned long long str_to_llu_hash_helper(const char *str, bool store) { unsigned char c[EVP_MAX_MD_SIZE]; unsigned long long *tag = (unsigned long long *)&c; @@ -68,14 +73,22 @@ unsigned long long str_to_llu_hash(const char *str) EVP_DigestFinal_ex(mdctx, c, NULL); EVP_MD_CTX_destroy(mdctx); + if (store) + store_hash(str, *tag); + return *tag; } +unsigned long long str_to_llu_hash(const char *str) +{ + return str_to_llu_hash_helper(str, true); +} + mtag_t str_to_mtag(const char *str) { unsigned long long tag; - tag = str_to_llu_hash(str); + tag = str_to_llu_hash_helper(str, false); tag &= ~MTAG_ALIAS_BIT; tag &= ~MTAG_OFFSET_MASK; diff --git a/smatch_type_val.c b/smatch_type_val.c index 615c85d9..bc84347a 100644 --- a/smatch_type_val.c +++ b/smatch_type_val.c @@ -408,8 +408,8 @@ static char *db_get_parameter_type(int param) run_sql(set_param_type, &ret, "select value from fn_data_link where " - "file = '%s' and function = '%s' and static = %d and type = %d and parameter = %d and key = '$';", - (cur_func_sym->ctype.modifiers & MOD_STATIC) ? get_base_file() : "extern", + "file = 0x%llx and function = '%s' and static = %d and type = %d and parameter = %d and key = '$';", + (cur_func_sym->ctype.modifiers & MOD_STATIC) ? get_base_file_id() : 0, cur_func_sym->ident->name, !!(cur_func_sym->ctype.modifiers & MOD_STATIC), PASSES_TYPE, param); -- 2.11.4.GIT