db: rename call_implies to return_implies
[smatch.git] / smatch_data / db / smdb.py
blobf0ef5911aa059a9031de676059799c94a6451a31
1 #!/usr/bin/python
3 # Copyright (C) 2013 Oracle.
5 # Licensed under the Open Software License version 1.1
7 import sqlite3
8 import sys
9 import re
# Open the Smatch database found in the current working directory.  Every
# query helper below goes through this module-level connection.
try:
    con = sqlite3.connect('smatch_db.sqlite')
except sqlite3.Error as e:
    # "except ... as" and the call form of print work on Python 2.6+ and 3.
    print("Error %s:" % e.args[0])
    sys.exit(1)
def usage():
    """Print the list of supported commands and exit with status 1."""
    lines = [
        "%s" % (sys.argv[0]),
        "<function> - how a function is called",
        "return_states <function> - what a function returns",
        "call_tree <function> - show the call tree",
        "where <struct_type> <member> - where a struct member is set",
        "type_size <struct_type> <member> - how a struct member is allocated",
        "data_info <struct_type> <member> - information about a given data type",
        "function_ptr <function> - which function pointers point to this",
        "trace_param <function> <param> - trace where a parameter came from",
    ]
    # One single-argument print per line behaves the same on Python 2 and 3.
    for line in lines:
        print(line)
    sys.exit(1)
# Shared state for the function-pointer walk; get_function_pointers()
# replaces both lists before starting a new walk.
function_ptrs = []
searched_ptrs = []


def get_function_pointers_helper(func):
    """Collect, recursively, the pointers recorded for func.

    Reads the distinct "ptr" values stored for func in the function_ptr
    table.  Each value not already in the global function_ptrs list is
    appended to it and then looked up in turn, so chains of pointers are
    followed.  Results are accumulated in the globals; nothing is returned.
    """
    cur = con.cursor()
    # Bind the name as a parameter rather than interpolating it into the
    # statement, so a name containing a quote cannot break the query.
    cur.execute("select distinct ptr from function_ptr where function = ?;",
                (func,))
    for row in cur:
        ptr = row[0]
        if ptr in function_ptrs:
            continue
        function_ptrs.append(ptr)
        if ptr not in searched_ptrs:
            searched_ptrs.append(ptr)
            get_function_pointers_helper(ptr)
def get_function_pointers(func):
    """Return a list holding func followed by the pointers found for it.

    The global work lists are replaced with fresh ones seeded with func, the
    recursive helper fills them in, and the (global) result list is returned.
    """
    global function_ptrs, searched_ptrs
    function_ptrs, searched_ptrs = [func], [func]
    get_function_pointers_helper(func)
    return function_ptrs
# Numeric type codes as stored in the database, mapped to their symbolic
# names.  Used by type_to_str() / type_to_int() for display and filtering.
db_types = {
    0: "INTERNAL",
    101: "PARAM_CLEARED",
    103: "PARAM_LIMIT",
    104: "PARAM_FILTER",
    1001: "PARAM_VALUE",
    1002: "BUF_SIZE",
    1003: "USER_DATA",
    1004: "CAPPED_DATA",
    1005: "RETURN_VALUE",
    1006: "DEREFERENCE",
    1007: "RANGE_CAP",
    1008: "LOCK_HELD",
    1009: "LOCK_RELEASED",
    1010: "ABSOLUTE_LIMITS",
    1012: "PARAM_ADD",
    1013: "PARAM_FREED",
    1014: "DATA_SOURCE",
    1015: "FUZZY_MAX",
    1016: "STR_LEN",
    1017: "ARRAY_LEN",
    1018: "CAPABLE",
    1019: "NS_CAPABLE",
    1022: "TYPE_LINK",
    1023: "UNTRACKED_PARAM",
    1024: "CULL_PATH",
    1025: "PARAM_SET",
    1026: "PARAM_USED",
    1027: "BYTE_UNITS",
    1028: "COMPARE_LIMIT",
    1029: "PARAM_COMPARE",
    8017: "USER_DATA2",
    8018: "NO_OVERFLOW",
    8019: "NO_OVERFLOW_SIMPLE",
    8020: "LOCKED",
    8021: "UNLOCKED",
    8023: "ATOMIC_INC",
    8024: "ATOMIC_DEC",
}
def add_range(rl, min_val, max_val):
    """Return a range list equal to rl with [min_val, max_val] merged in.

    rl is a list of [min, max] pairs.  The single front-to-back walk below
    only gives a sensible result when those pairs are in ascending order and
    do not overlap, which is what repeated calls starting from [] produce.
    Existing ranges that overlap the new one, or sit directly next to it
    (differ by one), are folded into a single range.

    The list passed in is not modified; pairs after the merge point are
    reused as-is in the returned list.
    """
    if not rl:
        return [[min_val, max_val]]

    check_next = False  # new range is in ret but may still absorb later ones
    done = False        # the loop was left through a break
    ret = []
    idx = 0

    for idx, cur in enumerate(rl):
        cur_min = cur[0]
        cur_max = cur[1]

        # we already merged the new range but we might need to change later
        # ranges if they overlap with more than one
        if check_next:
            # join with added range
            if max_val + 1 == cur_min:
                ret[-1][1] = cur_max
                done = True
                break
            # don't overlap
            if max_val < cur_min:
                ret.append([cur_min, cur_max])
                done = True
                break
            # partially overlap
            if max_val < cur_max:
                ret[-1][1] = cur_max
                done = True
                break
            # completely overlap
            continue

        # join 2 ranges into one
        if max_val + 1 == cur_min:
            ret.append([min_val, cur_max])
            done = True
            break
        # range is entirely below
        if max_val < cur_min:
            ret.append([min_val, max_val])
            ret.append([cur_min, cur_max])
            done = True
            break
        # range is partially below
        if min_val < cur_min:
            if max_val <= cur_max:
                ret.append([min_val, cur_max])
                done = True
                break
            ret.append([min_val, max_val])
            check_next = True
            continue
        # range already included
        if max_val <= cur_max:
            ret.append([cur_min, cur_max])
            done = True
            break
        # range partially above
        if min_val <= cur_max:
            ret.append([cur_min, max_val])
            check_next = True
            continue
        # join 2 ranges on the other side
        if min_val - 1 == cur_max:
            ret.append([cur_min, max_val])
            check_next = True
            continue
        # range is above
        ret.append([cur_min, cur_max])

    if idx + 1 < len(rl):       # we hit a break statement
        ret = ret + rl[idx + 1:]
    elif done:                  # we hit a break on the last iteration
        pass
    elif not check_next:        # it's past the end of the rl
        ret.append([min_val, max_val])

    return ret
def rl_union(rl1, rl2):
    """Return the union of two range lists.

    Every [min, max] pair of rl1 and then of rl2 is merged, one at a time,
    into a list that starts out empty.  A diagnostic line is printed if the
    inputs were non-empty but the result is empty.
    """
    ret = []
    for rl in (rl1, rl2):
        for r in rl:
            ret = add_range(ret, r[0], r[1])

    if (rl1 or rl2) and not ret:
        # Single-argument call form: same output on Python 2 and 3.
        print("bug: merging %s + %s gives empty" % (rl1, rl2))

    return ret
def txt_to_val(txt):
    """Convert one value token of a range string to an integer.

    The symbolic limit names map to the corresponding power-of-two bounds;
    anything else is parsed with int().  Text that int() rejects with
    ValueError yields 0.
    """
    limits = {
        "s64min": -(2**63),
        "s32min": -(2**31),
        "s16min": -(2**15),
        "s64max": 2**63 - 1,
        "s32max": 2**31 - 1,
        "s16max": 2**15 - 1,
        "u64max": 2**64 - 1,
        "u32max": 2**32 - 1,
        "u16max": 2**16 - 1,
    }
    if txt in limits:
        return limits[txt]
    try:
        return int(txt)
    except ValueError:
        return 0
def val_to_txt(val):
    """Format an integer the way range strings spell it.

    The signed/unsigned 16/32/64-bit limits are written by name, other
    negative numbers are wrapped in parentheses and everything else is
    printed as a plain decimal.
    """
    names = {
        -(2**63): "s64min",
        -(2**31): "s32min",
        -(2**15): "s16min",
        2**63 - 1: "s64max",
        2**31 - 1: "s32max",
        2**15 - 1: "s16max",
        2**64 - 1: "u64max",
        2**32 - 1: "u32max",
        2**16 - 1: "u16max",
    }
    if val in names:
        return names[val]
    fmt = "(%d)" if val < 0 else "%d"
    return fmt % (val)
def get_next_str(txt):
    """Split the first value token off the front of a range string.

    Returns [parsed, val]: the number of characters consumed and the token
    text.  Three spellings are recognised:

    * "(...)"  - the text between the parentheses is the token and both
                 parentheses are counted as consumed;
    * "s..." / "u..." - a six-character limit name such as "s64min";
    * otherwise - an optional leading '-' followed by everything up to the
                 next '-' (the min/max separator) or the end of the text.

    An empty string yields [0, ""] instead of raising IndexError.
    """
    if not txt:
        return [0, ""]

    first = txt[0]
    if first == '(':
        close = txt.find(')', 1)
        if close == -1:
            # No closing parenthesis: take the rest, and still count one
            # extra character, as the character-by-character scan did.
            close = len(txt)
        return [close + 1, txt[1:close]]

    if first == 's' or first == 'u':
        return [6, txt[:6]]

    # A leading '-' is a sign, not a separator, so start looking after it.
    start = 1 if first == '-' else 0
    end = txt.find('-', start)
    if end == -1:
        end = len(txt)
    return [end, txt[:end]]
def txt_to_rl(txt):
    """Parse a comma-separated range string into a list of [min, max] pairs.

    Each comma-separated piece is either a single value (min == max) or
    "min-max".  An empty string gives an empty list.
    """
    if len(txt) == 0:
        return []

    ret = []
    for pair in txt.split(","):
        used, low_txt = get_next_str(pair)
        if used == len(pair):
            # Only one token in this piece: a single value.
            high_txt = low_txt
        else:
            # Skip the '-' separator and read the upper bound.
            high_txt = get_next_str(pair[used + 1:])[1]
        low = txt_to_val(low_txt)
        high = txt_to_val(high_txt)
        ret.append([low, high])

    # Hm...  Smatch won't call INT_MAX s32max if the variable is unsigned.
    #    if txt != rl_to_txt(ret):
    #        print "bug: converting: text = %s rl = %s internal = %s" %(txt, rl_to_txt(ret), ret)

    return ret
def rl_to_txt(rl):
    """Format a list of [min, max] pairs as a comma-separated range string.

    A pair whose bounds are equal is written as a single value, any other
    pair as "min-max".  An empty list gives an empty string.
    """
    parts = []
    for cur in rl:
        cur_min = cur[0]
        cur_max = cur[1]
        if cur_min == cur_max:
            parts.append(val_to_txt(cur_min))
        else:
            parts.append(val_to_txt(cur_min) + "-" + val_to_txt(cur_max))
    # join() instead of repeated string concatenation in the loop.
    return ",".join(parts)
def type_to_str(type_int):
    """Return the symbolic name for a numeric type code.

    type_int is converted with int() for the lookup in db_types.  Codes that
    are not in the table are returned unchanged (as passed in, not as the
    converted integer).
    """
    t = int(type_int)
    # "in" replaces dict.has_key(), which no longer exists on Python 3.
    if t in db_types:
        return db_types[t]
    return type_int
def type_to_int(type_string):
    """Return the numeric code whose name in db_types is type_string.

    Returns -1 when no entry has that name.
    """
    for code, label in db_types.items():
        if label == type_string:
            return code
    return -1
def display_caller_info(printed, cur, param_names):
    """Print every row of a "select * from caller_info" cursor.

    printed tells whether the column header has already been written; the
    (possibly updated) flag is returned so the caller can pass it to the
    next call.  Column 6 of a row is used as the parameter index and column
    7 as the key; a "$" in the key is replaced by the parameter's name when
    param_names has an entry for that index.
    """
    for txt in cur:
        if not printed:
            print("file | caller | function | type | parameter | key | value |")
            printed = 1

        parameter = int(txt[6])
        key = txt[7]
        if parameter in param_names:
            key = key.replace("$", param_names[parameter])

        # One format string reproduces what three trailing-comma print
        # statements emitted (the pieces were joined by single spaces).
        print("%20s | %20s | %20s |  %10s |  %d | %s | %s"
              % (txt[0], txt[1], txt[2], type_to_str(txt[5]),
                 parameter, key, txt[8]))
    return printed
def get_caller_info(filename, ptrs, my_type, func=None):
    """Print the caller_info rows for every name in ptrs.

    filename and func select the parameter names used to expand "$" in the
    printed keys.  my_type, when not "", restricts the rows to the type
    with that symbolic name.

    func used to be read from a module-level global that only exists once
    the command-line code has run.  It is now an optional argument; when it
    is omitted the first entry of ptrs is used, which is the function
    itself for lists built by get_function_pointers().
    """
    cur = con.cursor()
    if func is None:
        func = ptrs[0] if ptrs else ""
    param_names = get_param_names(filename, func)
    printed = 0
    for ptr in ptrs:
        # Bound parameters keep odd characters in names from breaking the SQL.
        if my_type != "":
            cur.execute("select * from caller_info where function = ? and type = ?;",
                        (ptr, type_to_int(my_type)))
        else:
            cur.execute("select * from caller_info where function = ?;",
                        (ptr,))
        printed = display_caller_info(printed, cur, param_names)


def print_caller_info(filename, func, my_type = ""):
    """Print caller information for func and the pointers that refer to it."""
    ptrs = get_function_pointers(func)
    get_caller_info(filename, ptrs, my_type, func)
def merge_values(param_names, vals, cur):
    """Fold the (parameter, key, value) rows of cur into vals.

    vals maps parameter index -> key name -> [row_count, range_list] and is
    updated in place.  "$" in a key is replaced by the parameter's name when
    param_names has one.
    """
    for row in cur:
        parameter = int(row[0])
        name = row[1]
        rl = txt_to_rl(row[2])
        if parameter in param_names:
            name = name.replace("$", param_names[parameter])

        per_param = vals.setdefault(parameter, {})

        # the first item on the list is the number of rows.  it's incremented
        # for every row merged under the same name.
        entry = per_param.get(name)
        if entry is None:
            per_param[name] = [1, rl]
        else:
            per_param[name] = [entry[0] + 1, rl_union(entry[1], rl)]
def get_param_names(filename, func):
    """Return {parameter index: name} for func from the parameter_name table.

    The lookup is first restricted to rows whose file equals filename; if
    that finds nothing, it is repeated on the function name alone.
    """
    cur = con.cursor()

    # Bound parameters instead of string interpolation, so file or function
    # names containing a quote cannot break the statement.
    cur.execute("select parameter, value from parameter_name where file = ? and function = ?;",
                (filename, func))
    param_names = dict((int(row[0]), row[1]) for row in cur)
    if param_names:
        return param_names

    cur.execute("select parameter, value from parameter_name where function = ?;",
                (func,))
    return dict((int(row[0]), row[1]) for row in cur)
def get_caller_count(ptrs):
    """Return the number of distinct call_id values, summed over ptrs.

    Each name in ptrs is counted separately in caller_info and the counts
    are added together.
    """
    cur = con.cursor()
    count = 0
    for ptr in ptrs:
        # Bound parameter rather than a name pasted into the SQL text.
        cur.execute("select count(distinct(call_id)) from caller_info where function = ?;",
                    (ptr,))
        for txt in cur:
            count += int(txt[0])
    return count
def print_merged_caller_values(filename, func, ptrs, param_names, call_cnt):
    """Print, per parameter and key, the union of the PARAM_VALUE ranges.

    Rows for every name in ptrs are merged together.  Only entries whose
    merged row count equals call_cnt are printed; the others are skipped.
    filename and func are accepted for symmetry with the other printers and
    are not used here.
    """
    cur = con.cursor()
    vals = {}
    param_value = type_to_int("PARAM_VALUE")
    for ptr in ptrs:
        # Bound parameters instead of values pasted into the SQL text.
        cur.execute("select parameter, key, value from caller_info where function = ? and type = ?;",
                    (ptr, param_value))
        merge_values(param_names, vals, cur)

    for param in sorted(vals):
        for name in sorted(vals[param]):
            row_count, rl = vals[param][name]
            if row_count != call_cnt:
                continue
            print("%d %s -> %s" % (param, name, rl_to_txt(rl)))
def print_unmerged_caller_values(filename, func, ptrs, param_names):
    """Print every PARAM_VALUE row for each name in ptrs, one per line.

    A separator line is printed after the rows of each name.  "$" in a key
    is replaced by the parameter's name when param_names has one, otherwise
    by "$<index>".  filename and func are not used here.
    """
    cur = con.cursor()
    param_value = type_to_int("PARAM_VALUE")
    for ptr in ptrs:
        # Bound parameters instead of values pasted into the SQL text.
        cur.execute("select file, caller, call_id, parameter, key, value from caller_info where function = ? and type = ?;",
                    (ptr, param_value))
        # The row's file gets its own name so it no longer shadows the
        # filename argument; call_id is selected but not needed for output.
        for row_file, caller, call_id, parameter, name, value in cur:
            parameter = int(parameter)
            # param_names is a dict keyed by parameter index, so test
            # membership.  Comparing against len() raised KeyError for
            # indexes such as -1 and missed names in sparse dicts.
            if parameter in param_names:
                name = name.replace("$", param_names[parameter])
            else:
                name = name.replace("$", "$%d" % (parameter))

            print("%s | %s | %s | %s" % (row_file, caller, name, value))
        print("==========================")
def print_caller_values(filename, func, ptrs):
    """Print the merged parameter values, a separator, then the raw rows."""
    param_names = get_param_names(filename, func)
    call_cnt = get_caller_count(ptrs)

    print_merged_caller_values(filename, func, ptrs, param_names, call_cnt)
    # Call form of print: identical output on Python 2, valid on Python 3.
    print("==========================")
    print_unmerged_caller_values(filename, func, ptrs, param_names)
def caller_info_values(filename, func):
    """Print the caller-supplied values for func and its function pointers."""
    print_caller_values(filename, func, get_function_pointers(func))
def print_return_states(func):
    """Print the return_states rows recorded for func.

    A header line is written before the first row; nothing is printed when
    there are no rows.
    """
    cur = con.cursor()
    # Bound parameter rather than a name pasted into the SQL text.
    cur.execute("select * from return_states where function = ?;", (func,))
    header_done = False
    for txt in cur:
        if not header_done:
            print("file | function | return_id | return_value | type | param | key | value |")
            header_done = True
        # One format string reproduces what three trailing-comma print
        # statements emitted (the pieces were joined by single spaces).
        print("%s | %s | %2s | %13s | %13s |  %2d | %20s | %20s |"
              % (txt[0], txt[1], txt[3], txt[4], type_to_str(txt[6]),
                 txt[7], txt[8], txt[9]))
def print_return_implies(func):
    """Print the return_implies rows recorded for func.

    A header line is written before the first row; nothing is printed when
    there are no rows.
    """
    cur = con.cursor()
    # Bound parameter rather than a name pasted into the SQL text.
    cur.execute("select * from return_implies where function = ?;", (func,))
    header_done = False
    for txt in cur:
        if not header_done:
            print("file | function | type | param | key | value |")
            header_done = True
        # One format string reproduces what three trailing-comma print
        # statements emitted (the pieces were joined by single spaces).
        print("%15s | %15s | %15s | %3d | %s | %15s |"
              % (txt[0], txt[1], type_to_str(txt[4]),
                 txt[5], txt[6], txt[7]))
def print_type_size(struct_type, member):
    """Print the type_size and function_type_size rows for a struct member.

    Rows are matched with SQL LIKE against "(struct <struct_type>)-><member>",
    so LIKE wildcards in either argument are honoured.
    """
    cur = con.cursor()
    pattern = "(struct %s)->%s" % (struct_type, member)

    # The pattern is bound as a parameter instead of being pasted into the SQL.
    cur.execute("select * from type_size where type like ?;", (pattern,))
    print("type | size")
    for txt in cur:
        print("%-15s | %s" % (txt[0], txt[1]))

    cur.execute("select * from function_type_size where type like ?;", (pattern,))
    print("file | function | type | size")
    for txt in cur:
        print("%-15s | %-15s | %-15s | %s" % (txt[0], txt[1], txt[2], txt[3]))
def print_data_info(struct_type, member):
    """Print the data_info rows for a struct member.

    Rows are matched with SQL LIKE against "(struct <struct_type>)-><member>";
    the type column is shown by name where db_types knows the code.
    """
    cur = con.cursor()
    pattern = "(struct %s)->%s" % (struct_type, member)
    # The pattern is bound as a parameter instead of being pasted into the SQL.
    cur.execute("select * from data_info where data like ?;", (pattern,))
    print("file | data | type | value")
    for txt in cur:
        print("%-15s | %-15s | %-15s | %s"
              % (txt[0], txt[1], type_to_str(txt[2]), txt[3]))
def print_fn_ptrs(func):
    """Print func and the quoted list of names get_function_pointers() finds."""
    ptrs = get_function_pointers(func)
    if not ptrs:
        return

    # The Python 2 code used a chain of trailing-comma print statements,
    # which write a single space between consecutive items.  Collecting the
    # items and joining them with " " gives the same line, including the
    # space before the final newline (hence the trailing empty item).
    pieces = ["%s = " % (func)]
    for i, p in enumerate(ptrs):
        if i > 0:
            pieces.append(",")
        pieces.append("'%s'" % (p))
    pieces.append("")
    print(" ".join(pieces))
def print_functions(member):
    """Print the function_ptr rows whose ptr ends in "-><member>"."""
    cur = con.cursor()
    # Same LIKE pattern as before ("%->member"), now bound as a parameter.
    cur.execute("select * from function_ptr where ptr like ?;",
                ("%->" + member,))
    print("File | Pointer | Function | Static")
    for txt in cur:
        # Column 2 is printed under "Pointer" and column 1 under "Function".
        print("%-15s | %-15s | %-15s | %s" % (txt[0], txt[2], txt[1], txt[3]))
def get_callers(func):
    """Return the callers recorded for func and for its function pointers.

    Callers are distinct per looked-up name; the same caller can appear
    more than once if it is recorded under several of those names.
    """
    ret = []
    cur = con.cursor()
    for ptr in get_function_pointers(func):
        # Bound parameter rather than a name pasted into the SQL text.
        cur.execute("select distinct caller from caller_info where function = ?;",
                    (ptr,))
        ret.extend(row[0] for row in cur)
    return ret
# Functions already expanded by the current call-tree / trace walk.
printed_funcs = []


def call_tree_helper(func, indent = 0):
    """Print func and, indented beneath it, the functions that call it.

    A function is expanded only once per walk.  Expansion stops at the
    "too common" placeholder, once indent exceeds 6, and for functions with
    20 or more callers (a note is printed instead).
    """
    global printed_funcs
    if func in printed_funcs:
        return
    # Call form of print: identical output on Python 2, valid on Python 3.
    print("%s%s()" % (" " * indent, func))
    if func == "too common":
        return
    if indent > 6:
        return
    printed_funcs.append(func)
    callers = get_callers(func)
    if len(callers) >= 20:
        print("Over 20 callers for %s()" % (func))
        return
    for caller in callers:
        call_tree_helper(caller, indent + 2)
def print_call_tree(func):
    """Print the tree of callers of func, starting a fresh walk."""
    global printed_funcs
    # Forget what an earlier walk already printed.
    printed_funcs = list()
    call_tree_helper(func)
def function_type_value(struct_type, member):
    """Print the function_type_value rows for a struct member.

    Rows are matched with SQL LIKE against "(struct <struct_type>)-><member>",
    so a struct_type of "%" matches any struct.
    """
    cur = con.cursor()
    pattern = "(struct %s)->%s" % (struct_type, member)
    # The pattern is bound as a parameter instead of being pasted into the SQL.
    cur.execute("select * from function_type_value where type like ?;",
                (pattern,))
    for txt in cur:
        print("%-30s | %-30s | %s | %s" % (txt[0], txt[1], txt[2], txt[3]))
def trace_callers(func, param):
    """Return (caller, value) pairs describing where a parameter comes from.

    Looks at caller_info rows of type 0 (INTERNAL), 1014 (DATA_SOURCE) and
    1028 (COMPARE_LIMIT) for func and its function pointers, restricted to
    parameter -1 or param:

    * a 1014 row yields (caller, value);
    * a 1028 row yields ("%", value), a marker the printer treats specially;
    * a type 0 row yields (caller, "") only if the previously examined row
      was also type 0 (or it is the first row examined).
    """
    sources = []
    prev_type = 0

    cur = con.cursor()
    for ptr in get_function_pointers(func):
        # Bound parameters instead of values pasted into the SQL text.
        cur.execute("select type, caller, value from caller_info where function = ? and (type = 0 or type = 1014 or type = 1028) and (parameter = -1 or parameter = ?);",
                    (ptr, param))
        for row in cur:
            data_type = int(row[0])
            if data_type == 1014:
                sources.append((row[1], row[2]))
            elif data_type == 1028:
                sources.append(("%", row[2]))  # hack...
            elif data_type == 0 and prev_type == 0:
                sources.append((row[1], ""))
            prev_type = data_type
    return sources
def trace_param_helper(func, param, indent = 0):
    """Print func(param) and recurse into the callers that pass it on.

    A function is expanded only once per walk.  Expansion stops at the
    "too common" placeholder and once indent exceeds 20.  For each source
    returned by trace_callers():

    * a value of the form "p <n>" means the caller passes its own parameter
      n, so the trace continues in that caller;
    * a caller starting with "%" is a marker whose value is printed as-is;
    * anything else is printed as a leaf, prefixed with "*".
    """
    global printed_funcs
    if func in printed_funcs:
        return
    # Call form of print: identical output on Python 2, valid on Python 3.
    print("%s%s(param %d)" % (" " * indent, func, param))
    if func == "too common":
        return
    if indent > 20:
        return
    printed_funcs.append(func)
    sources = trace_callers(func, param)
    for caller, value in sources:
        # startswith() also copes with a value that is just "p", which the
        # old index-by-index test crashed on with IndexError.
        if value.startswith("p "):
            p = int(value[2:])
            trace_param_helper(caller, p, indent + 2)
        elif caller.startswith("%"):
            print(" %s%s" % (" " * indent, value))
        else:
            print("* %s%s %s" % (" " * (indent - 1), caller, value))
def trace_param(func, param):
    """Print a trace of where parameter number param of func comes from."""
    global printed_funcs
    # Start a fresh walk: forget what an earlier one already printed.
    printed_funcs = []
    # Call form of print: identical output on Python 2, valid on Python 3.
    print("tracing %s %d" % (func, param))
    trace_param_helper(func, param)
# Command-line dispatch.  A single argument is taken to be a function name;
# otherwise argv[1] selects the command and the rest are its arguments.
# usage() prints the help text and exits, so it never returns here.
if len(sys.argv) < 2:
    usage()

if len(sys.argv) == 2:
    func = sys.argv[1]
    print_caller_info("", func)
elif sys.argv[1] == "call_info":
    if len(sys.argv) != 4:
        usage()
    filename = sys.argv[2]
    func = sys.argv[3]
    caller_info_values(filename, func)
    print_caller_info(filename, func)
elif sys.argv[1] == "user_data":
    func = sys.argv[2]
    # No file is given for this command; "filename" was never assigned on
    # this path and raised NameError, so pass "" like the default command.
    print_caller_info("", func, "USER_DATA")
elif sys.argv[1] == "param_value":
    func = sys.argv[2]
    # Same as above: there is no filename argument for this command.
    print_caller_info("", func, "PARAM_VALUE")
elif sys.argv[1] == "function_ptr" or sys.argv[1] == "fn_ptr":
    func = sys.argv[2]
    print_fn_ptrs(func)
elif sys.argv[1] == "return_states":
    func = sys.argv[2]
    print_return_states(func)
    print("================================================")
    print_return_implies(func)
elif sys.argv[1] == "return_implies":
    func = sys.argv[2]
    print_return_implies(func)
elif sys.argv[1] == "type_size" or sys.argv[1] == "buf_size":
    # Both the struct type and the member are required.
    if len(sys.argv) < 4:
        usage()
    struct_type = sys.argv[2]
    member = sys.argv[3]
    print_type_size(struct_type, member)
elif sys.argv[1] == "data_info":
    if len(sys.argv) < 4:
        usage()
    struct_type = sys.argv[2]
    member = sys.argv[3]
    print_data_info(struct_type, member)
elif sys.argv[1] == "call_tree":
    func = sys.argv[2]
    print_call_tree(func)
elif sys.argv[1] == "where":
    if len(sys.argv) == 3:
        # Only a member name: match it in any struct.
        struct_type = "%"
        member = sys.argv[2]
    elif len(sys.argv) == 4:
        struct_type = sys.argv[2]
        member = sys.argv[3]
    else:
        # More arguments than expected used to leave struct_type/member
        # unassigned and crash with NameError.
        usage()
    function_type_value(struct_type, member)
elif sys.argv[1] == "local":
    filename = sys.argv[2]
    variable = ""
    if len(sys.argv) == 4:
        variable = sys.argv[3]
    # NOTE(review): local_values() is not defined in the part of the file
    # visible here - confirm it exists, otherwise this command raises
    # NameError.
    local_values(filename, variable)
elif sys.argv[1] == "functions":
    member = sys.argv[2]
    print_functions(member)
elif sys.argv[1] == "trace_param":
    if len(sys.argv) != 4:
        usage()
    func = sys.argv[2]
    param = int(sys.argv[3])
    trace_param(func, param)
else:
    usage()