modification_hooks: handle PARAM_SET earlier
[smatch.git] / smatch_data / db / smdb.py
blobe8a2c613a03199b52d4b62ae480d1021786bafea
1 #!/usr/bin/python
3 # Copyright (C) 2013 Oracle.
5 # Licensed under the Open Software License version 1.1
7 import sqlite3
8 import sys
9 import re
# Open the smatch database that lives in the current working directory.
# The resulting module-level connection `con` is what the query helpers use.
try:
    con = sqlite3.connect('smatch_db.sqlite')
except sqlite3.Error as err:
    print("Error %s:" % err.args[0])
    sys.exit(1)
def usage():
    """Print the command summary to stdout and exit with status 1."""
    help_lines = [
        "%s" % (sys.argv[0]),
        "<function> - how a function is called",
        "return_states <function> - what a function returns",
        "call_tree <function> - show the call tree",
        "where <struct_type> <member> - where a struct member is set",
        "type_size <struct_type> <member> - how a struct member is allocated",
        "data_info <struct_type> <member> - information about a given data type",
        "function_ptr <function> - which function pointers point to this",
        "trace_param <function> <param> - trace where a parameter came from",
    ]
    # One write of all lines produces the same text as one print per line.
    print("\n".join(help_lines))
    sys.exit(1)
# Scratch lists for the pointer search; get_function_pointers() resets both
# before every lookup.
function_ptrs = []
searched_ptrs = []
def get_function_pointers_helper(func):
    """Recursively collect the pointer names recorded for `func`.

    Every `ptr` value the function_ptr table lists for `func` that has not
    been collected yet is appended to the global function_ptrs list, and the
    search then recurses with that pointer name as the new function, so
    pointers-to-pointers are followed too.  searched_ptrs guards against
    searching the same name twice.
    """
    rows = con.cursor()
    rows.execute("select distinct ptr from function_ptr where function = '%s';" %(func))
    for record in rows:
        name = record[0]
        if name not in function_ptrs:
            function_ptrs.append(name)
            if name not in searched_ptrs:
                searched_ptrs.append(name)
                get_function_pointers_helper(name)
def get_function_pointers(func):
    """Return `func` followed by every pointer name that can reach it.

    Resets the module-level search state, runs the recursive helper and hands
    back the (global) function_ptrs list; `func` itself is always element 0.
    """
    global function_ptrs, searched_ptrs
    # Two separate list objects: the helper appends to each independently.
    function_ptrs, searched_ptrs = [func], [func]
    get_function_pointers_helper(func)
    return function_ptrs
# Symbolic names for the integer `type` codes stored in the database rows.
# type_to_str() / type_to_int() translate through this table.
db_types = {
       0: "INTERNAL",
     101: "PARAM_CLEARED",
     103: "PARAM_LIMIT",
     104: "PARAM_FILTER",
    1001: "PARAM_VALUE",
    1002: "BUF_SIZE",
    1003: "USER_DATA",
    1004: "CAPPED_DATA",
    1005: "RETURN_VALUE",
    1006: "DEREFERENCE",
    1007: "RANGE_CAP",
    1008: "LOCK_HELD",
    1009: "LOCK_RELEASED",
    1010: "ABSOLUTE_LIMITS",
    1012: "PARAM_ADD",
    1013: "PARAM_FREED",
    1014: "DATA_SOURCE",
    1015: "FUZZY_MAX",
    1016: "STR_LEN",
    1017: "ARRAY_LEN",
    1018: "CAPABLE",
    1019: "NS_CAPABLE",
    1022: "TYPE_LINK",
    1023: "UNTRACKED_PARAM",
    1024: "CULL_PATH",
    1025: "PARAM_SET",
    1026: "PARAM_USED",
    1027: "BYTE_UNITS",
    1028: "PARAM_COMPARE",
    8017: "USER_DATA2",
    8018: "NO_OVERFLOW",
    8019: "NO_OVERFLOW_SIMPLE",
    8020: "LOCKED",
    8021: "UNLOCKED",
    8023: "ATOMIC_INC",
    8024: "ATOMIC_DEC",
}
def add_range(rl, min_val, max_val):
    """Return a new range list equal to `rl` with [min_val, max_val] merged in.

    `rl` is a list of [min, max] pairs (presumably sorted ascending and
    non-overlapping -- the walk below relies on that; verify against
    callers).  Ranges that overlap or are directly adjacent (differ by one)
    are coalesced.  `rl` itself is not modified.
    """
    if len(rl) == 0:
        return [[min_val, max_val]]

    merged = []
    inserted = False    # new range already sits at merged[-1]; may still grow
    finished = False    # loop left through a break
    pos = 0

    for pos in range(len(rl)):
        lo = rl[pos][0]
        hi = rl[pos][1]

        if inserted:
            # The new range is already in `merged`; see whether it reaches
            # into (or touches) this later range as well.
            if max_val + 1 == lo:
                # adjacent: extend the merged range over this one
                merged[-1][1] = hi
            elif max_val < lo:
                # no contact: keep this range as is
                merged.append([lo, hi])
            elif max_val < hi:
                # partial overlap: extend the merged range
                merged[-1][1] = hi
            else:
                # this range is swallowed completely; look at the next one
                continue
            finished = True
            break

        if max_val + 1 == lo:
            # new range ends right before this one: join them
            merged.append([min_val, hi])
        elif max_val < lo:
            # new range is entirely below this one
            merged.append([min_val, max_val])
            merged.append([lo, hi])
        elif min_val < lo and max_val <= hi:
            # starts below, ends inside
            merged.append([min_val, hi])
        elif min_val < lo:
            # starts below, ends above: later ranges may be affected
            merged.append([min_val, max_val])
            inserted = True
            continue
        elif max_val <= hi:
            # already fully covered by this range
            merged.append([lo, hi])
        elif min_val <= hi or min_val - 1 == hi:
            # starts inside (or right after) this range and ends above it
            merged.append([lo, max_val])
            inserted = True
            continue
        else:
            # new range is entirely above this one
            merged.append([lo, hi])
            continue
        finished = True
        break

    if pos + 1 < len(rl):
        # stopped early: the untouched tail is copied over unchanged
        merged = merged + rl[pos + 1:]
    elif not finished and not inserted:
        # walked past every existing range without placing the new one
        merged.append([min_val, max_val])

    return merged
def rl_union(rl1, rl2):
    """Return the union of two range lists as a new range list.

    Every [min, max] pair of rl1, then of rl2, is folded into the result
    with add_range().  A diagnostic line is printed if non-empty input
    somehow produced an empty result.
    """
    combined = []
    for source in (rl1, rl2):
        for bounds in source:
            combined = add_range(combined, bounds[0], bounds[1])

    if not combined and (rl1 or rl2):
        print("bug: merging %s + %s gives empty" %(rl1, rl2))

    return combined
def txt_to_val(txt):
    """Convert one range endpoint from text to an integer.

    Recognises the symbolic limits s16/s32/s64 min and max and u16/u32/u64
    max; anything else goes through int().  Text that int() rejects with a
    ValueError becomes 0.
    """
    named_limits = {
        "s64min": -(2**63),
        "s32min": -(2**31),
        "s16min": -(2**15),
        "s64max": 2**63 - 1,
        "s32max": 2**31 - 1,
        "s16max": 2**15 - 1,
        "u64max": 2**64 - 1,
        "u32max": 2**32 - 1,
        "u16max": 2**16 - 1,
    }
    if txt in named_limits:
        return named_limits[txt]
    try:
        return int(txt)
    except ValueError:
        return 0
def val_to_txt(val):
    """Convert an integer range endpoint to its textual form.

    The s16/s32/s64 min and max and u16/u32/u64 max values are written by
    name, other negative numbers are wrapped in parentheses, and everything
    else is printed as a plain decimal.
    """
    limit_names = {
        -(2**63): "s64min",
        -(2**31): "s32min",
        -(2**15): "s16min",
        2**63 - 1: "s64max",
        2**31 - 1: "s32max",
        2**15 - 1: "s16max",
        2**64 - 1: "u64max",
        2**32 - 1: "u32max",
        2**16 - 1: "u16max",
    }
    if val in limit_names:
        return limit_names[val]
    # Parentheses keep a leading minus apart from the "-" range separator.
    template = "(%d)" if val < 0 else "%d"
    return template %(val)
def get_next_str(txt):
    """Split the first range endpoint off the front of `txt`.

    Returns [parsed, val]: `val` is the endpoint text (without any
    surrounding parentheses) and `parsed` is the number of characters of
    `txt` it occupied, parentheses included.

    Three endpoint spellings are understood:
      "(...)"       parenthesised value; `val` is the text between the parens
      "s..."/"u..." symbolic limit, taken to be exactly 6 characters long
      anything else digits up to the next "-", with an optional leading "-"

    An empty `txt` yields [0, ""] (it used to raise IndexError).
    """
    if not txt:
        return [0, ""]

    first = txt[0]
    if first == '(':
        end = txt.find(')', 1)
        if end == -1:
            # unterminated: consume to the end of the text
            end = len(txt)
        # +1 accounts for the closing paren
        return [end + 1, txt[1:end]]

    if first == 's' or first == 'u':
        return [6, txt[:6]]

    # A leading '-' is a sign, not the range separator, so skip over it
    # before looking for the separator.
    start = 1 if first == '-' else 0
    end = txt.find('-', start)
    if end == -1:
        end = len(txt)
    return [end, txt[:end]]
def txt_to_rl(txt):
    """Parse a textual range list such as "0-5,10,(-3)-s32max".

    Returns a list of [min, max] integer pairs, one per comma-separated
    piece; a piece with a single value gives min == max.  Empty or missing
    text gives an empty list, empty pieces are skipped, and a piece whose
    upper bound is missing (e.g. "5-") is treated as a single value --
    these inputs used to raise IndexError.
    """
    ret = []
    if not txt:
        return ret

    for pair in txt.split(","):
        if not pair:
            continue
        cnt, min_str = get_next_str(pair)
        # Skip the "-" separator that follows the first endpoint.
        rest = pair[cnt + 1:]
        if rest:
            max_str = get_next_str(rest)[1]
        else:
            max_str = min_str
        ret.append([txt_to_val(min_str), txt_to_val(max_str)])

    # Note: the result does not always round-trip through rl_to_txt();
    # Smatch won't call INT_MAX "s32max" if the variable is unsigned.
    return ret
def rl_to_txt(rl):
    """Render a list of [min, max] pairs as comma-separated range text.

    A pair whose two ends are equal is written as a single value, any other
    pair as "min-max"; endpoints are formatted by val_to_txt().
    """
    pieces = []
    for bounds in rl:
        low = bounds[0]
        high = bounds[1]
        if low == high:
            pieces.append(val_to_txt(low))
        else:
            pieces.append(val_to_txt(low) + "-" + val_to_txt(high))
    return ",".join(pieces)
def type_to_str(type_int):
    """Return the db_types name for a type code.

    `type_int` is converted with int() for the lookup (so a non-numeric
    value raises, as before).  Codes missing from db_types are returned
    unchanged, in whatever form they were passed in.
    """
    # dict.get replaces the Python-2-only has_key() test.
    return db_types.get(int(type_int), type_int)
def type_to_int(type_string):
    """Return the numeric code whose db_types name is `type_string`, or -1."""
    for code in db_types:
        if db_types[code] == type_string:
            return code
    return -1
def display_caller_info(printed, cur, param_names):
    """Print every caller_info row available from cursor `cur`.

    The column header is printed before the first row unless `printed` is
    already true.  In the key column "$" is replaced by the parameter's name
    when `param_names` has one.  Returns the updated `printed` flag so the
    caller can pass it to the next invocation.
    """
    # Same text the three chained print statements used to produce (each
    # chained print contributed one separating space).
    row_format = "%20s | %20s | %20s |  %10s |  %d | %s | %s"
    for row in cur:
        if not printed:
            print("file | caller | function | type | parameter | key | value |")
            printed = 1

        param_idx = int(row[6])
        key = row[7]
        if param_idx in param_names:
            key = key.replace("$", param_names[param_idx])

        print(row_format %(row[0], row[1], row[2], type_to_str(row[5]),
                           param_idx, key, row[8]))
    return printed
def get_caller_info(filename, ptrs, my_type, func = None):
    """Print the caller_info rows for every name in `ptrs`.

    filename -- file used to look up the parameter names
    ptrs     -- function / pointer names to query, as returned by
                get_function_pointers()
    my_type  -- db_types name to restrict the rows to, or "" for all types
    func     -- function whose parameter names label the output; defaults
                to ptrs[0], which is where get_function_pointers() puts the
                function that was asked about

    This used to read `func` from whatever module-level variable of that
    name the command-line code had left behind.
    """
    if func is None:
        func = ptrs[0] if ptrs else ""

    cur = con.cursor()
    param_names = get_param_names(filename, func)
    printed = 0
    type_filter = ""
    if my_type != "":
        type_filter = "and type = %d" %(type_to_int(my_type))
    for ptr in ptrs:
        cur.execute("select * from caller_info where function = '%s' %s;" %(ptr, type_filter))
        printed = display_caller_info(printed, cur, param_names)
def print_caller_info(filename, func, my_type = ""):
    """Print caller_info rows for `func` and for the pointers that reach it.

    `my_type` optionally restricts the output to one db_types name.
    """
    get_caller_info(filename, get_function_pointers(func), my_type)
def merge_values(param_names, vals, cur):
    """Fold (parameter, key, value) rows from `cur` into `vals`.

    `vals` maps parameter -> name -> [row_count, range_list] and is updated
    in place.  "$" in the key is replaced by the parameter's name when one
    is known.  For a name seen before, the count goes up by one and the
    range list becomes the union of the old and the new ranges.
    """
    for row in cur:
        param_idx = int(row[0])
        key = row[1]
        ranges = txt_to_rl(row[2])
        if param_idx in param_names:
            key = key.replace("$", param_names[param_idx])

        per_param = vals.setdefault(param_idx, {})

        # Slot 0 counts how many rows contributed to this entry.
        if key in per_param:
            seen = per_param[key]
            per_param[key] = [seen[0] + 1, rl_union(seen[1], ranges)]
        else:
            per_param[key] = [1, ranges]
def get_param_names(filename, func):
    """Return a dict mapping parameter number -> name for `func`.

    The parameter_name table is first searched with both file and function;
    if that finds nothing the search is repeated on the function name alone.
    """
    cur = con.cursor()
    queries = (
        "select parameter, value from parameter_name where file = '%s' and function = '%s';" %(filename, func),
        "select parameter, value from parameter_name where function = '%s';" %(func),
    )
    names = {}
    for sql in queries:
        cur.execute(sql)
        for row in cur:
            names[int(row[0])] = row[1]
        if len(names):
            # the more specific query matched; no need for the fallback
            break
    return names
def get_caller_count(ptrs):
    """Return the number of distinct call_ids in caller_info, summed over `ptrs`."""
    cur = con.cursor()
    total = 0
    for ptr in ptrs:
        cur.execute("select count(distinct(call_id)) from caller_info where function = '%s';" %(ptr))
        total += sum(int(row[0]) for row in cur)
    return total
def print_merged_caller_values(filename, func, ptrs, param_names, call_cnt):
    """Print the merged PARAM_VALUE range of each key across all callers.

    Only entries whose row count equals `call_cnt` are printed, i.e. keys
    for which exactly that many rows were merged.  `filename` and `func`
    are accepted but not used here.
    """
    cur = con.cursor()
    value_type = type_to_int("PARAM_VALUE")
    vals = {}
    for ptr in ptrs:
        cur.execute("select parameter, key, value from caller_info where function = '%s' and type = %d;" %(ptr, value_type))
        merge_values(param_names, vals, cur)

    for param in sorted(vals):
        by_name = vals[param]
        for name in sorted(by_name):
            row_count = by_name[name][0]
            if row_count == call_cnt:
                print("%d %s -> %s" %(param, name, rl_to_txt(by_name[name][1])))
def print_unmerged_caller_values(filename, func, ptrs, param_names):
    """Print every PARAM_VALUE row of caller_info for each name in `ptrs`.

    One "file | caller | key | value" line is printed per row.  "$" in the
    key becomes the parameter's name when `param_names` has a non-empty one
    and "$<number>" otherwise (a parameter without a recorded name used to
    raise KeyError).  `filename` and `func` are accepted but not used.
    """
    cur = con.cursor()
    value_type = type_to_int("PARAM_VALUE")
    for ptr in ptrs:
        cur.execute("select file, caller, call_id, parameter, key, value from caller_info where function = '%s' and type = %d;" %(ptr, value_type))
        for row_file, caller, call_id, parameter, name, value in cur:
            parameter = int(parameter)
            known_name = param_names.get(parameter)
            if known_name:
                name = name.replace("$", known_name)
            else:
                name = name.replace("$", "$%d" %(parameter))

            print("%s | %s | %s | %s" %(row_file, caller, name, value))
        # NOTE(review): separator placed once per pointer; the original
        # indentation of this line could not be recovered -- confirm.
        print("==========================")
def print_caller_values(filename, func, ptrs):
    """Print the merged caller values, a separator line, then the raw rows."""
    names = get_param_names(filename, func)
    total_calls = get_caller_count(ptrs)

    print_merged_caller_values(filename, func, ptrs, names, total_calls)
    print("==========================")
    print_unmerged_caller_values(filename, func, ptrs, names)
def caller_info_values(filename, func):
    """Print the caller values for `func` and every pointer that reaches it."""
    print_caller_values(filename, func, get_function_pointers(func))
def print_return_states(func):
    """Print the return_states rows recorded for `func`.

    A header line is printed before the first row; nothing is printed when
    there are no rows.
    """
    cur = con.cursor()
    cur.execute("select * from return_states where function = '%s';" %(func))
    # Same text the three chained print statements used to produce (each
    # chained print contributed one separating space).
    row_format = "%s | %s | %2s | %13s | %13s |  %2d | %20s | %20s |"
    need_header = True
    for row in cur:
        if need_header:
            print("file | function | return_id | return_value | type | param | key | value |")
            need_header = False
        print(row_format %(row[0], row[1], row[3], row[4],
                           type_to_str(row[6]), row[7], row[8], row[9]))
def print_call_implies(func):
    """Print the call_implies rows recorded for `func`.

    A header line is printed before the first row; nothing is printed when
    there are no rows.
    """
    cur = con.cursor()
    cur.execute("select * from call_implies where function = '%s';" %(func))
    # Same text the three chained print statements used to produce.
    row_format = "%15s | %15s | %15s | %3d | %15s | %15s |"
    need_header = True
    for row in cur:
        if need_header:
            print("file | function | type | param | key | value |")
            need_header = False
        print(row_format %(row[0], row[1], type_to_str(row[4]),
                           row[5], row[6], row[7]))
def print_type_size(struct_type, member):
    """Print type_size and function_type_size rows for a struct member.

    Rows are matched with LIKE against "(struct <struct_type>)-><member>",
    so SQL wildcards in either argument are honoured.
    """
    cur = con.cursor()

    cur.execute("select * from type_size where type like '(struct %s)->%s';" %(struct_type, member))
    print("type | size")
    for row in cur:
        print("%-15s | %s" %(row[0], row[1]))

    cur.execute("select * from function_type_size where type like '(struct %s)->%s';" %(struct_type, member))
    print("file | function | type | size")
    for row in cur:
        print("%-15s | %-15s | %-15s | %s" %(row[0], row[1], row[2], row[3]))
def print_data_info(struct_type, member):
    """Print the data_info rows whose data matches "(struct T)->member".

    The match uses LIKE; the type column is shown by its db_types name.
    """
    cur = con.cursor()
    cur.execute("select * from data_info where data like '(struct %s)->%s';" %(struct_type, member))
    print("file | data | type | value")
    for row in cur:
        type_name = type_to_str(row[2])
        print("%-15s | %-15s | %-15s | %s" %(row[0], row[1], type_name, row[3]))
def print_fn_ptrs(func):
    """Print `func` and the quoted names of everything that can point to it.

    The output is a single line of the form  func =  'a' , 'b'  (the
    function itself is the first quoted name).
    """
    ptrs = get_function_pointers(func)
    if not ptrs:
        return
    quoted = ["'%s'" %(p) for p in ptrs]
    # The explicit spaces reproduce the separators that chained
    # `print x,` statements insert between items.
    print("%s = " %(func) + " " + " , ".join(quoted) + " ")
def print_functions(member):
    """Print the function_ptr rows whose ptr column ends in "-><member>"."""
    cur = con.cursor()
    cur.execute("select * from function_ptr where ptr like '%%->%s';" %(member))
    print("File | Pointer | Function | Static")
    for row in cur:
        # columns 1 and 2 are swapped on purpose to match the header order
        print("%-15s | %-15s | %-15s | %s" %(row[0], row[2], row[1], row[3]))
def get_callers(func):
    """Return the callers recorded for `func` or any pointer that reaches it.

    Callers are distinct per queried name only; the same caller can appear
    more than once when it is found through several names.
    """
    cur = con.cursor()
    callers = []
    for name in get_function_pointers(func):
        cur.execute("select distinct caller from caller_info where function = '%s';" %(name))
        callers.extend(row[0] for row in cur)
    return callers
# Functions already printed by the current call_tree / trace_param walk.
printed_funcs = []
def call_tree_helper(func, indent = 0):
    """Print `func` and, recursively, its callers as an indented tree.

    A function is printed at most once per walk.  The walk does not descend
    below the pseudo function "too common", past an indent of 6, or into a
    function with 20 or more callers.
    """
    global printed_funcs
    if func in printed_funcs:
        return
    print("%s%s()" %(" " * indent, func))
    if func == "too common" or indent > 6:
        return
    printed_funcs.append(func)

    callers = get_callers(func)
    if len(callers) >= 20:
        print("Over 20 callers for %s()" %(func))
        return
    for parent in callers:
        call_tree_helper(parent, indent + 2)
def print_call_tree(func):
    """Print the caller tree of `func`, starting from a clean visited list."""
    global printed_funcs
    printed_funcs = []
    call_tree_helper(func)
def function_type_value(struct_type, member):
    """Print function_type_value rows matching "(struct T)->member" (LIKE)."""
    cur = con.cursor()
    cur.execute("select * from function_type_value where type like '(struct %s)->%s';" %(struct_type, member))
    for row in cur:
        print("%-30s | %-30s | %s | %s" %(row[0], row[1], row[2], row[3]))
def trace_callers(func, param):
    """Collect where parameter `param` of `func` comes from.

    Looks at caller_info rows of type 0 (INTERNAL), 1014 (DATA_SOURCE) and
    1028 (PARAM_COMPARE) for `func` and its pointers, restricted to
    parameter -1 or `param`, and returns a list of (caller, value) tuples:
      - DATA_SOURCE rows give (caller, value)
      - PARAM_COMPARE rows give ("%", value)
      - an INTERNAL row directly following another INTERNAL row (or coming
        first) gives (caller, "")
    """
    found = []
    last_type = 0

    cur = con.cursor()
    for name in get_function_pointers(func):
        cur.execute("select type, caller, value from caller_info where function = '%s' and (type = 0 or type = 1014 or type = 1028) and (parameter = -1 or parameter = %d);" %(name, param))
        for row in cur:
            row_type = int(row[0])
            if row_type == 1014:
                found.append((row[1], row[2]))
            elif row_type == 1028:
                # hack: "%" in the caller slot marks a comparison entry
                found.append(("%", row[2]))
            elif row_type == 0 and last_type == 0:
                found.append((row[1], ""))
            # carried across pointers as well as across rows
            last_type = row_type
    return found
def trace_param_helper(func, param, indent = 0):
    """Print `func(param N)` and recursively trace where the value came from.

    For every source reported by trace_callers():
      - a value of the form "p <number>" means the caller passed on its own
        parameter <number>, so the trace continues in that caller
      - a source whose caller field starts with "%" is printed as a
        comparison note
      - anything else is printed as a "* caller value" leaf
    A function is printed at most once per trace, and the walk stops at the
    pseudo function "too common" or once the indent exceeds 20.
    """
    global printed_funcs
    if func in printed_funcs:
        return
    print("%s%s(param %d)" %(" " * indent, func, param))
    if func == "too common":
        return
    if indent > 20:
        return
    printed_funcs.append(func)

    for caller, value in trace_callers(func, param):
        # startswith() also copes with a one-character value, which the
        # former index-by-index test crashed on.
        if value.startswith("p "):
            trace_param_helper(caller, int(value[2:]), indent + 2)
        elif caller.startswith("%"):
            print(" %s%s" %(" " * indent, value))
        else:
            print("* %s%s %s" %(" " * (indent - 1), caller, value))
def trace_param(func, param):
    """Print a trace of where parameter number `param` of `func` comes from."""
    global printed_funcs
    # start every trace with an empty visited list
    printed_funcs = []
    print("tracing %s %d" %(func, param))
    trace_param_helper(func, param)
# Command-line dispatch.  sys.argv[1] selects the query; with a single
# argument that argument is taken as a function name.  usage() exits, so
# execution never continues past it.
if len(sys.argv) < 2:
    usage()

if len(sys.argv) == 2:
    func = sys.argv[1]
    print_caller_info("", func)
elif sys.argv[1] == "call_info":
    if len(sys.argv) != 4:
        usage()
    filename = sys.argv[2]
    func = sys.argv[3]
    caller_info_values(filename, func)
    print_caller_info(filename, func)
elif sys.argv[1] == "user_data":
    func = sys.argv[2]
    # No file name is given on this command line; "" matches the
    # single-argument form above (this used to pass an undefined variable).
    print_caller_info("", func, "USER_DATA")
elif sys.argv[1] == "param_value":
    func = sys.argv[2]
    print_caller_info("", func, "PARAM_VALUE")
elif sys.argv[1] == "function_ptr" or sys.argv[1] == "fn_ptr":
    func = sys.argv[2]
    print_fn_ptrs(func)
elif sys.argv[1] == "return_states":
    func = sys.argv[2]
    print_return_states(func)
    print("================================================")
    print_call_implies(func)
elif sys.argv[1] == "call_implies":
    func = sys.argv[2]
    print_call_implies(func)
elif sys.argv[1] == "type_size" or sys.argv[1] == "buf_size":
    if len(sys.argv) != 4:
        usage()
    struct_type = sys.argv[2]
    member = sys.argv[3]
    print_type_size(struct_type, member)
elif sys.argv[1] == "data_info":
    if len(sys.argv) != 4:
        usage()
    struct_type = sys.argv[2]
    member = sys.argv[3]
    print_data_info(struct_type, member)
elif sys.argv[1] == "call_tree":
    func = sys.argv[2]
    print_call_tree(func)
elif sys.argv[1] == "where":
    if len(sys.argv) == 3:
        # member only: match it in any struct
        struct_type = "%"
        member = sys.argv[2]
    elif len(sys.argv) == 4:
        struct_type = sys.argv[2]
        member = sys.argv[3]
    else:
        usage()
    function_type_value(struct_type, member)
elif sys.argv[1] == "local":
    filename = sys.argv[2]
    variable = ""
    if len(sys.argv) == 4:
        variable = sys.argv[3]
    # NOTE(review): local_values() is not defined in the part of the file
    # visible here -- confirm it exists before relying on this command.
    local_values(filename, variable)
elif sys.argv[1] == "functions":
    member = sys.argv[2]
    print_functions(member)
elif sys.argv[1] == "trace_param":
    if len(sys.argv) != 4:
        usage()
    func = sys.argv[2]
    param = int(sys.argv[3])
    trace_param(func, param)
else:
    usage()