comparison: select the caller_info
[smatch.git] / smatch_scripts / implicit_dependencies / parser.py
blob5cc5a8efb155d2f4628aa12f02fbab429549a4e7
1 from collections import defaultdict
2 import copy
3 import json
4 import sys
5 import pprint
7 from constants import (
8 GLOBAL_BLACKLIST,
9 IMPL_DEP_FILE_STR,
10 OUTPUT_FILE_STR,
11 SYSCALL_PREFIXES,
12 ListType,
13 hardcode_syscall_read_fields,
14 hardcode_syscall_write_fields,
class Parser(object):
    """Derive implicit dependencies between syscalls from struct-member accesses.

    Each input line is one record shaped like
    ``<syscall> <list_type>: [(struct a)->b, (struct c)->d]`` and lists the
    struct members a syscall reads or writes.  A syscall is reported as
    implicitly dependent on another syscall when it reads at least one
    member that the other one writes.

    The code avoids Python 2-only constructs (``file()``, ``iteritems()``,
    the ``print`` statement, tuple-parameter lambdas) so it runs on both
    Python 2 and Python 3.
    """

    def __init__(
        self,
        impl_dep_file_str=IMPL_DEP_FILE_STR,
        output_file_str=OUTPUT_FILE_STR,
        verbose=False,
        pretty=False
    ):
        """Open the input/output files and seed the hard-coded field tables.

        Args:
            impl_dep_file_str: path of the input file to parse.
            output_file_str: path prefix of the outputs; ``.json``,
                ``_verbose.json``, ``.pretty`` and ``_verbose.pretty`` are
                appended to it as needed.
            verbose: also record which struct members cause each dependency.
            pretty: also write pprint-formatted copies of the results.

        Exits the process with status 1 if a file cannot be opened.
        """
        # Optional handles start as None so close() can test them safely.
        self.output_file_verbose = None
        self.pretty_output_file = None
        self.pretty_output_file_verbose = None
        try:
            # open() replaces the Python 2-only file() builtin.
            self.impl_dep_file = open(impl_dep_file_str, 'r')
            self.output_file = open(output_file_str + '.json', 'w+')
            if verbose:
                self.output_file_verbose = open(output_file_str + '_verbose.json', 'w+')
            # NOTE(review): indentation was lost in this copy of the file;
            # `if pretty` is taken to be independent of `if verbose` because
            # write() uses both pretty files whenever `pretty` is set.
            if pretty:
                self.pretty_output_file = open(output_file_str + '.pretty', 'w+')
                self.pretty_output_file_verbose = open(output_file_str + '_verbose.pretty', 'w+')
        except IOError:
            sys.stderr.write("ERROR: Cannot open files %s %s.\n" % (impl_dep_file_str, output_file_str))
            sys.exit(1)

        self.verbose = verbose
        self.pretty = pretty
        self.syscall_read_fields = defaultdict(set)
        self.syscall_write_fields = defaultdict(set)
        self.implicit_dependencies = defaultdict(set)
        self.verbose_impl_dep = defaultdict(list)
        self.deref_counter = defaultdict(int)  # count which struct->members are most common

        # Seed both tables with the hard-coded entries from constants.
        for syscall, fields in hardcode_syscall_read_fields.items():
            self.syscall_read_fields[syscall].update(fields)

        for syscall, fields in hardcode_syscall_write_fields.items():
            self.syscall_write_fields[syscall].update(fields)

    def _sanitize_syscall(self, syscall):
        """Return `syscall` with the first matching SYSCALL_PREFIXES entry removed."""
        for prefix in SYSCALL_PREFIXES:
            if syscall.startswith(prefix):
                return syscall[len(prefix):]
        return syscall

    def _deref_to_tuple(self, deref):
        """ (struct a)->b ==> (a,b) """
        struct, member = deref.split('->')
        struct = struct[1:-1]  # strip parens
        struct = struct.split(' ')[1]  # drop struct keyword
        return (struct, member)

    def _split_field(self, field):
        """Turn ``[(struct a)->b, (struct c)->d]`` into ``[(a, b), (c, d)]``.

        Entries are stripped *before* the emptiness test, so whitespace-only
        entries (``[ ]``, a trailing ``, ``) are skipped instead of reaching
        _deref_to_tuple() and failing there.  A real list is returned so the
        result can be reused on Python 3, where map() is a one-shot iterator.
        """
        inner = field.strip()[1:-1]  # strip square brackets
        entries = (entry.strip() for entry in inner.split(','))
        return [self._deref_to_tuple(entry) for entry in entries if entry]

    def _sanitize_line(self, line):
        """Split one input line into ``(syscall, list_type, derefs)``."""
        syscall_and_listtype, field = line.split(':')
        syscall, list_type = syscall_and_listtype.split(' ')
        syscall = self._sanitize_syscall(syscall)
        derefs = self._split_field(field)
        return syscall, list_type, derefs

    def _add_fields(self, syscall, list_type, derefs):
        """Record `derefs` in the read or write table of `syscall`.

        Raises:
            ValueError: if `list_type` is neither ListType.READ nor
                ListType.WRITE (previously an UnboundLocalError surfaced
                from the loop below).
        """
        if list_type == ListType.READ:
            table = self.syscall_read_fields
        elif list_type == ListType.WRITE:
            table = self.syscall_write_fields
        else:
            raise ValueError(
                "unknown list type %r for syscall %r" % (list_type, syscall)
            )
        for deref in derefs:
            if deref in GLOBAL_BLACKLIST:  # ignore spammy structs
                continue
            table[syscall].add(deref)

    def _construct_implicit_deps(self):
        """ just do a naive O(n^2) loop to see intersections between write_list and read_list """
        for this_call, read_fields in self.syscall_read_fields.items():
            for that_call, write_fields in self.syscall_write_fields.items():
                if that_call == this_call:  # calls are obviously dependent on themselves. ignore.
                    continue
                intersection = read_fields & write_fields
                if not intersection:
                    continue
                self.implicit_dependencies[this_call].add(that_call)
                if self.verbose:
                    self.verbose_impl_dep[this_call].append({
                        'call': that_call,
                        'reason': intersection,
                    })
                    # NOTE(review): indentation was lost in this copy of the
                    # file; the counter update is assumed to belong to the
                    # verbose branch -- confirm against the upstream source.
                    for deref in intersection:
                        self.deref_counter[deref] += 1

    def parse(self):
        """Read every record of the input file, then compute the dependencies."""
        for line in self.impl_dep_file:
            if not line.strip():
                # A blank line carries no record; splitting it would raise.
                continue
            syscall, list_type, derefs = self._sanitize_line(line)
            self._add_fields(syscall, list_type, derefs)
        self._construct_implicit_deps()

    def _listify_verbose_reason(self, reason):
        """Return a JSON-serializable copy of one verbose dependency entry.

        The ``'reason'`` set of ``(struct, member)`` tuples becomes a sorted
        list of ``"struct->member"`` strings; sorting keeps the output stable
        between runs.  `reason` itself is left untouched.
        """
        listified = copy.deepcopy(reason)
        listified['reason'] = [
            struct + '->' + member
            for struct, member in sorted(listified['reason'])
        ]
        return listified

    def _get_json_dependencies(self):
        """Return ``(implicit_dependencies, verbose_impl_dep)`` as plain dicts.

        Sets are converted to sorted lists and verbose entries are listified,
        so both dicts can be passed straight to json.dump().
        """
        implicit_dependencies = {}
        verbose_impl_dep = {}
        for call, dep_set in self.implicit_dependencies.items():
            implicit_dependencies[call] = sorted(dep_set)
        for call, call_reasons in self.verbose_impl_dep.items():
            verbose_impl_dep[call] = [
                self._listify_verbose_reason(reason) for reason in call_reasons
            ]
        return implicit_dependencies, verbose_impl_dep

    def write(self):
        """Dump the results to the output files and print member counts to stdout."""
        implicit_dependencies, verbose_impl_dep = self._get_json_dependencies()
        json.dump(implicit_dependencies, self.output_file)
        if self.verbose:
            json.dump(verbose_impl_dep, self.output_file_verbose)
        if self.pretty:
            pprint.pprint(dict(self.implicit_dependencies), self.pretty_output_file)
            pprint.pprint(dict(self.verbose_impl_dep), self.pretty_output_file_verbose)
        # NOTE(review): indentation was lost in this copy of the file; the
        # summary loop is assumed to run regardless of `pretty` -- confirm.
        # Sorted by ascending count, ties broken by the (struct, member) key.
        ranked = sorted(
            self.deref_counter.items(),
            key=lambda item: (item[1], item[0])
        )
        for deref, count in ranked:
            # Same text the Python 2 print statement produced.
            sys.stdout.write("%s: %d\n" % (deref, count))

    def close(self):
        """Close every file handle that __init__ opened, including the pretty ones."""
        handles = (
            self.output_file,
            self.impl_dep_file,
            self.output_file_verbose,
            self.pretty_output_file,
            self.pretty_output_file_verbose,
        )
        for handle in handles:
            if handle is not None:
                handle.close()