null-deref seen in testing
[LibreOffice.git] / compilerplugins / clang / methodcycles.py
blob8cb023ffb290496ca92507df98bb40bfd84488e2
1 #!/usr/bin/python3
3 from collections import defaultdict
4 import io
5 import re
6 import subprocess
8 # --------------------------------------------------------------------------------------------
9 # globals
10 # --------------------------------------------------------------------------------------------
12 definitionSet = set() # set of method_name
13 definitionToSourceLocationMap = dict()
15 # for the "unused methods" analysis
16 callDict = defaultdict(set) # map of from_method_name -> set(method_name)
18 # clang does not always use exactly the same numbers in the type-parameter vars it generates
19 # so I need to substitute them to ensure we can match correctly.
20 normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
21 def normalizeTypeParams( line ):
22 line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
23 # make some of the types a little prettier
24 line = line.replace("std::__debug", "std::")
25 line = line.replace("class ", "")
26 line = line.replace("struct ", "")
27 line = line.replace("_Bool", "bool")
28 return line
30 # --------------------------------------------------------------------------------------------
31 # primary input loop
32 # --------------------------------------------------------------------------------------------
34 cnt = 0
35 with io.open("workdir/loplugin.methodcycles.log", "r", buffering=1024*1024) as txt:
36 for line in txt:
37 tokens = line.strip().split("\t")
38 if tokens[0] == "definition:":
39 returnType = tokens[1]
40 nameAndParams = tokens[2]
41 sourceLocation = tokens[3]
42 funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip()
43 definitionSet.add(funcInfo)
44 definitionToSourceLocationMap[funcInfo] = sourceLocation
45 elif tokens[0] == "call:":
46 returnTypeFrom = tokens[1]
47 nameAndParamsFrom = tokens[2]
48 returnTypeTo = tokens[3]
49 nameAndParamsTo = tokens[4]
50 caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip()
51 callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip()
52 callDict[caller].add(callee)
53 else:
54 print( "unknown line: " + line)
55 cnt = cnt + 1
56 #if cnt > 100000: break
58 # sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
59 def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
60 return [int(text) if text.isdigit() else text.lower()
61 for text in re.split(_nsre, s)]
62 # sort by both the source-line and the datatype, so the output file ordering is stable
63 # when we have multiple items on the same source line
64 def v_sort_key(v):
65 return natural_sort_key(v[1]) + [v[0]]
66 def sort_set_by_natural_key(s):
67 return sorted(s, key=lambda v: v_sort_key(v))
70 # --------------------------------------------------------------------------------------------
71 # analysis
72 # --------------------------------------------------------------------------------------------
74 # follow caller-callee chains, removing all methods reachable from a root method
75 def remove_reachable(callDict, startCaller):
76 worklist = list()
77 worklist.append(startCaller)
78 while len(worklist) > 0:
79 caller = worklist.pop()
80 if caller not in callDict:
81 continue
82 calleeSet = callDict[caller]
83 del callDict[caller]
84 if caller in definitionSet:
85 definitionSet.remove(caller)
86 for c in calleeSet:
87 worklist.append(c)
89 # look for all the external entry points and remove code called from there
90 to_be_removed = set()
91 to_be_removed.add("int main(int,char **)")
92 # random dynload entrypoints that we don't otherwise find
93 to_be_removed.add("bool TestImportOLE2(SvStream &)")
94 to_be_removed.add("void SbiRuntime::StepREDIMP()")
95 to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)")
96 for caller in definitionSet:
97 if caller not in definitionToSourceLocationMap:
98 to_be_removed.append(caller)
99 continue
100 location = definitionToSourceLocationMap[caller]
101 if "include/com/" in location \
102 or "include/cppu/" in location \
103 or "include/cppuhelper/" in location \
104 or "include/osl/" in location \
105 or "include/rtl/" in location \
106 or "include/sal/" in location \
107 or "include/salhelper/" in location \
108 or "include/typelib/" in location \
109 or "include/uno/" in location \
110 or "workdir/UnpackedTarball/" in location \
111 or "workdir/UnoApiHeadersTarget/" in location \
112 or "workdir/CustomTarget/officecfg/" in location \
113 or "workdir/LexTarget/" in location \
114 or "workdir/CustomTarget/i18npool/localedata/" in location \
115 or "workdir/SdiTarget/" in location \
116 or "/qa/" in location \
117 or "include/test/" in location:
118 to_be_removed.add(caller)
119 # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called,
120 # which is not ideal
121 if "::~" in caller:
122 to_be_removed.add(caller)
123 # dyload entry points for VCL builder
124 if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller:
125 to_be_removed.add(caller)
126 if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller:
127 to_be_removed.add(caller)
128 # find all the UNO load-by-symbol-name entrypoints
129 uno_constructor_entrypoints = set()
130 git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True)
131 with git_grep_process.stdout as txt:
132 for line in txt:
133 idx1 = line.find(b"\"")
134 idx2 = line.find(b"\"", idx1 + 1)
135 func = line[idx1+1 : idx2]
136 uno_constructor_entrypoints.add(func.decode('utf-8'))
137 for caller in callDict:
138 if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller:
139 for func in uno_constructor_entrypoints:
140 if func in caller:
141 to_be_removed.add(caller)
142 # remove everything reachable from the found entry points
143 for caller in to_be_removed:
144 remove_reachable(callDict, caller)
145 for caller in callDict:
146 callDict[caller] -= to_be_removed
148 # create a reverse call graph
149 inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name)
150 for caller in callDict:
151 for callee in callDict[caller]:
152 inverseCallDict[callee].add(caller)
154 print_tree_recurse_set = set() # protect against cycles
155 def print_tree(f, callDict, caller, depth):
156 if depth == 0:
157 f.write("\n") # add an empty line before each tree
158 print_tree_recurse_set.clear()
159 # protect against cycles
160 if caller in print_tree_recurse_set:
161 return
162 # when printing out trees, things that are not in the map are things that are reachable,
163 # so we're not interested in them
164 if caller not in callDict:
165 return
166 print_tree_recurse_set.add(caller)
167 f.write(" " * depth + caller + "\n")
168 f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n")
169 calleeSet = callDict[caller]
170 for c in calleeSet:
171 print_tree(f, callDict, c, depth+1)
173 # find possible roots (ie. entrypoints) by looking for methods that are not called
174 def dump_possible_roots():
175 possibleRootList = list()
176 for caller in callDict:
177 if caller not in inverseCallDict and caller in definitionToSourceLocationMap:
178 possibleRootList.append(caller)
179 possibleRootList.sort()
181 # print out first 100 trees of caller->callees
182 count = 0
183 with open("compilerplugins/clang/methodcycles.roots", "wt") as f:
184 f.write("callDict size " + str(len(callDict)) + "\n")
185 f.write("possibleRootList size " + str(len(possibleRootList)) + "\n")
186 f.write("\n")
187 for caller in possibleRootList:
188 f.write(caller + "\n")
189 f.write(" " + definitionToSourceLocationMap[caller] + "\n")
190 #print_tree(f, caller, 0)
191 count = count + 1
192 #if count>1000: break
194 # Look for cycles in a directed graph
195 # Adapted from:
196 # https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle
197 def print_cycles():
198 with open("compilerplugins/clang/methodcycles.results", "wt") as f:
199 path = set()
200 visited = set()
202 def printPath(path):
203 if len(path) < 2:
204 return
205 # we may have found a cycle, but if the cycle is called from outside the cycle
206 # the code is still in use.
207 for p in path:
208 for caller in inverseCallDict[p]:
209 if caller not in path:
210 return
211 f.write("found cycle\n")
212 for p in path:
213 f.write(" " + p + "\n")
214 f.write(" " + definitionToSourceLocationMap[p] + "\n")
215 f.write("\n")
217 def checkCyclic(vertex):
218 if vertex in visited:
219 return
220 visited.add(vertex)
221 path.add(vertex)
222 if vertex in callDict:
223 for neighbour in callDict[vertex]:
224 if neighbour in path:
225 printPath(path)
226 break
227 else:
228 checkCyclic(neighbour)
229 path.remove(vertex)
231 for caller in callDict:
232 checkCyclic(caller)
234 print_cycles()
236 # print partitioned sub-graphs
237 def print_partitions():
238 callDict2 = callDict
239 # Remove anything with no callees, and that is itself not called.
240 # After this stage, we should only be left with closed sub-graphs ie. partitions
241 while True:
242 to_be_removed.clear()
243 for caller in callDict2:
244 if len(callDict2[caller]) == 0 \
245 or caller not in inverseCallDict[caller]:
246 to_be_removed.add(caller)
247 if len(to_be_removed) == 0:
248 break
249 for caller in to_be_removed:
250 remove_reachable(callDict2, caller)
251 for caller in callDict2:
252 callDict2[caller] -= to_be_removed
254 count = 0
255 with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f:
256 f.write("callDict size " + str(len(callDict2)) + "\n")
257 f.write("\n")
258 while len(callDict2) > 0:
259 print_tree(f, callDict2, next(iter(callDict2)), 0)
260 for c in print_tree_recurse_set:
261 callDict2.pop(c, None)
262 count = count + 1
263 if count>1000:
264 break
266 print_partitions()