Updated getPackageName2PathMap
[LibreOffice.git] / bin / find-mergedlib-can-be-private.py
blob243e19374d8ae6e2c0ed078ac07a48164d47256c
1 #!/usr/bin/python3
# Generate a custom linker script/map file for the --enable-mergelibs merged library
# which reduces the startup time and enables further optimisations with --enable-lto because 60% or more
# of the symbols become internal only.
8 import subprocess
9 import re
# Symbol names exactly as reported by nm/objdump (still mangled).
exported_symbols1 = set()
imported_symbols1 = set()
# The same symbols after being run through c++filt (decoded).
exported_symbols2 = set()
imported_symbols2 = set()
# Find all our shared libs. Note that all the libs that have been merged into
# the mergedlib library will not contain anything, those files are just empty.
#
# Every command is started from an argument list instead of a shell string:
# the "*.so" pattern then reaches find(1) unexpanded, and library paths that
# contain spaces or shell metacharacters are passed through intact.
# Each tree is searched by its own find(1) run, so a failure in the first
# tree no longer prevents the second one from being scanned.
# Popen is used as a context manager so each child is waited for (reaped)
# once its output has been consumed.

# nm prints defined text symbols as lines something like:
# 0000000000036ed0 T flash_component_getFactory
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')

for search_root in (b"./instdir", b"./workdir/LinkTarget/CppunitTest"):
    with subprocess.Popen([b"find", search_root, b"-name", b"*.so"],
                          stdout=subprocess.PIPE) as subprocess_find:
        for line in subprocess_find.stdout:
            # Only the line terminator is removed; the rest is the path.
            sharedlib = line.rstrip(b"\n")

            # look for exported symbols
            with subprocess.Popen([b"nm", b"-D", sharedlib],
                                  stdout=subprocess.PIPE) as subprocess_nm:
                for line2_bytes in subprocess_nm.stdout:
                    line2 = line2_bytes.strip().decode("utf-8")
                    if line_regex.match(line2):
                        # Fields are "<address> T <symbol>".
                        sym = line2.split(" ")[2].strip()
                        exported_symbols1.add(sym)

            # look for imported symbols
            with subprocess.Popen([b"objdump", b"-T", sharedlib],
                                  stdout=subprocess.PIPE) as subprocess_objdump:
                txt2 = subprocess_objdump.stdout
                # ignore some header bumpf
                for _ in range(4):
                    txt2.readline()
                # We are looking for lines something like:
                # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
                for line2_bytes in txt2:
                    line2 = line2_bytes.strip().decode("utf-8")
                    if "*UND*" not in line2:
                        continue
                    # The symbol name is the last space-separated field.
                    sym = line2.split(" ")[-1].strip()
                    imported_symbols1.add(sym)
# look for imported symbols in executables
#
# The commands are started from argument lists (no shell), so "*.bin" is not
# expanded before find(1) sees it and executable paths need no quoting.
# Popen is used as a context manager so every child process is waited for.
with subprocess.Popen([b"find", b"./instdir", b"-name", b"*.bin"],
                      stdout=subprocess.PIPE) as subprocess_find:
    for line in subprocess_find.stdout:
        executable = line.rstrip(b"\n")
        # look for imported (undefined) symbols
        with subprocess.Popen([b"nm", b"-D", executable],
                              stdout=subprocess.PIPE) as subprocess_nm:
            # We are looking for lines something like:
            # U sal_detail_deinitialize
            # The former "| grep -w U" is replaced by checking the type
            # column directly, so only genuinely undefined symbols match.
            for line2_bytes in subprocess_nm.stdout:
                tokens = line2_bytes.decode("utf-8").split()
                if len(tokens) == 2 and tokens[0] == "U":
                    imported_symbols1.add(tokens[1])
# Now we have to symbolize before comparing because sometimes (due to thunks) two
# different encoded names symbolize to the same method/func name


def _demangle(sym):
    """Return the c++filt decoding of sym with any thunk prefix removed.

    "non-virtual thunk to X" and "virtual thunk to X" are both reduced to
    "X" so that a thunk and the function it forwards to compare equal.
    Raises subprocess.CalledProcessError if c++filt exits with an error.
    """
    filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8")
    for thunk_prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if filtered_sym.startswith(thunk_prefix):
            return filtered_sym[len(thunk_prefix):]
    return filtered_sym


# The counter runs across both sets (it is not reset in between), because
# progress_max_len is the combined size; the printed percentage therefore
# goes from 0 to 100 once instead of restarting for the exported symbols.
progress = 0
progress_max_len = len(imported_symbols1) + len(exported_symbols1)
for mangled_symbols, demangled_symbols in ((imported_symbols1, imported_symbols2),
                                           (exported_symbols1, exported_symbols2)):
    for sym in mangled_symbols:
        progress += 1
        # One c++filt process is started per symbol, so report progress
        # every 128 symbols.
        if progress % 128 == 0:
            print(str(int(progress * 100 / progress_max_len)) + "%")
        demangled_symbols.add(_demangle(sym))
# Decoded names that some library exports but that no scanned library or
# executable imports.
unused_exports = exported_symbols2 - imported_symbols2
print("exported = " + str(len(exported_symbols2)))
print("imported = " + str(len(imported_symbols2)))
print("unused_exports = " + str(len(unused_exports)))
# for each class, count how many symbols will become hidden if we mark the class as hidden


def _enclosing_scope(sym):
    """Return the part of a decoded name before its last top-level "::".

    Only "::" separators outside parentheses are considered. The decoded
    names carry their parameter lists, so a plain rfind("::") would pick up
    a separator inside an argument type: for
    "A::f(rtl::OUString const&)" it would yield "A::f(rtl" instead of "A".
    Returns None when the name has no "::" outside parentheses.
    """
    depth = 0
    last_separator = -1
    for pos, ch in enumerate(sym):
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        elif depth == 0 and sym.startswith("::", pos):
            last_separator = pos
    if last_separator == -1:
        return None
    return sym[:last_separator]


# Each exported symbol adds one to its class, each imported symbol takes one
# away; classes that are only ever imported end up negative.
can_be_hidden_count = dict()
for symbols, delta in ((exported_symbols2, 1), (imported_symbols2, -1)):
    for sym in symbols:
        clz = _enclosing_scope(sym)
        if clz is None:
            continue
        can_be_hidden_count[clz] = can_be_hidden_count.get(clz, 0) + delta
# convert to list, and sort the results in descending order
# (tuples compare by count first, then by class name)
can_be_hidden_list = [(cnt, clz) for clz, cnt in can_be_hidden_count.items() if cnt > 0]
can_be_hidden_list.sort(reverse=True)
with open("bin/find-mergedlib-can-be-private-symbols.classes.results", "wt") as f:
    for cnt, clz in can_be_hidden_list:
        # The list is sorted descending, so once a count drops below 10
        # every remaining entry is below 10 as well.
        if cnt < 10:
            break
        f.write(str(cnt) + " " + clz + "\n")
# Filter out most of the noise.
# No idea where these are coming from, but not our code.
third_party_prefixes = (
    "CERT_",
    "DER_",
    "FORM_",
    "FPDF",
    "HASH_",
    "Hunspell_",
    "LL_",
    "LP_",
    "LU",
    "MIP",
    "MPS",
    "NSS",
    "NSC_",
    "PK11",
    "PL_",
    "PQ",
    "PBE_",
    "PORT_",
    "PRP_",
    "PR_",
    "PT_",
    "QS_",
    "REPORT_",
    "RSA_",
    "SEC",
    "SGN",
    "SOS",
    "SSL_",
    "VFY_",
    "_PR_",
    "ber_",
    "bfp_",
    "ldap_",
    "ne_",
    "opj_",
    "pg_",
    "pq",
    "presolve_",
    "sqlite3_",
    "libepubgen::",
    "lucene::",
    "Hunspell::",
    "sk_",
    "_Z",
)

# dynamically loaded
dynamically_loaded_suffixes = (
    "get_implementation",
    "component_getFactory",
)
dynamically_loaded_names = {
    "CreateUnoWrapper",
    "ExportDOC",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
}
dynamically_loaded_prefixes = (
    "Import",
    "Java_com_sun_star_",
    "TestImport",
    "getAllCalendars_",
    "getAllCurrencies_",
    "getAllFormats",
    "getBreakIteratorRules_",
    "getCollationOptions_",
    "getCollatorImplementation_",
    "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_",
    "getForbiddenCharacters_",
    "getIndexAlgorithm_",
    "getLCInfo_",
    "getLocaleItem_",
    "getOutlineNumberingLevels_",
    "getReservedWords_",
    "getSTC_",
    "getSearchOptions_",
    "getTransliterations_",
    "getUnicodeScripts_",
    "lok_",
)

# UDK API
udk_api_prefixes = (
    "osl_",
    "rtl_",
    "typelib_",
    "typereg_",
    "uno_",
)

# str.startswith accepts a tuple, so all prefix groups are tested in one call.
ignored_prefixes = third_party_prefixes + dynamically_loaded_prefixes + udk_api_prefixes

with open("bin/find-mergedlib-can-be-private-symbols.functions.results", "wt") as f:
    for sym in sorted(unused_exports):
        if sym.startswith(ignored_prefixes):
            continue
        if sym.endswith(dynamically_loaded_suffixes):
            continue
        if sym in dynamically_loaded_names:
            continue
        f.write(sym + "\n")