improve the mergedlib script
[LibreOffice.git] / bin / find-mergedlib-can-be-private.py
blobbab145839571deb697b9c5fd69125e4d3a5a314d
1 #!/usr/bin/python3
# Generate a custom linker script/map file for the --enable-mergedlibs merged library
# which reduces the startup time and enables further optimisations with --enable-lto because 60% or more
# of the symbols become internal only.
8 import subprocess
9 import re
10 import multiprocessing
# Raw (still mangled) symbol names as reported by nm/objdump.
exported_symbols1 = set()
imported_symbols1 = set()
# The same symbols after demangling.
exported_symbols2 = set() # decoded
imported_symbols2 = set() # decoded

# In the nm output we are looking for lines something like:
# 0000000000036ed0 T flash_component_getFactory
# Compiled once up front instead of once per library.
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')

# find all our shared libs, note that all the libs that have been merged into the mergedlib library will
# not contain anything, those files are just empty
#
# The glob patterns are quoted so the shell passes them to find untouched; left unquoted, the shell
# would expand them against any matching file in the current directory before find ever saw them.
with subprocess.Popen(
        "find ./instdir -name '*.so' && find ./workdir/LinkTarget/CppunitTest -name '*.so'",
        stdout=subprocess.PIPE, shell=True) as subprocess_find:
    for line in subprocess_find.stdout:
        sharedlib = line.strip()

        # look for exported symbols
        # The path is passed as its own argv entry (no shell), so unusual characters in it are harmless.
        # Leaving the with-block closes the pipe and waits for the child, so nothing is left unreaped.
        with subprocess.Popen([b"nm", b"-D", sharedlib], stdout=subprocess.PIPE) as subprocess_nm:
            for line2_bytes in subprocess_nm.stdout:
                line2 = line2_bytes.strip().decode("utf-8")
                if line_regex.match(line2):
                    # fields are: address, type letter, symbol name
                    sym = line2.split(" ")[2].strip()
                    exported_symbols1.add(sym)

        # look for imported symbols
        with subprocess.Popen([b"objdump", b"-T", sharedlib], stdout=subprocess.PIPE) as subprocess_objdump:
            txt2 = subprocess_objdump.stdout
            # ignore some header bumpf
            for _ in range(4):
                txt2.readline()
            # We are looking for lines something like:
            # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
            for line2_bytes in txt2:
                line2 = line2_bytes.strip().decode("utf-8")
                if "*UND*" not in line2:
                    continue
                # the symbol name is the last space-separated field
                sym = line2.split(" ")[-1].strip()
                imported_symbols1.add(sym)
# look for imported symbols in executables
#
# The glob pattern is quoted so the shell passes it to find untouched instead of expanding it against
# the current directory.
with subprocess.Popen("find ./instdir -name '*.bin'", stdout=subprocess.PIPE, shell=True) as subprocess_find:
    for line in subprocess_find.stdout:
        executable = line.strip()
        # look for undefined (i.e. imported) symbols
        # The path is passed as its own argv entry (no shell), and leaving the with-block waits for
        # the child so it is always reaped.
        with subprocess.Popen([b"nm", b"-D", executable], stdout=subprocess.PIPE) as subprocess_nm:
            # We are looking for lines something like:
            # U sal_detail_deinitialize
            for line2_bytes in subprocess_nm.stdout:
                tokens = line2_bytes.decode("utf-8").split()
                # Only accept lines that are exactly "U <symbol>"; anything else is not an import line.
                if len(tokens) == 2 and tokens[0] == "U":
                    imported_symbols1.add(tokens[1])
# Now we have to symbolize before comparing because sometimes (due to thunks) two
# different encoded names symbolize to the same method/func name

def _strip_thunk_prefix(demangled):
    """Return demangled without a leading "non-virtual thunk to " / "virtual thunk to "."""
    for prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if demangled.startswith(prefix):
            return demangled[len(prefix):]
    return demangled


def _demangle_into(mangled_symbols, demangled_symbols, done, total):
    """Demangle mangled_symbols with c++filt and add the results to demangled_symbols.

    Symbols are passed to c++filt as command-line arguments in batches, so one
    process handles many symbols instead of one process per symbol.

    done is the number of symbols already processed before this call and total
    is the overall number to process; both are only used for the percentage
    that is printed after every full batch. Returns the updated done count.
    """
    batch_size = 128
    pending = list(mangled_symbols)
    for start in range(0, len(pending), batch_size):
        batch = pending[start:start + batch_size]
        output = subprocess.check_output(["c++filt"] + batch).decode("utf-8")
        # c++filt prints one result line per symbol argument
        for filtered_sym in output.splitlines():
            demangled_symbols.add(_strip_thunk_prefix(filtered_sym.strip()))
        done += len(batch)
        if len(batch) == batch_size:
            print(str(int(done * 100 / total)) + "%")
    return done


# The counter runs across both sets so that the printed percentage keeps climbing
# towards 100 instead of starting over for the exported symbols.
progress_max_len = len(imported_symbols1) + len(exported_symbols1)
progress = _demangle_into(imported_symbols1, imported_symbols2, 0, progress_max_len)
progress = _demangle_into(exported_symbols1, exported_symbols2, progress, progress_max_len)
# Exported symbols that none of the scanned libraries or executables import.
unused_exports = exported_symbols2.difference(imported_symbols2)

# Print the size of each symbol set as a short summary.
for label, symbols in (
        ("exported = ", exported_symbols2),
        ("imported = ", imported_symbols2),
        ("unused_exports = ", unused_exports)):
    print(label + str(len(symbols)))
# for each class, count how many symbols will become hidden if we mark the class as hidden
# Every exported symbol of a class adds one to its count, every imported symbol subtracts one.
can_be_hidden_count = dict()
for symbols, delta in ((exported_symbols2, 1), (imported_symbols2, -1)):
    for sym in symbols:
        # everything before the last "::" is treated as the class name
        clz, separator, _member = sym.rpartition("::")
        if not separator:
            continue
        can_be_hidden_count[clz] = can_be_hidden_count.get(clz, 0) + delta

# convert to list, and sort the results in descending order
can_be_hidden_list = [(cnt, clz) for clz, cnt in can_be_hidden_count.items() if cnt > 0]
can_be_hidden_list.sort(reverse=True)

with open("bin/find-mergedlib-can-be-private-symbols.classes.results", "wt") as f:
    for cnt, clz in can_be_hidden_list:
        # the list is sorted descending, so nothing after this entry reaches 10 either
        if cnt < 10:
            break
        f.write(str(cnt) + " " + clz + "\n")
# Filter out most of the noise.
# No idea where these are coming from, but not our code.
noise_prefixes = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_",
    "LL_", "LP_", "LU", "MIP", "MPS", "NSS", "NSC_",
    "PK11", "PL_", "PQ", "PBE_", "PORT_", "PRP_", "PR_", "PT_",
    "QS_", "REPORT_", "RSA_", "SEC", "SGN", "SOS", "SSL_", "VFY_",
    "_PR_", "ber_", "bfp_", "ldap_", "ne_", "opj_", "pg_", "pq",
    "presolve_", "sqlite3_", "libepubgen::", "lucene::", "Hunspell::",
    "sk_", "_Z",
)

# dynamically loaded
dynamically_loaded_suffixes = (
    "get_implementation",
    "component_getFactory",
)
dynamically_loaded_names = (
    "CreateUnoWrapper",
    "ExportDOC",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
)
dynamically_loaded_prefixes = (
    "Import",
    "Java_com_sun_star_",
    "TestImport",
    "getAllCalendars_",
    "getAllCurrencies_",
    "getAllFormats",
    "getBreakIteratorRules_",
    "getCollationOptions_",
    "getCollatorImplementation_",
    "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_",
    "getForbiddenCharacters_",
    "getIndexAlgorithm_",
    "getLCInfo_",
    "getLocaleItem_",
    "getOutlineNumberingLevels_",
    "getReservedWords_",
    "getSTC_",
    "getSearchOptions_",
    "getTransliterations_",
    "getUnicodeScripts_",
    "lok_",
)

# UDK API
udk_api_prefixes = (
    "osl_",
    "rtl_",
    "typelib_",
    "typereg_",
    "uno_",
)

skipped_prefixes = noise_prefixes + dynamically_loaded_prefixes + udk_api_prefixes

# Write every unused export that survives the filters, one per line, in sorted order.
with open("bin/find-mergedlib-can-be-private-symbols.functions.results", "wt") as f:
    for sym in sorted(unused_exports):
        if sym.startswith(skipped_prefixes):
            continue
        if sym.endswith(dynamically_loaded_suffixes):
            continue
        if sym in dynamically_loaded_names:
            continue
        f.write(sym + "\n")