Resolves: tdf#149277 we don't want to search the whole tree for dups
[LibreOffice.git] / bin / find-mergedlib-can-be-private.py
blobf21f740529179550765e0a71932b23dd16c8f7a1
1 #!/usr/bin/python2
3 # Generate a custom linker script/map file for the --enable-mergedlibs merged library
4 # which reduces the startup time and enables further optimisations with --enable-lto because 60% or more
5 # of the symbols become internal only.
8 import subprocess
9 import sys
10 import re
11 import multiprocessing
# Dynamic symbols defined by libmergedlo.so (filled in below from `nm -D`).
exported_symbols = set()
# Undefined dynamic symbols of the other binaries that mention "mergedlo"
# (filled in below from `objdump -T`).
imported_symbols = set()

# Copied from solenv/gbuild/extensions/pre_MergedLibsList.mk
# TODO there has to be a way to run gmake and get it to dump this list for me
#
# Every entry is a bare library name.  Four entries used to end in ")"
# (e.g. "crashreport)"), presumably a leftover of the surrounding make syntax
# when the list was copied; such a string can never equal a name derived from
# a file path, so the ")" has been dropped.
merged_libs = {
    "avmedia",
    "basctl",
    "basprov",
    "basegfx",
    "canvasfactory",
    "canvastools",
    "comphelper",
    "configmgr",
    "cppcanvas",
    "crashreport",
    "dbtools",
    "deployment",
    "deploymentmisc",
    "desktopbe1",
    "desktop_detector",
    "drawinglayer",
    "editeng",
    "expwrap",
    "filterconfig",
    "fsstorage",
    "fwk",
    "helplinker",
    "i18npool",
    "i18nutil",
    "lng",
    "localebe1",
    "msfilter",
    "mtfrenderer",
    "opencl",
    "package2",
    "sax",
    "sb",
    "simplecanvas",
    "sfx",
    "sofficeapp",
    "sot",
    "spl",
    "stringresource",
    "svl",
    "svt",
    "svx",
    "svxcore",
    "tk",
    "tl",
    "ucb1",
    "ucbhelper",
    "ucpexpand1",
    "ucpfile1",
    "unoxml",
    "utl",
    "uui",
    "vcl",
    "xmlscript",
    "xo",
    "xstor",
}
# Look for symbols exported by libmerged.
#
# universal_newlines=True makes the pipe yield text instead of bytes, so the
# str regex below works on Python 3 as well as on Python 2.
subprocess_nm = subprocess.Popen(
    "nm -D instdir/program/libmergedlo.so",
    stdout=subprocess.PIPE,
    shell=True,
    universal_newlines=True
)
with subprocess_nm.stdout as txt:
    # We are looking for lines something like:
    # 0000000000036ed0 T flash_component_getFactory
    line_regex = re.compile(r'^[0-9a-fA-F]+ T ')
    for line in txt:
        line = line.strip()
        if line_regex.match(line):
            # Fields are "<address> T <symbol>"; keep the symbol.
            exported_symbols.add(line.split(" ")[2])
# The output has been read to the end, so there is nothing left to terminate;
# wait() reaps the child instead of leaving it behind as a zombie.
subprocess_nm.wait()
# Look for symbols imported from libmerged: list every installed file and
# cppunit test library whose contents mention "mergedlo", then collect the
# undefined dynamic symbols of each one.
#
# universal_newlines=True makes both pipes yield text instead of bytes, so
# the string handling below works on Python 3 as well as on Python 2.
subprocess_find = subprocess.Popen(
    "(find instdir/program/ -type f; ls ./workdir/LinkTarget/CppunitTest/*.so) | xargs grep -l mergedlo",
    stdout=subprocess.PIPE,
    shell=True,
    universal_newlines=True
)
with subprocess_find.stdout as txt:
    for line in txt:
        sharedlib = line.strip()
        # Text between the first "/lib" and the trailing three characters
        # (".so" for a shared library).  For a path without "/lib" this is
        # just an arbitrary substring that will not match anything.
        # NOTE(review): installed libraries appear to carry an "lo" suffix
        # (cf. libmergedlo.so), in which case this yields e.g. "svxlo" and
        # never matches an entry of merged_libs -- confirm.
        s = sharedlib[sharedlib.find("/lib") + 4 : len(sharedlib) - 3]
        if s in merged_libs:
            continue
        # Look for imported symbols.  The file name is passed as its own
        # argument rather than being pasted into a shell command line.
        subprocess_objdump = subprocess.Popen(
            ["objdump", "-T", sharedlib],
            stdout=subprocess.PIPE,
            universal_newlines=True
        )
        with subprocess_objdump.stdout as txt2:
            # Ignore the four lines of header bumf.
            txt2.readline()
            txt2.readline()
            txt2.readline()
            txt2.readline()
            # We are looking for lines something like (noting that one of them uses spaces, and the other tabs)
            # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
            for line2 in txt2:
                line2 = line2.strip()
                if "*UND*" not in line2:
                    continue
                # The symbol name is the last space-separated field.
                tokens = line2.split(" ")
                sym = tokens[-1].strip()
                imported_symbols.add(sym)
        # Output fully consumed: reap the child rather than signalling it.
        subprocess_objdump.wait()
subprocess_find.wait()
# Exported symbols that some other binary really imports; these are the ones
# that have to stay public.
intersec_symbols = exported_symbols.intersection(imported_symbols)
print("no symbols exported from libmerged = " + str(len(exported_symbols)))
# What can be made internal is everything exported but never imported, i.e.
# the exported symbols minus the intersection (the same "exported minus
# imported" rule the class analysis further down uses) -- not the
# intersection itself.
print("no symbols that can be made internal = " + str(len(exported_symbols) - len(intersec_symbols)))
119 # Now look for classes where none of the class symbols are imported,
120 # i.e. we can mark the whole class as hidden
def extract_class(sym):
    """Return the class-like scope of the mangled symbol *sym*, or "".

    The symbol is demangled by running ``c++filt`` on it.  Then:

    * "vtable for X" yields "X";
    * a leading "non-virtual thunk to " / "virtual thunk to " is dropped;
    * for anything that contains "(", the result is the text before the
      last "::" that precedes the first "(" (for "A::B::f(int)" that is
      "A::B");
    * everything else (no "(" or no "::" before it) yields "".

    Raises whatever ``subprocess.check_output`` raises when ``c++filt``
    cannot be run or exits with a non-zero status.
    """
    # universal_newlines=True returns text instead of bytes, so the string
    # comparisons below work on Python 3 as well as on Python 2.
    filtered_sym = subprocess.check_output(
        ["c++filt", sym], universal_newlines=True
    ).strip()

    vtable_prefix = "vtable for "
    if filtered_sym.startswith(vtable_prefix):
        return filtered_sym[len(vtable_prefix):]

    # At most one thunk prefix is removed.
    for thunk_prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if filtered_sym.startswith(thunk_prefix):
            filtered_sym = filtered_sym[len(thunk_prefix):]
            break

    paren = filtered_sym.find("(")
    if paren == -1:
        return ""
    scope_end = filtered_sym.rfind("::", 0, paren)
    if scope_end == -1:
        return ""
    return filtered_sym[:scope_end]
# Map every symbol to its class in parallel; extract_class starts one c++filt
# process per symbol, so this is spread over all CPUs.
pool = multiprocessing.Pool(multiprocessing.cpu_count())
try:
    classes_with_exported_symbols = set(pool.map(extract_class, list(exported_symbols)))
    classes_with_imported_symbols = set(pool.map(extract_class, list(imported_symbols)))
finally:
    # Shut the workers down as soon as both batches are done instead of
    # leaving them around for the rest of the run.
    pool.close()
    pool.join()

# A class none of whose symbols is imported anywhere can be hidden entirely.
can_be_private_classes = classes_with_exported_symbols - classes_with_imported_symbols
# Some stuff is particular to Windows, so won't be found by a Linux analysis, so remove
# those classes.
can_be_private_classes.discard("SpinField")

# One class name per line, sorted, leaving out anything from namespace std.
with open("bin/find-mergedlib-can-be-private.classes.results", "wt") as f:
    for sym in sorted(can_be_private_classes):
        if sym.startswith(("std::", "void std::")):
            continue
        f.write(sym + "\n")