9471fce06e
Change-Id: I44e546defb278bb5217ed028dcaebc9fb2d23f0b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/171020 Tested-by: Jenkins Tested-by: Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org> Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org>
313 lines
11 KiB
Python
Executable file
313 lines
11 KiB
Python
Executable file
#!/usr/bin/python3
|
|
#
|
|
# Find exported symbols that can be made non-exported.
|
|
#
|
|
# Noting that (a) parsing these commands is a pain, the output is quite irregular and (b) I'm fumbling in the
|
|
# dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol, linux linking
|
|
# is rather complex.
|
|
#
|
|
# Takes about 5min to run on a decent machine.
|
|
#
|
|
# The standalone function analysis is reasonable reliable, but the class/method analysis is less so
|
|
# (something to do with destructor thunks not showing up in my results?)
|
|
#
|
|
# Also, the class/method analysis will not catch problems like
|
|
# 'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
|
|
# but loplugin:dyncastvisibility will do that for you
|
|
#
|
|
|
|
import subprocess
|
|
import re
|
|
|
|
exported_symbols1 = set()
|
|
imported_symbols1 = set()
|
|
exported_symbols2 = set() # decoded
|
|
imported_symbols2 = set() # decoded
|
|
|
|
|
|
# find all our shared libs
|
|
subprocess_find = subprocess.Popen("find ./instdir -name *.so && find ./workdir/LinkTarget/CppunitTest -name *.so",
|
|
stdout=subprocess.PIPE, shell=True)
|
|
with subprocess_find.stdout as txt:
|
|
for line in txt:
|
|
sharedlib = line.strip()
|
|
# look for exported symbols
|
|
subprocess_nm = subprocess.Popen(b"nm -D " + sharedlib, stdout=subprocess.PIPE, shell=True)
|
|
with subprocess_nm.stdout as txt2:
|
|
# We are looking for lines something like:
|
|
# 0000000000036ed0 T flash_component_getFactory
|
|
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')
|
|
for line2_bytes in txt2:
|
|
line2 = line2_bytes.strip().decode("utf-8")
|
|
if line_regex.match(line2):
|
|
sym = line2.split(" ")[2].strip()
|
|
exported_symbols1.add(sym)
|
|
subprocess_nm.terminate()
|
|
# look for imported symbols
|
|
subprocess_objdump = subprocess.Popen(b"objdump -T " + sharedlib, stdout=subprocess.PIPE, shell=True)
|
|
with subprocess_objdump.stdout as txt2:
|
|
# ignore some header bumpf
|
|
txt2.readline()
|
|
txt2.readline()
|
|
txt2.readline()
|
|
txt2.readline()
|
|
# We are looking for lines something like:
|
|
# 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
|
|
for line2_bytes in txt2:
|
|
line2 = line2_bytes.strip().decode("utf-8")
|
|
if "*UND*" not in line2:
|
|
continue
|
|
tokens = line2.split(" ")
|
|
sym = tokens[len(tokens)-1].strip()
|
|
imported_symbols1.add(sym)
|
|
subprocess_objdump.terminate()
|
|
subprocess_find.terminate()
|
|
|
|
# look for imported symbols in executables
|
|
subprocess_find = subprocess.Popen("find ./instdir -name *.bin", stdout=subprocess.PIPE, shell=True)
|
|
with subprocess_find.stdout as txt:
|
|
for line in txt:
|
|
executable = line.strip()
|
|
# look for exported symbols
|
|
subprocess_nm = subprocess.Popen(b"nm -D " + executable + b" | grep -w U", stdout=subprocess.PIPE, shell=True)
|
|
with subprocess_nm.stdout as txt2:
|
|
# We are looking for lines something like:
|
|
# U sal_detail_deinitialize
|
|
for line2_bytes in txt2:
|
|
line2 = line2_bytes.strip().decode("utf-8")
|
|
sym = line2.split(" ")[1]
|
|
imported_symbols1.add(sym)
|
|
subprocess_find.terminate()
|
|
|
|
# Now we have to symbolize before comparing because sometimes (due to thunks) two
|
|
# different encoded names symbolize to the same method/func name
|
|
#
|
|
progress = 0
|
|
progress_max_len = len(imported_symbols1) + len(exported_symbols1)
|
|
for sym in imported_symbols1:
|
|
progress += 1
|
|
if (progress % 128 == 0):
|
|
print( str(int(progress * 100 / progress_max_len)) + "%")
|
|
filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8")
|
|
if filtered_sym.startswith("non-virtual thunk to "):
|
|
filtered_sym = filtered_sym[21:]
|
|
elif filtered_sym.startswith("virtual thunk to "):
|
|
filtered_sym = filtered_sym[17:]
|
|
imported_symbols2.add(filtered_sym)
|
|
progress = 0
|
|
for sym in exported_symbols1:
|
|
progress += 1
|
|
if (progress % 128 == 0):
|
|
print( str(int(progress * 100 / progress_max_len)) + "%")
|
|
filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8")
|
|
if filtered_sym.startswith("non-virtual thunk to "):
|
|
filtered_sym = filtered_sym[21:]
|
|
elif filtered_sym.startswith("virtual thunk to "):
|
|
filtered_sym = filtered_sym[17:]
|
|
exported_symbols2.add(filtered_sym)
|
|
|
|
unused_exports = exported_symbols2 - imported_symbols2
|
|
print("exported = " + str(len(exported_symbols2)))
|
|
print("imported = " + str(len(imported_symbols2)))
|
|
print("unused_exports = " + str(len(unused_exports)))
|
|
|
|
# for each class, count how many symbols will become hidden if we mark the class as hidden
|
|
can_be_hidden_count = dict()
|
|
for sym in exported_symbols2:
|
|
i = sym.rfind("::")
|
|
if i == -1:
|
|
continue
|
|
clz = sym[:i]
|
|
if clz in can_be_hidden_count:
|
|
can_be_hidden_count[clz] = can_be_hidden_count[clz] + 1
|
|
else:
|
|
can_be_hidden_count[clz] = 1
|
|
for sym in imported_symbols2:
|
|
i = sym.rfind("::")
|
|
if i == -1:
|
|
continue
|
|
clz = sym[:i]
|
|
if clz in can_be_hidden_count:
|
|
can_be_hidden_count[clz] = can_be_hidden_count[clz] - 1
|
|
else:
|
|
can_be_hidden_count[clz] = -1
|
|
# convert to list, and sort the results in descending order
|
|
can_be_hidden_list = list()
|
|
for clz in can_be_hidden_count:
|
|
cnt = can_be_hidden_count[clz]
|
|
if cnt > 0:
|
|
can_be_hidden_list.append((cnt, clz))
|
|
can_be_hidden_list.sort(reverse=True)
|
|
with open("bin/find-can-be-private-symbols.classes.results", "wt") as f:
|
|
for i in can_be_hidden_list:
|
|
if i[0] < 10:
|
|
break
|
|
f.write(str(i[0]) + " " + i[1] + "\n")
|
|
|
|
|
|
with open("bin/find-can-be-private-symbols.functions.results", "wt") as f:
|
|
for sym in sorted(unused_exports):
|
|
# Filter out most of the noise.
|
|
# No idea where these are coming from, but not our code.
|
|
if sym.startswith("CERT_"):
|
|
continue
|
|
elif sym.startswith("DER_"):
|
|
continue
|
|
elif sym.startswith("FORM_"):
|
|
continue
|
|
elif sym.startswith("FPDF"):
|
|
continue
|
|
elif sym.startswith("HASH_"):
|
|
continue
|
|
elif sym.startswith("Hunspell_"):
|
|
continue
|
|
elif sym.startswith("LL_"):
|
|
continue
|
|
elif sym.startswith("LP_"):
|
|
continue
|
|
elif sym.startswith("LU"):
|
|
continue
|
|
elif sym.startswith("MIP"):
|
|
continue
|
|
elif sym.startswith("MPS"):
|
|
continue
|
|
elif sym.startswith("NSS"):
|
|
continue
|
|
elif sym.startswith("NSC_"):
|
|
continue
|
|
elif sym.startswith("PK11"):
|
|
continue
|
|
elif sym.startswith("PL_"):
|
|
continue
|
|
elif sym.startswith("PQ"):
|
|
continue
|
|
elif sym.startswith("PBE_"):
|
|
continue
|
|
elif sym.startswith("PORT_"):
|
|
continue
|
|
elif sym.startswith("PRP_"):
|
|
continue
|
|
elif sym.startswith("PR_"):
|
|
continue
|
|
elif sym.startswith("PT_"):
|
|
continue
|
|
elif sym.startswith("QS_"):
|
|
continue
|
|
elif sym.startswith("REPORT_"):
|
|
continue
|
|
elif sym.startswith("RSA_"):
|
|
continue
|
|
elif sym.startswith("SEC"):
|
|
continue
|
|
elif sym.startswith("SGN"):
|
|
continue
|
|
elif sym.startswith("SOS"):
|
|
continue
|
|
elif sym.startswith("SSL_"):
|
|
continue
|
|
elif sym.startswith("VFY_"):
|
|
continue
|
|
elif sym.startswith("_PR_"):
|
|
continue
|
|
elif sym.startswith("ber_"):
|
|
continue
|
|
elif sym.startswith("bfp_"):
|
|
continue
|
|
elif sym.startswith("ldap_"):
|
|
continue
|
|
elif sym.startswith("ne_"):
|
|
continue
|
|
elif sym.startswith("opj_"):
|
|
continue
|
|
elif sym.startswith("pg_"):
|
|
continue
|
|
elif sym.startswith("pq"):
|
|
continue
|
|
elif sym.startswith("presolve_"):
|
|
continue
|
|
elif sym.startswith("sqlite3_"):
|
|
continue
|
|
elif sym.startswith("libepubgen::"):
|
|
continue
|
|
elif sym.startswith("lucene::"):
|
|
continue
|
|
elif sym.startswith("Hunspell::"):
|
|
continue
|
|
elif sym.startswith("sk_"):
|
|
continue
|
|
elif sym.startswith("_Z"):
|
|
continue
|
|
# dynamically loaded
|
|
elif sym.endswith("get_implementation"):
|
|
continue
|
|
elif sym.endswith("component_getFactory"):
|
|
continue
|
|
elif sym == "CreateUnoWrapper":
|
|
continue
|
|
elif sym == "ExportDOC":
|
|
continue
|
|
elif sym == "ExportRTF":
|
|
continue
|
|
elif sym == "GetSaveWarningOfMSVBAStorage_ww8":
|
|
continue
|
|
elif sym == "GetSpecialCharsForEdit":
|
|
continue
|
|
elif sym.startswith("Import"):
|
|
continue
|
|
elif sym.startswith("Java_com_sun_star_"):
|
|
continue
|
|
elif sym.startswith("TestImport"):
|
|
continue
|
|
elif sym.startswith("getAllCalendars_"):
|
|
continue
|
|
elif sym.startswith("getAllCurrencies_"):
|
|
continue
|
|
elif sym.startswith("getAllFormats"):
|
|
continue
|
|
elif sym.startswith("getBreakIteratorRules_"):
|
|
continue
|
|
elif sym.startswith("getCollationOptions_"):
|
|
continue
|
|
elif sym.startswith("getCollatorImplementation_"):
|
|
continue
|
|
elif sym.startswith("getContinuousNumberingLevels_"):
|
|
continue
|
|
elif sym.startswith("getDateAcceptancePatterns_"):
|
|
continue
|
|
elif sym.startswith("getForbiddenCharacters_"):
|
|
continue
|
|
elif sym.startswith("getIndexAlgorithm_"):
|
|
continue
|
|
elif sym.startswith("getLCInfo_"):
|
|
continue
|
|
elif sym.startswith("getLocaleItem_"):
|
|
continue
|
|
elif sym.startswith("getOutlineNumberingLevels_"):
|
|
continue
|
|
elif sym.startswith("getReservedWords_"):
|
|
continue
|
|
elif sym.startswith("getSTC_"):
|
|
continue
|
|
elif sym.startswith("getSearchOptions_"):
|
|
continue
|
|
elif sym.startswith("getTransliterations_"):
|
|
continue
|
|
elif sym.startswith("getUnicodeScripts_"):
|
|
continue
|
|
elif sym.startswith("lok_"):
|
|
continue
|
|
# UDK API
|
|
elif sym.startswith("osl_"):
|
|
continue
|
|
elif sym.startswith("rtl_"):
|
|
continue
|
|
elif sym.startswith("typelib_"):
|
|
continue
|
|
elif sym.startswith("typereg_"):
|
|
continue
|
|
elif sym.startswith("uno_"):
|
|
continue
|
|
# remove things we found that do not exist in our source code, they're not ours
|
|
#if not(extractFunctionNameFromSignature(sym) in all_source_names): continue
|
|
f.write(sym + "\n")
|
|
|