#!/usr/bin/python3
# -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
|
|
|
|
|
|
|
|
import sys
|
|
|
|
# sadly need lxml because the python one doesn't preserve namespace prefixes
|
|
|
|
# and type-detection looks for the string "office:document"
|
|
|
|
from lxml import etree as ET
|
|
|
|
#import xml.etree.ElementTree as ET
|
|
|
|
|
|
|
|
def get_used_p_styles(root):
    """Return the set of paragraph style names referenced below *root*.

    The result contains the values of:

    * ``text:style-name`` and every entry of ``text:class-names`` on
      paragraphs, headings and the index template elements listed in
      ``elementnames``;
    * ``draw:text-style-name``, ``table:paragraph-style-name``,
      ``style:register-truth-ref-style-name``, ``form:text-style-name``
      and ``text:default-style-name`` on any element;
    * ``style:apply-style-name`` of every ``style:map`` that is a child
      of a paragraph style named by a ``text:cond-style-name``.

    Missing ``text:style-name`` attributes are recorded as ``None``,
    exactly as ``Element.get`` returns them.

    :param root: element whose subtree is searched (needs ``findall``
        and ``get`` with ElementTree semantics).
    :returns: a new ``set`` of style names.
    """
    text_ns = "{urn:oasis:names:tc:opendocument:xmlns:text:1.0}"
    draw_ns = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}"
    table_ns = "{urn:oasis:names:tc:opendocument:xmlns:table:1.0}"
    style_ns = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    form_ns = "{urn:oasis:names:tc:opendocument:xmlns:form:1.0}"

    elementnames = (
        "p",
        "h",
        "alphabetical-index-entry-template",
        "bibliography-entry-template",
        "illustration-index-entry-template",
        "index-source-style",
        "object-index-entry-template",
        "table-index-entry-template",
        "table-of-content-entry-template",
        "user-index-entry-template",
    )

    usedpstyles = set()
    usedcondstyles = set()

    # document content
    for elementname in elementnames:
        for p in root.findall(".//" + text_ns + elementname):
            usedpstyles.add(p.get(text_ns + "style-name"))
            condstyle = p.get(text_ns + "cond-style-name")
            if condstyle:
                usedcondstyles.add(condstyle)
            classnames = p.get(text_ns + "class-names")
            if classnames:
                # whitespace separated list; split() never yields ""
                usedpstyles.update(classnames.split())

    # attributes that name a paragraph style on arbitrary elements
    # (shapes, table templates, page layouts, form controls)
    for attribute in (
        draw_ns + "text-style-name",
        table_ns + "paragraph-style-name",
        style_ns + "register-truth-ref-style-name",
        form_ns + "text-style-name",
    ):
        for element in root.findall(".//*[@" + attribute + "]"):
            usedpstyles.add(element.get(attribute))

    # conditional styles: compare the name attribute instead of pasting
    # the name into an XPath string, so names containing quotes work
    if usedcondstyles:
        pstyles = root.findall(
            ".//" + style_ns + "style[@" + style_ns + "family='paragraph']")
        for pstyle in pstyles:
            if pstyle.get(style_ns + "name") in usedcondstyles:
                for map_ in pstyle.findall(style_ns + "map"):
                    usedpstyles.add(map_.get(style_ns + "apply-style-name"))

    # other styles
    default_attr = text_ns + "default-style-name"
    for notesconfig in root.findall(".//*[@" + default_attr + "]"):
        usedpstyles.add(notesconfig.get(default_attr))

    return usedpstyles
|
|
|
|
|
|
|
|
def add_parent_styles(usedstyles, styles):
    """Extend *usedstyles* with the styles its members depend on.

    For every element of *styles* whose ``style:name`` is in
    *usedstyles*, the values of ``style:parent-style-name`` and
    ``style:next-style-name`` are added.  The scan repeats until a pass
    adds nothing, so whole chains are followed.

    :param usedstyles: set of style names; modified in place.
    :param styles: re-iterable collection of style elements (it is
        walked once per pass).
    :returns: ``None``.
    """
    style_ns = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    name_attr = style_ns + "name"
    link_attrs = (
        style_ns + "parent-style-name",
        # only for paragraph styles and master-pages
        style_ns + "next-style-name",
    )

    size = -1
    while size != len(usedstyles):
        size = len(usedstyles)
        for style in styles:
            if style.get(name_attr) not in usedstyles:
                continue
            for link_attr in link_attrs:
                linked = style.get(link_attr)
                if linked:
                    usedstyles.add(linked)
|
|
|
|
|
|
|
|
def remove_unused_styles(root, usedstyles, styles, name):
    """Remove every element of *styles* whose name is not in *usedstyles*.

    The ``style:name`` of each style is printed; for a style that gets
    removed an additional ``removing unused <name> <style name>`` line
    is printed.

    The style is removed from its real parent when the element offers
    ``getparent()`` (lxml does).  Otherwise the first
    ``office:automatic-styles`` and then the first ``office:styles``
    below *root* are tried.

    :param root: document root, used only for the fallback lookup.
    :param usedstyles: set of style names to keep.
    :param styles: iterable of candidate style elements.
    :param name: human readable kind of style for the log line.
    :raises ValueError: if a style to remove has no parent and is in
        neither fallback container.
    """
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}name"
    office_ns = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}"

    for style in styles:
        stylename = style.get(name_attr)
        print(stylename)
        if stylename in usedstyles:
            continue
        print("removing unused " + name + " " + str(stylename))

        getparent = getattr(style, "getparent", None)
        parent = getparent() if getparent is not None else None
        if parent is not None:
            parent.remove(style)
            continue

        # No parent pointer available: probe the two style containers.
        for container_name in ("automatic-styles", "styles"):
            container = root.find(".//" + office_ns + container_name)
            if container is None:
                continue
            try:
                container.remove(style)
            except ValueError:
                continue
            break
        else:
            raise ValueError(
                "cannot locate parent of " + name + " " + str(stylename))
|
|
|
|
|
2023-10-10 07:02:45 -05:00
|
|
|
def remove_unused_drawings(root, useddrawings, drawings, name):
    """Remove every element of *drawings* whose name is not in *useddrawings*.

    The ``draw:name`` of each element is printed; for an element that
    gets removed an additional ``removing unused <name> <draw name>``
    line is printed.

    The element is removed from its real parent when it offers
    ``getparent()`` (lxml does); otherwise from the first
    ``office:styles`` element below *root*.

    :param root: document root, used only for the fallback lookup.
    :param useddrawings: set of ``draw:name`` values to keep.
    :param drawings: iterable of candidate elements.
    :param name: human readable kind of element for the log line.
    :raises ValueError: if no container can be found for an element
        that has to be removed, or the element is not inside it.
    """
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}name"
    styles_path = ".//{urn:oasis:names:tc:opendocument:xmlns:office:1.0}styles"

    for drawing in drawings:
        drawingname = drawing.get(name_attr)
        print(drawingname)
        if drawingname in useddrawings:
            continue
        print("removing unused " + name + " " + str(drawingname))

        getparent = getattr(drawing, "getparent", None)
        parent = getparent() if getparent is not None else None
        if parent is None:
            parent = root.find(styles_path)
        if parent is None:
            raise ValueError(
                "cannot locate parent of " + name + " " + str(drawingname))
        parent.remove(drawing)
|
|
|
|
|
2022-01-31 10:21:32 -06:00
|
|
|
def collect_all_attribute(usedstyles, attribute, tree=None):
    """Add the value of *attribute* from every element carrying it.

    :param usedstyles: set that receives the attribute values; modified
        in place.
    :param attribute: attribute name in Clark notation
        (``{namespace}local-name``).
    :param tree: element whose subtree is searched.  When omitted, the
        module-level ``root`` assigned by the ``__main__`` block is
        used, which is what existing two-argument callers rely on.
    :returns: ``None``.
    """
    if tree is None:
        tree = root  # module global, only defined when run as a script
    for element in tree.findall(".//*[@" + attribute + "]"):
        usedstyles.add(element.get(attribute))
|
|
|
|
|
2023-10-10 07:02:45 -05:00
|
|
|
def collect_all_attribute_list(usedstyles, attribute, tree=None):
    """Add every entry of the list-valued *attribute* to *usedstyles*.

    The attribute value is treated as a whitespace separated list of
    names; each name found on any element is added.

    :param usedstyles: set that receives the names; modified in place.
    :param attribute: attribute name in Clark notation
        (``{namespace}local-name``).
    :param tree: element whose subtree is searched.  When omitted, the
        module-level ``root`` assigned by the ``__main__`` block is
        used, which is what existing two-argument callers rely on.
    :returns: ``None``.
    """
    if tree is None:
        tree = root  # module global, only defined when run as a script
    for element in tree.findall(".//*[@" + attribute + "]"):
        # split() without argument never produces empty names
        usedstyles.update(element.get(attribute).split())
|
|
|
|
|
2022-01-31 10:21:32 -06:00
|
|
|
def remove_unused(root):
    """Strip unreferenced styles and other ballast from a flat ODF tree.

    The tree below *root* is modified in place.  In order, the function
    removes unused master pages, paragraph / list / text / ruby /
    section / presentation / graphic / drawing-page styles, page
    layouts, presentation page layouts, table styles, gradients,
    hatches, bitmaps, markers, stroke dashes, font face declarations,
    rsid attributes, user field declarations and finally the
    ``office:settings``, ``office:scripts`` and ``loext:theme``
    elements.  Progress is reported with ``print``.

    Elements are detached via ``getparent()``, so *root* has to be an
    lxml element.

    :param root: root element of the document.
    :returns: ``None``.
    """
    office_ns = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}"
    style_ns = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    text_ns = "{urn:oasis:names:tc:opendocument:xmlns:text:1.0}"
    table_ns = "{urn:oasis:names:tc:opendocument:xmlns:table:1.0}"
    draw_ns = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}"
    presentation_ns = "{urn:oasis:names:tc:opendocument:xmlns:presentation:1.0}"
    db_ns = "{urn:oasis:names:tc:opendocument:xmlns:database:1.0}"
    officeooo_ns = "{http://openoffice.org/2009/office}"
    loext_ns = "{urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0}"
    style_name = style_ns + "name"

    def family_styles(family):
        """Return every style:style element with the given style:family."""
        return root.findall(
            ".//" + style_ns + "style[@" + style_ns + "family='" + family + "']")

    def detach(element):
        """Remove *element* from its actual parent element."""
        element.getparent().remove(element)

    # 1) find all elements that may reference master pages and remove
    #    the unused master pages - this gets rid of some paragraphs
    usedpstyles = get_used_p_styles(root)
    print(usedpstyles)
    usedtstyles = set()
    tables = root.findall(".//" + table_ns + "table")
    print(tables)
    for table in tables:
        usedtstyles.add(table.get(table_ns + "style-name"))
    pstyles = family_styles("paragraph")
    tstyles = family_styles("table")
    usedmasterpages = {"Standard"}  # assume this is the default on page 1
    # only automatic styles may have page breaks in LO, so no need to
    # chase parents or nexts
    for pstyle in pstyles:
        print(pstyle.get(style_name))
        if pstyle.get(style_name) in usedpstyles:
            usedmasterpages.add(pstyle.get(style_ns + "master-page-name"))
    for tstyle in tstyles:
        if tstyle.get(style_name) in usedtstyles:
            usedmasterpages.add(tstyle.get(style_ns + "master-page-name"))
    for attribute in (text_ns + "master-page-name",
                      draw_ns + "master-page-name"):
        for node in root.findall(".//*[@" + attribute + "]"):
            usedmasterpages.add(node.get(attribute))
    print(usedmasterpages)

    # iterate parent/next until no more masterpage is added
    masterpages_path = ".//" + style_ns + "master-page"
    size = -1
    while size != len(usedmasterpages):
        size = len(usedmasterpages)
        for mp in root.findall(masterpages_path):
            if mp.get(style_name) not in usedmasterpages:
                continue
            for link in ("parent-style-name", "next-style-name"):
                linked = mp.get(style_ns + link)
                if linked:
                    usedmasterpages.add(linked)

    # remove unused masterpages
    for mp in root.findall(masterpages_path):
        if mp.get(style_name) not in usedmasterpages:
            print("removing unused master page " + mp.get(style_name))
            detach(mp)

    # 2) remove unused paragraph styles
    #    (recomputed: the removed master pages no longer count)
    usedpstyles = get_used_p_styles(root)
    add_parent_styles(usedpstyles, pstyles)
    remove_unused_styles(root, usedpstyles, pstyles, "paragraph style")

    # 3) unused list styles - keep referenced from still used paragraph styles
    usedliststyles = set()
    list_style_attr = style_ns + "list-style-name"
    for element in root.findall(".//*[@" + list_style_attr + "]"):
        usedliststyles.add(element.get(list_style_attr))
    for elementname, attribute in (
        ("list", text_ns + "style-name"),
        ("list-item", text_ns + "style-override"),
        ("numbered-paragraph", text_ns + "style-name"),
    ):
        path = ".//" + text_ns + elementname + "[@" + attribute + "]"
        for element in root.findall(path):
            usedliststyles.add(element.get(attribute))
    # ignore ones that are children of style:graphic-properties, those
    # must be handled as the containing style
    # there is no inheritance for these
    liststyles = root.findall("./*/" + text_ns + "list-style")
    remove_unused_styles(root, usedliststyles, liststyles, "list style")

    # 4) unused text styles
    usedtextstyles = set()
    usedsectionstyles = set()
    usedrubystyles = set()

    sections = {
        text_ns + "alphabetical-index",
        text_ns + "bibliography",
        text_ns + "illustration-index",
        text_ns + "index-title",
        text_ns + "object-index",
        text_ns + "section",
        text_ns + "table-of-content",
        text_ns + "table-index",
        text_ns + "user-index",
    }
    texts = {
        text_ns + "a",
        text_ns + "index-entry-bibliography",
        text_ns + "index-entry-chapter",
        text_ns + "index-entry-link-end",
        text_ns + "index-entry-link-start",
        text_ns + "index-entry-page-number",
        text_ns + "index-entry-span",
        text_ns + "index-entry-tab-stop",
        text_ns + "index-entry-text",
        text_ns + "index-title-template",
        text_ns + "linenumbering-configuration",
        text_ns + "list-level-style-number",
        text_ns + "list-level-style-bullet",
        text_ns + "outline-level-style",
        text_ns + "ruby-text",
        text_ns + "span",
    }
    text_style_attr = text_ns + "style-name"
    for element in root.findall(".//*[@" + text_style_attr + "]"):
        style = element.get(text_style_attr)
        if element.tag == text_ns + "ruby":
            usedrubystyles.add(style)
        elif element.tag in sections:
            usedsectionstyles.add(style)
        elif element.tag in texts:
            usedtextstyles.add(style)

    collect_all_attribute(usedtextstyles, style_ns + "style-name")
    collect_all_attribute(usedtextstyles, style_ns + "leader-text-style")
    collect_all_attribute(usedtextstyles, style_ns + "text-line-through-text-style")
    collect_all_attribute(usedtextstyles, text_ns + "visited-style-name")
    collect_all_attribute(usedtextstyles, text_ns + "main-entry-style-name")
    collect_all_attribute(usedtextstyles, text_ns + "citation-style-name")
    collect_all_attribute(usedtextstyles, text_ns + "citation-body-style-name")
    class_names_attr = text_ns + "class-names"
    for span in root.findall(
            ".//" + text_ns + "span[@" + class_names_attr + "]"):
        usedtextstyles.update(span.get(class_names_attr).split())
    textstyles = family_styles("text")
    add_parent_styles(usedtextstyles, textstyles)
    remove_unused_styles(root, usedtextstyles, textstyles, "text style")

    # 5) unused ruby styles - can't have parents?
    rubystyles = family_styles("ruby")
    remove_unused_styles(root, usedrubystyles, rubystyles, "ruby style")

    # 6) unused section styles - can't have parents?
    sectionstyles = family_styles("section")
    remove_unused_styles(root, usedsectionstyles, sectionstyles, "section style")

    # 7) presentation styles
    usedpresentationstyles = set()
    collect_all_attribute(usedpresentationstyles, presentation_ns + "style-name")
    collect_all_attribute_list(usedpresentationstyles, presentation_ns + "class-names")
    presentationstyles = family_styles("presentation")
    add_parent_styles(usedpresentationstyles, presentationstyles)
    remove_unused_styles(root, usedpresentationstyles, presentationstyles, "presentation style")

    # 8) graphic styles
    pages = {
        draw_ns + "page",
        presentation_ns + "notes",
        style_ns + "handout-master",
        style_ns + "master-page",
    }
    usedgraphicstyles = set()
    useddrawingpagestyles = set()
    draw_style_attr = draw_ns + "style-name"
    for element in root.findall(".//*[@" + draw_style_attr + "]"):
        style = element.get(draw_style_attr)
        if element.tag in pages:
            useddrawingpagestyles.add(style)
        else:
            usedgraphicstyles.add(style)
    collect_all_attribute_list(usedgraphicstyles, draw_ns + "class-names")
    graphicstyles = family_styles("graphic")
    add_parent_styles(usedgraphicstyles, graphicstyles)
    remove_unused_styles(root, usedgraphicstyles, graphicstyles, "graphic style")

    # 9) drawing-page styles
    drawingpagestyles = family_styles("drawing-page")
    add_parent_styles(useddrawingpagestyles, drawingpagestyles)
    remove_unused_styles(root, useddrawingpagestyles, drawingpagestyles, "drawing-page style")

    # 10) page layouts
    usedpagelayouts = set()
    collect_all_attribute(usedpagelayouts, style_ns + "page-layout-name")
    pagelayouts = root.findall(".//" + style_ns + "page-layout")
    remove_unused_styles(root, usedpagelayouts, pagelayouts, "page layout")

    # 11) presentation page layouts
    usedpresentationpagelayouts = set()
    collect_all_attribute(usedpresentationpagelayouts, presentation_ns + "presentation-page-layout-name")
    presentationpagelayouts = root.findall(".//" + style_ns + "presentation-page-layout")
    remove_unused_styles(root, usedpresentationpagelayouts, presentationpagelayouts, "presentation page layout")

    # 12) table (column/row/cell) styles
    usedtablestyles = set()
    usedtablecolumnstyles = set()
    usedtablerowstyles = set()
    usedtablecellstyles = set()

    tabletags = {
        table_ns + "table",
        # element tags are "{namespace}local-name"; a "table:" prefix
        # inside the tag could never match
        table_ns + "background",
    }
    tablecells = {
        table_ns + "covered-table-cell",
        table_ns + "table-cell",
        table_ns + "body",
        table_ns + "even-columns",
        table_ns + "even-rows",
        table_ns + "first-column",
        table_ns + "first-row",
        table_ns + "last-column",
        table_ns + "last-row",
        table_ns + "odd-columns",
        table_ns + "odd-rows",
    }
    table_style_attr = table_ns + "style-name"
    for element in root.findall(".//*[@" + table_style_attr + "]"):
        style = element.get(table_style_attr)
        if element.tag == table_ns + "table-column":
            usedtablecolumnstyles.add(style)
        elif element.tag == table_ns + "table-row":
            usedtablerowstyles.add(style)
        elif element.tag in tabletags:
            usedtablestyles.add(style)
        elif element.tag in tablecells:
            usedtablecellstyles.add(style)

    db_style_attr = db_ns + "style-name"
    for element in root.findall(".//*[@" + db_style_attr + "]"):
        style = element.get(db_style_attr)
        if element.tag == db_ns + "column":
            usedtablecolumnstyles.add(style)
        else:  # db:query db:table-representation
            usedtablestyles.add(style)

    collect_all_attribute(usedtablerowstyles, db_ns + "default-row-style-name")
    collect_all_attribute(usedtablecellstyles, db_ns + "default-cell-style-name")
    collect_all_attribute(usedtablecellstyles, table_ns + "default-cell-style-name")

    tablecolumstyles = family_styles("table-column")
    tablerowstyles = family_styles("table-row")
    tablecellstyles = family_styles("table-cell")
    add_parent_styles(usedtablestyles, tstyles)
    add_parent_styles(usedtablecolumnstyles, tablecolumstyles)
    add_parent_styles(usedtablerowstyles, tablerowstyles)
    add_parent_styles(usedtablecellstyles, tablecellstyles)
    # use the parent-expanded set computed in this step, not the plain
    # table:table references gathered for the master pages in step 1
    remove_unused_styles(root, usedtablestyles, tstyles, "table style")
    remove_unused_styles(root, usedtablecolumnstyles, tablecolumstyles, "table column style")
    remove_unused_styles(root, usedtablerowstyles, tablerowstyles, "table row style")
    remove_unused_styles(root, usedtablecellstyles, tablecellstyles, "table cell style")

    # 13) gradients
    usedgradients = set()
    collect_all_attribute(usedgradients, draw_ns + "fill-gradient-name")
    collect_all_attribute(usedgradients, draw_ns + "opacity-name")
    gradients = root.findall(".//" + draw_ns + "gradient")
    remove_unused_drawings(root, usedgradients, gradients, "gradient")

    # 14) hatchs
    usedhatchs = set()
    collect_all_attribute(usedhatchs, draw_ns + "fill-hatch-name")
    hatchs = root.findall(".//" + draw_ns + "hatch")
    remove_unused_drawings(root, usedhatchs, hatchs, "hatch")

    # 15) bitmaps
    # NOTE(review): ODF appears to name this element draw:fill-image;
    # confirm that draw:bitmap is the intended element name.
    usedbitmaps = set()
    collect_all_attribute(usedbitmaps, draw_ns + "fill-image-name")
    bitmaps = root.findall(".//" + draw_ns + "bitmap")
    remove_unused_drawings(root, usedbitmaps, bitmaps, "bitmap")

    # 16) markers
    usedmarkers = set()
    collect_all_attribute(usedmarkers, draw_ns + "marker-start")
    collect_all_attribute(usedmarkers, draw_ns + "marker-end")
    markers = root.findall(".//" + draw_ns + "marker")
    remove_unused_drawings(root, usedmarkers, markers, "marker")

    # 17) stroke-dash
    usedstrokedashs = set()
    collect_all_attribute(usedstrokedashs, draw_ns + "stroke-dash")
    collect_all_attribute_list(usedstrokedashs, draw_ns + "stroke-dash-names")
    strokedashs = root.findall(".//" + draw_ns + "stroke-dash")
    remove_unused_drawings(root, usedstrokedashs, strokedashs, "stroke-dash")

    # TODO 3 other styles

    # 18) unused font-face-decls
    usedfonts = set()
    collect_all_attribute(usedfonts, style_ns + "font-name")
    collect_all_attribute(usedfonts, style_ns + "font-name-asian")
    collect_all_attribute(usedfonts, style_ns + "font-name-complex")
    for font in root.findall(".//" + style_ns + "font-face"):
        if font.get(style_name) not in usedfonts:
            print("removing unused font-face " + font.get(style_name))
            detach(font)

    # 19) remove rsid attributes
    for style in root.findall(".//" + style_ns + "style"):
        tp = style.find(".//" + style_ns + "text-properties")
        if tp is None:
            continue
        for rsid in ("rsid", "paragraph-rsid"):
            if officeooo_ns + rsid in tp.attrib:
                print("removing " + rsid + " from " + style.get(style_name))
                del tp.attrib[officeooo_ns + rsid]

    # 20) unused user field decls
    useduserfields = set()
    field_name = text_ns + "name"
    for elementname in ("user-field-get", "user-field-input"):
        for field in root.findall(".//" + text_ns + elementname):
            useduserfields.add(field.get(field_name))
    for field in root.findall(".//" + text_ns + "user-field-decl"):
        if field.get(field_name) not in useduserfields:
            print("removing unused user-field-decl " + field.get(field_name))
            # the declaration may live in any text:user-field-decls
            detach(field)

    # remove office:settings
    settings = root.find(".//" + office_ns + "settings")
    if settings is not None:
        detach(settings)

    # scripts are almost never needed
    scripts = root.find(".//" + office_ns + "scripts")
    if scripts is not None:
        detach(scripts)

    # remove theme
    theme = root.find(".//" + loext_ns + "theme")
    if theme is not None:
        detach(theme)

    # TODO: replace embedded image with some tiny one
    # TODO: perhaps replace text with xxx (optionally)?
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Usage: <script> INFILE OUTFILE
    # Reads the flat ODF document INFILE, strips unused content and
    # writes the result to OUTFILE.
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " INFILE OUTFILE")
    infile = sys.argv[1]
    outfile = sys.argv[2]

    dom = ET.parse(infile)
    # NOTE: "root" has to stay a module-level name; collect_all_attribute()
    # and collect_all_attribute_list() read it as a global.
    root = dom.getroot()

    remove_unused(root)

    # write output
    dom.write(outfile, encoding='utf-8', xml_declaration=True)
|
|
|
|
|
|
|
|
"""
|
|
|
|
TODO: style references that are not handled yet

chart:style-name
        -> chart
style:data-style-name
        -> data style
style:percentage-data-style-name
        -> data style
|
|
|
|
"""
|
|
|
|
|
|
|
|
# vim: set shiftwidth=4 softtabstop=4 expandtab:
|