Add options to remove descriptive elements (#102)

* --remove-titles (removes <title>)
* --remove-descriptions (removes <desc>)
* --remove-descriptive-elements (removes <title>, <desc> and <metadata>)
This commit is contained in:
Eitot 2016-08-29 21:05:12 +02:00 committed by Eduard Braun
parent 842123a393
commit aa48c90d56
5 changed files with 111 additions and 11 deletions

View file

@ -910,11 +910,25 @@ def removeNamespacedElements(node, namespaces):
num += removeNamespacedElements(child, namespaces)
return num
def removeMetadataElements(doc):
def removeDescriptiveElements(doc, options):
elementTypes = []
if options.remove_descriptive_elements:
elementTypes.extend(("title", "desc", "metadata"))
else:
if options.remove_titles:
elementTypes.append("title")
if options.remove_descriptions:
elementTypes.append("desc")
if options.remove_metadata:
elementTypes.append("metadata")
if not elementTypes:
return
global numElemsRemoved
num = 0
# clone the list, as the tag list is live from the DOM
elementsToRemove = [element for element in doc.documentElement.getElementsByTagName('metadata')]
elementsToRemove = []
for elementType in elementTypes:
elementsToRemove.extend(doc.documentElement.getElementsByTagName(elementType))
for element in elementsToRemove:
element.parentNode.removeChild(element)
@ -1082,7 +1096,7 @@ def createGroupsForCommonAttributes(elem):
# SVG 1.1 (see https://www.w3.org/TR/SVG/struct.html#GElement)
'animate', 'animateColor', 'animateMotion', 'animateTransform', 'set', # animation elements
'desc', 'metadata', 'title', # descriptive elements
'circle', 'ellipse', 'line', 'path', 'polygon', 'polyline', 'rect', # shape elements
'circle', 'ellipse', 'line', 'path', 'polygon', 'polyline', 'rect', # shape elements
'defs', 'g', 'svg', 'symbol', 'use', # structural elements
'linearGradient', 'radialGradient', # gradient elements
'a', 'altGlyphDef', 'clipPath', 'color-profile', 'cursor', 'filter',
@ -3059,9 +3073,8 @@ def scourString(in_string, options=None):
else:
print("WARNING: {}".format(errmsg), file = options.ensure_value("stdout", sys.stdout))
# remove <metadata> if the user wants to
if options.remove_metadata:
removeMetadataElements(doc)
# remove descriptive elements
removeDescriptiveElements(doc, options)
# for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
# on the first pass, so we do it multiple times
@ -3142,7 +3155,7 @@ def scourString(in_string, options=None):
# remove empty defs, metadata, g
# NOTE: these elements will be removed if they just have whitespace-only text nodes
for tag in ['defs', 'metadata', 'g'] :
for tag in ['defs', 'title', 'desc', 'metadata', 'g'] :
for elem in doc.documentElement.getElementsByTagName(tag) :
removeElem = not elem.hasChildNodes()
if removeElem == False :
@ -3369,9 +3382,18 @@ _option_group_document = optparse.OptionGroup(_options_parser, "SVG document")
# Opt-in switches registered on the "SVG document" option group.
# Each one is a store_true flag that defaults to False.
_option_group_document.add_option(
    "--strip-xml-prolog",
    action="store_true",
    dest="strip_xml_prolog",
    default=False,
    help="won't output the XML prolog (<?xml ?>)")

# One switch per descriptive element type ...
_option_group_document.add_option(
    "--remove-titles",
    action="store_true",
    dest="remove_titles",
    default=False,
    help="remove <title> elements")
_option_group_document.add_option(
    "--remove-descriptions",
    action="store_true",
    dest="remove_descriptions",
    default=False,
    help="remove <desc> elements")
_option_group_document.add_option(
    "--remove-metadata",
    action="store_true",
    dest="remove_metadata",
    default=False,
    help="remove <metadata> elements (which may contain license/author information etc.)")

# ... and one umbrella switch that covers all three types at once.
_option_group_document.add_option(
    "--remove-descriptive-elements",
    action="store_true",
    dest="remove_descriptive_elements",
    default=False,
    help="remove <title>, <desc> and <metadata> elements")

_option_group_document.add_option(
    "--enable-comment-stripping",
    action="store_true",
    dest="strip_comments",
    default=False,
    help="remove all comments (<!-- -->)")

View file

@ -141,15 +141,59 @@ class NoAdobeXPathElements(unittest.TestCase):
lambda e: e.namespaceURI != 'http://ns.adobe.com/XPath/1.0/'), False,
'Found Adobe XPath elements' )
class DoNotRemoveTitleWithOnlyText(unittest.TestCase):
    """A <title> element whose only child is text must survive scouring."""

    def runTest(self):
        scoured = scour.scourXmlFile('unittests/descriptive-elements-with-text.svg')
        titles = scoured.getElementsByTagNameNS(SVGNS, 'title')
        self.assertEqual(len(titles), 1,
                         'Removed title element with only text child')
class RemoveEmptyTitleElement(unittest.TestCase):
    """An empty <title> element must be gone after scouring without options."""

    def runTest(self):
        scoured = scour.scourXmlFile('unittests/empty-descriptive-elements.svg')
        remaining = scoured.getElementsByTagNameNS(SVGNS, 'title')
        self.assertEqual(len(remaining), 0,
                         'Did not remove empty title element')
class DoNotRemoveDescriptionWithOnlyText(unittest.TestCase):
    """A <desc> element whose only child is text must survive scouring."""

    def runTest(self):
        scoured = scour.scourXmlFile('unittests/descriptive-elements-with-text.svg')
        descriptions = scoured.getElementsByTagNameNS(SVGNS, 'desc')
        self.assertEqual(len(descriptions), 1,
                         'Removed description element with only text child')
class RemoveEmptyDescriptionElement(unittest.TestCase):
    """An empty <desc> element must be gone after scouring without options."""

    def runTest(self):
        scoured = scour.scourXmlFile('unittests/empty-descriptive-elements.svg')
        remaining = scoured.getElementsByTagNameNS(SVGNS, 'desc')
        self.assertEqual(len(remaining), 0,
                         'Did not remove empty description element')
class DoNotRemoveMetadataWithOnlyText(unittest.TestCase):
def runTest(self):
doc = scour.scourXmlFile('unittests/metadata-with-text.svg')
doc = scour.scourXmlFile('unittests/descriptive-elements-with-text.svg')
self.assertEqual(len(doc.getElementsByTagNameNS(SVGNS, 'metadata')), 1,
'Removed metadata element with only text child' )
class RemoveEmptyMetadataElement(unittest.TestCase):
def runTest(self):
doc = scour.scourXmlFile('unittests/empty-metadata.svg')
doc = scour.scourXmlFile('unittests/empty-descriptive-elements.svg')
self.assertEqual(len(doc.getElementsByTagNameNS(SVGNS, 'metadata')), 0,
'Did not remove empty metadata element' )
class DoNotRemoveDescriptiveElementsWithOnlyText(unittest.TestCase):
    """<title>, <desc> and <metadata> holding only text must all survive scouring."""

    def runTest(self):
        scoured = scour.scourXmlFile('unittests/descriptive-elements-with-text.svg')
        # Checked in this order; the first failing element type aborts the test.
        expectations = (
            ('title', 'Removed title element with only text child'),
            ('desc', 'Removed description element with only text child'),
            ('metadata', 'Removed metadata element with only text child'),
        )
        for tag, failure_message in expectations:
            found = scoured.getElementsByTagNameNS(SVGNS, tag)
            self.assertEqual(len(found), 1, failure_message)
class RemoveEmptyDescriptiveElements(unittest.TestCase):
    """Empty <title>, <desc> and <metadata> elements must all be gone after scouring."""

    def runTest(self):
        scoured = scour.scourXmlFile('unittests/empty-descriptive-elements.svg')
        # Checked in this order; the first failing element type aborts the test.
        expectations = (
            ('title', 'Did not remove empty title element'),
            ('desc', 'Did not remove empty description element'),
            ('metadata', 'Did not remove empty metadata element'),
        )
        for tag, failure_message in expectations:
            leftover = scoured.getElementsByTagNameNS(SVGNS, tag)
            self.assertEqual(len(leftover), 0, failure_message)
@ -1152,13 +1196,34 @@ class PathImplicitLineWithMoveCommands(unittest.TestCase):
self.assertEqual( path.getAttribute('d'), "m100 100v100m200-100h-200m200 100v-100",
"Implicit line segments after move not preserved")
class RemoveTitlesOption(unittest.TestCase):
    """--remove-titles must strip the SVG <title> element from the output."""

    def runTest(self):
        doc = scour.scourXmlFile('unittests/full-descriptive-elements.svg',
                                 scour.parse_args(['--remove-titles']))
        # <title> is a child of <svg>, not of the document node, so the number
        # of document-level children is the same whether or not it was removed.
        # Count the SVG-namespace <title> elements directly instead.
        self.assertEqual(len(doc.getElementsByTagNameNS(SVGNS, 'title')), 0,
                         'Did not remove <title> tag with --remove-titles')
class RemoveDescriptionsOption(unittest.TestCase):
    """--remove-descriptions must strip the SVG <desc> element from the output."""

    def runTest(self):
        doc = scour.scourXmlFile('unittests/full-descriptive-elements.svg',
                                 scour.parse_args(['--remove-descriptions']))
        # <desc> is a child of <svg>, not of the document node, so the number
        # of document-level children is the same whether or not it was removed.
        # Count the SVG-namespace <desc> elements directly instead.
        self.assertEqual(len(doc.getElementsByTagNameNS(SVGNS, 'desc')), 0,
                         'Did not remove <desc> tag with --remove-descriptions')
class RemoveMetadataOption(unittest.TestCase):
def runTest(self):
doc = scour.scourXmlFile('unittests/full-metadata.svg',
doc = scour.scourXmlFile('unittests/full-descriptive-elements.svg',
scour.parse_args(['--remove-metadata']))
self.assertEqual(doc.childNodes.length, 1,
'Did not remove <metadata> tag with --remove-metadata')
class RemoveDescriptiveElementsOption(unittest.TestCase):
    """--remove-descriptive-elements must strip <title>, <desc> and <metadata>."""

    def runTest(self):
        doc = scour.scourXmlFile('unittests/full-descriptive-elements.svg',
                                 scour.parse_args(['--remove-descriptive-elements']))
        # The descriptive elements are children of <svg>, not of the document
        # node, so the number of document-level children cannot reveal whether
        # they were removed. Check each SVG-namespace element type explicitly.
        for tag in ('title', 'desc', 'metadata'):
            self.assertEqual(len(doc.getElementsByTagNameNS(SVGNS, tag)), 0,
                             'Did not remove <title>, <desc> and <metadata> tags with --remove-descriptive-elements')
class EnableCommentStrippingOption(unittest.TestCase):
def runTest(self):
with open('unittests/comment-beside-xml-decl.svg') as f:

View file

@ -1,4 +1,6 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg xmlns="http://www.w3.org/2000/svg">
<title>This is a title element with only text node children</title>
<desc>This is a desc element with only text node children</desc>
<metadata>This is a metadata element with only text node children</metadata>
</svg>

Before

Width:  |  Height:  |  Size: 182 B

After

Width:  |  Height:  |  Size: 319 B

Before After
Before After

View file

@ -1,3 +1,5 @@
<svg xmlns="http://www.w3.org/2000/svg">
<title></title>
<desc></desc>
<metadata></metadata>
</svg>

Before

Width:  |  Height:  |  Size: 72 B

After

Width:  |  Height:  |  Size: 106 B

Before After
Before After

View file

@ -1,4 +1,13 @@
<svg xmlns="http://www.w3.org/2000/svg">
<title xmlns:mytitle="http://example.org/mytitle">
<mytitle:title>This is an example SVG file</mytitle:title>
<mytitle:desc>Unit test for Scour's --remove-titles option</mytitle:desc>
</title>
<desc xmlns:mydesc="http://example.org/mydesc">
<mydesc:title>This is an example SVG file</mydesc:title>
<mydesc:para>Unit test for Scour's
<mydesc:emph>--remove-descriptions</mydesc:emph> option</mydesc:para>
</desc>
<metadata>
<rdf:RDF
xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

Before

Width:  |  Height:  |  Size: 765 B

After

Width:  |  Height:  |  Size: 1.2 KiB

Before After
Before After