Refactor command-line argument parsing. Only strip IDs if the option is enabled. Add unit tests for the new functionality.

This commit is contained in:
JSCHILL1 2009-04-19 22:26:26 -05:00
parent 9f1a88ba3a
commit 28cc0d59fb
5 changed files with 153 additions and 79 deletions

182
scour.py
View file

@ -127,16 +127,13 @@ svgAttributes = [
'stroke-width', 'stroke-width',
] ]
def printHeader(): def findElementById(node, id):
print APP , VER if node == None or node.nodeType != 1: return None
print COPYRIGHT if node.getAttribute('id') == id: return node
for child in node.childNodes :
def printSyntaxAndQuit(): e = findElementById(child,id)
printHeader() if e != None: return e
print 'usage: scour.py [-i input.svg] [-o output.svg]\n' return None
print 'If the input file is not specified, stdin is used.'
print 'If the output file is not specified, stdout is used.'
quit()
# returns all elements with id attributes # returns all elements with id attributes
def findElementsWithId(node,elems={}): def findElementsWithId(node,elems={}):
@ -197,6 +194,47 @@ numElemsRemoved = 0
numAttrsRemoved = 0 numAttrsRemoved = 0
numRastersEmbedded = 0 numRastersEmbedded = 0
# Removes unreferenced elements from the document in two passes:
#   1. every linearGradient, radialGradient or pattern element whose id is
#      not referenced anywhere in the document
#   2. every element child of a <defs> that has no id, or whose id is not
#      referenced, except for <font>, <style>, <metadata>, <script>, <title>
#      and <desc>
# Increments the global numElemsRemoved once per element removed.
# Returns the total number of elements removed by BOTH passes. scourString
# calls this in a loop until it returns zero, so removals made in the first
# pass must be counted or the loop can stop before reaching a fixed point.
def removeUnreferencedElements(doc):
    global numElemsRemoved
    num = 0

    # pass 1: unreferenced gradients and patterns
    removeTags = ['linearGradient', 'radialGradient', 'pattern']
    identifiedElements = findElementsWithId(doc.documentElement, {})
    referencedIDs = findReferencedElements(doc.documentElement, {})
    for elemId in identifiedElements:
        if elemId in referencedIDs:
            continue
        # look the element up again in the live tree: it may already be gone
        # if one of its ancestors was removed earlier in this loop
        goner = findElementById(doc.documentElement, elemId)
        if goner is not None and goner.parentNode is not None and \
                goner.nodeName in removeTags:
            goner.parentNode.removeChild(goner)
            num += 1
            numElemsRemoved += 1

    # pass 2: vacuum the defs
    # references are recomputed because pass 1 may have removed elements
    # that were themselves referencing other elements
    referencedIDs = findReferencedElements(doc.documentElement, {})
    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
    defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
    for aDef in defs:
        # collect first, remove afterwards, so that childNodes is not
        # modified while it is being iterated
        elemsToRemove = []
        for elem in aDef.childNodes:
            if elem.nodeType != 1 or elem.nodeName in keepTags:
                continue
            elemId = elem.getAttribute('id')
            if elemId == '' or elemId not in referencedIDs:
                elemsToRemove.append(elem)
        for elem in elemsToRemove:
            aDef.removeChild(elem)
            numElemsRemoved += 1
            num += 1
    return num
# removes the unreferenced ID attributes # removes the unreferenced ID attributes
# returns the number of ID attributes removed # returns the number of ID attributes removed
def removeUnreferencedIDs(referencedIDs, identifiedElements): def removeUnreferencedIDs(referencedIDs, identifiedElements):
@ -211,40 +249,6 @@ def removeUnreferencedIDs(referencedIDs, identifiedElements):
num += 1 num += 1
return num return num
# Vacuums every <defs> element of the document: each element child that has
# no id attribute is removed, unless its tag is font, style or metadata.
# Increments the global numElemsRemoved once per element removed.
# returns the number of children removed from defs elements
def vacuumDefs(doc):
    global numElemsRemoved
    protectedTags = ['font', 'style', 'metadata' ]
    removedCount = 0
    for defsNode in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs'):
        # build the removal list up front so childNodes is not modified
        # while it is being walked
        doomed = [child for child in defsNode.childNodes
                  if child.nodeType == 1
                  and child.getAttribute('id') == ''
                  and not child.nodeName in protectedTags]
        for child in doomed:
            defsNode.removeChild(child)
            numElemsRemoved += 1
            removedCount += 1
    return removedCount
# Removes every pattern, linearGradient and radialGradient element that has
# no id attribute, and increments the global numElemsRemoved per removal.
# (this relies on the unreferenced ids having been removed first, so that
# "no id" means "not referenced")
# returns the number of gradients or patterns removed from the document
def removeUnreferencedElements(doc):
    global numElemsRemoved
    removedCount = 0
    for tagName in ['pattern', 'linearGradient', 'radialGradient'] :
        candidates = doc.documentElement.getElementsByTagNameNS(NS['SVG'], tagName)
        # snapshot the id-less elements before detaching any of them
        doomed = [node for node in candidates if node.getAttribute('id') == '']
        for node in doomed:
            node.parentNode.removeChild(node)
            numElemsRemoved += 1
            removedCount += 1
    return removedCount
def removeNamespacedAttributes(node, namespaces): def removeNamespacedAttributes(node, namespaces):
global numAttrsRemoved global numAttrsRemoved
num = 0 num = 0
@ -635,7 +639,7 @@ def properlySizeDoc(docElement):
# this is the main method # this is the main method
# input is a string representation of the input XML # input is a string representation of the input XML
# returns a string representation of the output XML # returns a string representation of the output XML
def scourString(in_string): def scourString(in_string, options=[]):
global numAttrsRemoved global numAttrsRemoved
global numStylePropsFixed global numStylePropsFixed
global numElemsRemoved global numElemsRemoved
@ -660,12 +664,6 @@ def scourString(in_string):
doc.documentElement.removeAttribute(attr) doc.documentElement.removeAttribute(attr)
numAttrsRemoved += 1 numAttrsRemoved += 1
bContinueLooping = True
while bContinueLooping:
identifiedElements = findElementsWithId(doc.documentElement, {})
referencedIDs = findReferencedElements(doc.documentElement, {})
bContinueLooping = ((removeUnreferencedIDs(referencedIDs, identifiedElements) + vacuumDefs(doc)) > 0)
# repair style (remove unnecessary style properties and change them into XML attributes) # repair style (remove unnecessary style properties and change them into XML attributes)
numStylePropsFixed = repairStyle(doc.documentElement) numStylePropsFixed = repairStyle(doc.documentElement)
@ -689,6 +687,13 @@ def scourString(in_string):
while removeUnreferencedElements(doc) > 0: while removeUnreferencedElements(doc) > 0:
pass pass
if '--enable-id-stripping' in options:
bContinueLooping = True
while bContinueLooping:
identifiedElements = findElementsWithId(doc.documentElement, {})
referencedIDs = findReferencedElements(doc.documentElement, {})
bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
while removeNestedGroups(doc.documentElement) > 0: while removeNestedGroups(doc.documentElement) > 0:
pass pass
@ -722,43 +727,68 @@ def scourString(in_string):
# used mostly by unit tests # used mostly by unit tests
# input is a filename # input is a filename
# returns the minidom doc representation of the SVG # returns the minidom doc representation of the SVG
def scourXmlFile(filename): def scourXmlFile(filename, options=[]):
in_string = open(filename).read() in_string = open(filename).read()
out_string = scourString(in_string) out_string = scourString(in_string, options)
return xml.dom.minidom.parseString(out_string) return xml.dom.minidom.parseString(out_string)
if __name__ == '__main__': def printHeader():
print APP , VER
print COPYRIGHT
# parse command-line arguments def printSyntaxAndQuit():
printHeader()
print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n'
print 'If the input file is not specified, stdin is used.'
print 'If the output file is not specified, stdout is used.\n'
print 'If an option is not available below that means it occurs automatically'
print 'when scour is invoked. Available OPTIONS:\n'
print ' --enable-id-stripping : Scour will remove all un-referenced ID attributes'
print ''
quit()
# returns a tuple with:
# input stream, output stream, and a list of options specified on the command-line
def parseCLA():
args = sys.argv[1:] args = sys.argv[1:]
# by default the input and output are the standard streams # by default the input and output are the standard streams
input = sys.stdin input = sys.stdin
output = sys.stdout output = sys.stdout
options = []
validOptions = [
'--enable-id-stripping',
]
# if -i or -o is supplied, switch the stream to the file i = 0
if len(args) == 2: while i < len(args):
if args[0] == '-i' : arg = args[i]
input = open(args[1], 'r') i += 1
elif args[0] == '-o' : if arg == '-i' :
output = open(args[1], 'w') if i < len(args) :
else: input = open(args[i], 'r')
i += 1
continue
else:
printSyntaxAndQuit()
elif arg == '-o' :
if i < len(args) :
output = open(args[i], 'w')
i += 1
continue
else:
printSyntaxAndQuit()
elif arg in validOptions :
options.append(arg)
else :
print 'Error! Invalid argument:', arg
printSyntaxAndQuit() printSyntaxAndQuit()
# if both -o and -o are supplied, switch streams to the files return (input, output, options)
elif len(args) == 4 :
if args[0] == '-i' and args[2] == '-o' :
input = open(args[1], 'r')
output = open(args[3], 'w')
elif args[0] == '-o' and args[2] == 'i' :
output = open(args[1], 'w')
input = open(args[3], 'r')
else:
printSyntaxAndQuit()
# else invalid syntax if __name__ == '__main__':
elif len(args) != 0 :
printSyntaxAndQuit() (input, output, options) = parseCLA()
# if we are not sending to stdout, then print out app information # if we are not sending to stdout, then print out app information
bOutputReport = False bOutputReport = False
@ -768,7 +798,7 @@ if __name__ == '__main__':
# do the work # do the work
in_string = input.read() in_string = input.read()
out_string = scourString(in_string) out_string = scourString(in_string, options)
output.write(out_string) output.write(out_string)
# Close input and output files # Close input and output files

View file

@ -133,6 +133,30 @@ class RemoveUnreferencedRadialGradient(unittest.TestCase):
self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'radialradient')), 0, self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'radialradient')), 0,
'Unreferenced radialGradient not removed' ) 'Unreferenced radialGradient not removed' )
class RemoveUnreferencedElementInDefs(unittest.TestCase):
    # After scouring the fixture exactly one <rect> must remain in the
    # document; any additional one is an unreferenced rect left in <defs>.
    def runTest(self):
        scoured = scour.scourXmlFile('unittests/referenced-elements-1.svg')
        rects = scoured.getElementsByTagNameNS(SVGNS, 'rect')
        self.assertEquals(len(rects), 1,
            'Unreferenced rect left in defs' )
class KeepTitleInDefs(unittest.TestCase):
    # <title> is one of the tags that must survive the vacuuming of <defs>,
    # so exactly one <title> must still be present after scouring.
    def runTest(self):
        scoured = scour.scourXmlFile('unittests/referenced-elements-1.svg')
        titles = scoured.getElementsByTagNameNS(SVGNS, 'title')
        self.assertEquals(len(titles), 1,
            'Title removed from in defs' )
class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
    # NOTE: despite the class name, this checks the default behaviour: no
    # options are passed, so ID stripping is disabled and the id of the root
    # <svg> element must be left untouched.
    def runTest(self):
        scoured = scour.scourXmlFile('unittests/ids-to-strip.svg')
        svgRoot = scoured.getElementsByTagNameNS(SVGNS, 'svg')[0]
        self.assertEquals(svgRoot.getAttribute('id'), 'boo',
            '<svg> ID stripped when it should be disabled' )
class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
    # With --enable-id-stripping the unreferenced id on the root <svg>
    # element must be removed (getAttribute then yields the empty string).
    def runTest(self):
        scourOptions = ['--enable-id-stripping']
        scoured = scour.scourXmlFile('unittests/ids-to-strip.svg', scourOptions)
        svgRoot = scoured.getElementsByTagNameNS(SVGNS, 'svg')[0]
        self.assertEquals(svgRoot.getAttribute('id'), '',
            '<svg> ID not stripped' )
class RemoveUselessNestedGroups(unittest.TestCase): class RemoveUselessNestedGroups(unittest.TestCase):
def runTest(self): def runTest(self):
doc = scour.scourXmlFile('unittests/nested-useless-groups.svg') doc = scour.scourXmlFile('unittests/nested-useless-groups.svg')

View file

@ -0,0 +1,10 @@
<svg xmlns="http://www.w3.org/2000/svg" id="boo">
<defs>
<title id="title1">Fooey</title>
<rect id='r1' />
<linearGradient id="Polka_Dot_Pattern">
<stop offset="0.5" stop-color="blue" id="stop1234"/>
</linearGradient>
</defs>
<rect id='r2' fill="url(#Polka_Dot_Pattern)" />
</svg>

After

Width:  |  Height:  |  Size: 292 B

View file

@ -0,0 +1,10 @@
<svg xmlns="http://www.w3.org/2000/svg">
<defs>
<title id="title1">Fooey</title>
<rect id='r1' />
<linearGradient id="Polka_Dot_Pattern">
<stop offset="0.5" stop-color="blue" id="stop1234"/>
</linearGradient>
</defs>
<rect id='r2' fill="url(#Polka_Dot_Pattern)" />
</svg>

After

Width:  |  Height:  |  Size: 283 B

View file

@ -1,4 +1,4 @@
<svg xmlns="http://www.w3.org/2000/svg"> <svg xmlns="http://www.w3.org/2000/svg" id='fooey'>
<linearGradient id="Polka_Dot_Pattern"> <linearGradient id="Polka_Dot_Pattern">
<stop offset="0.5" stop-color="blue" /> <stop offset="0.5" stop-color="blue" />
</linearGradient> </linearGradient>

Before

Width:  |  Height:  |  Size: 147 B

After

Width:  |  Height:  |  Size: 158 B

Before After
Before After