Refactor command-line argument parsing. Only strip IDs if option is enabled. Added some unit tests for the new functionality

2009-04-19 22:26:26 -05:00 · 2009-04-19 22:26:26 -05:00 · 28cc0d59fb
commit 28cc0d59fb
parent 9f1a88ba3a
5 changed files with 153 additions and 79 deletions
--- a/scour.py
+++ b/scour.py
@ -127,16 +127,13 @@ svgAttributes = [
 				'stroke-width',
 				]
-def printHeader():
+def findElementById(node, id):
-	print APP , VER
+	if node == None or node.nodeType != 1: return None
-	print COPYRIGHT
+	if node.getAttribute('id') == id: return node
-
+	for child in node.childNodes :
-def printSyntaxAndQuit():
+		e = findElementById(child,id)
-	printHeader()
+		if e != None: return e
-	print 'usage: scour.py [-i input.svg] [-o output.svg]\n'
+	return None
 	print 'If the input file is not specified, stdin is used.'
 	print 'If the output file is not specified, stdout is used.'
 	quit()	
 # returns all elements with id attributes
 def findElementsWithId(node,elems={}):
@ -197,6 +194,47 @@ numElemsRemoved = 0
 numAttrsRemoved = 0
 numRastersEmbedded = 0
 # removes all unreferenced elements except for <svg>, <font>, <metadata>, <title>, and <desc>
 # also vacuums the defs of any non-referenced renderable elements
 # returns the number of unreferenced elements removed from the document
 def removeUnreferencedElements(doc):
 	global numElemsRemoved
 	num = 0
 	removeTags = ['linearGradient', 'radialGradient', 'pattern']
 	identifiedElements = findElementsWithId(doc.documentElement, {})
 	referencedIDs = findReferencedElements(doc.documentElement, {})
 	for id in identifiedElements:
 		if not id in referencedIDs:
 			goner = findElementById(doc.documentElement, id)
 			if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
 				goner.parentNode.removeChild(goner)
 				num += 1
 				numElemsRemoved += 1
 	# TODO: should also go through defs and vacuum it
 	identifiedElements = findElementsWithId(doc.documentElement, {})
 	referencedIDs = findReferencedElements(doc.documentElement, {})
 	keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
 	num = 0
 	defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
 	for aDef in defs:
 		elemsToRemove = []
 		for elem in aDef.childNodes:
 			if elem.nodeType == 1 and (elem.getAttribute('id') == '' or \
 					(not elem.getAttribute('id') in referencedIDs)) and \
 					not elem.nodeName in keepTags:
 				elemsToRemove.append(elem)
 		for elem in elemsToRemove:
 			aDef.removeChild(elem)
 			numElemsRemoved += 1
 			num += 1
 	return num
 	return num
 # removes the unreferenced ID attributes
 # returns the number of ID attributes removed
 def removeUnreferencedIDs(referencedIDs, identifiedElements):
@ -211,40 +249,6 @@ def removeUnreferencedIDs(referencedIDs, identifiedElements):
 			num += 1
 	return num
 # returns the number of unreferenced children removed from defs elements
 def vacuumDefs(doc):
 	global numElemsRemoved
 	keepTags = ['font', 'style', 'metadata' ]
 	num = 0
 	defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
 	for aDef in defs:
 		elemsToRemove = []
 		for elem in aDef.childNodes:
 			if elem.nodeType == 1 and elem.getAttribute('id') == '' and not elem.nodeName in keepTags:
 				elemsToRemove.append(elem)
 		for elem in elemsToRemove:
 			aDef.removeChild(elem)
 			numElemsRemoved += 1
 			num += 1
 	return num
 # returns the number of unreferenced gradients or patterns removed from the document
 # (this relies on the ids being removed first)
 def removeUnreferencedElements(doc):
 	global numElemsRemoved
 	num = 0
 	for tag in ['pattern', 'linearGradient', 'radialGradient'] :
 		elems = doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag)
 		elemsToRemove = []
 		for elem in elems:
 			if elem.getAttribute('id') == '' :
 				elemsToRemove.append(elem)
 		for elem in elemsToRemove:
 			elem.parentNode.removeChild(elem)
 			numElemsRemoved += 1
 			num += 1
 	return num
 def removeNamespacedAttributes(node, namespaces):
 	global numAttrsRemoved
 	num = 0
@ -635,7 +639,7 @@ def properlySizeDoc(docElement):
 # this is the main method
 # input is a string representation of the input XML
 # returns a string representation of the output XML
-def scourString(in_string):
+def scourString(in_string, options=[]):
 	global numAttrsRemoved
 	global numStylePropsFixed
 	global numElemsRemoved
@ -660,12 +664,6 @@ def scourString(in_string):
 		doc.documentElement.removeAttribute(attr)
 		numAttrsRemoved += 1
 	bContinueLooping = True
 	while bContinueLooping:
 		identifiedElements = findElementsWithId(doc.documentElement, {})
 		referencedIDs = findReferencedElements(doc.documentElement, {})
 		bContinueLooping = ((removeUnreferencedIDs(referencedIDs, identifiedElements) + vacuumDefs(doc)) > 0)
 	# repair style (remove unnecessary style properties and change them into XML attributes)
 	numStylePropsFixed = repairStyle(doc.documentElement)
@ -689,6 +687,13 @@ def scourString(in_string):
 	while removeUnreferencedElements(doc) > 0:
 		pass
 	if '--enable-id-stripping' in options:
 		bContinueLooping = True
 		while bContinueLooping:
 			identifiedElements = findElementsWithId(doc.documentElement, {})
 			referencedIDs = findReferencedElements(doc.documentElement, {})
 			bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
 	while removeNestedGroups(doc.documentElement) > 0:
 		pass
@ -722,43 +727,68 @@ def scourString(in_string):
 # used mostly by unit tests
 # input is a filename
 # returns the minidom doc representation of the SVG
-def scourXmlFile(filename):
+def scourXmlFile(filename, options=[]):
 	in_string = open(filename).read()
-	out_string = scourString(in_string)
+	out_string = scourString(in_string, options)
 	return xml.dom.minidom.parseString(out_string)
-if __name__ == '__main__':
+def printHeader():
 	print APP , VER
 	print COPYRIGHT
-	# parse command-line arguments
+def printSyntaxAndQuit():
 	printHeader()
 	print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n'
 	print 'If the input file is not specified, stdin is used.'
 	print 'If the output file is not specified, stdout is used.\n'
 	print 'If an option is not available below that means it occurs automatically'
 	print 'when scour is invoked.  Available OPTIONS:\n'
 	print '  --enable-id-stripping : Scour will remove all un-referenced ID attributes'
 	print ''
 	quit()	
 # returns a tuple with:
 # input stream, output stream, and a list of options specified on the command-line
 def parseCLA():
 	args = sys.argv[1:]
 	# by default the input and output are the standard streams
 	input = sys.stdin
 	output = sys.stdout
 	options = []
 	validOptions = [
 					'--enable-id-stripping',
 					]
-	# if -i or -o is supplied, switch the stream to the file
+	i = 0
-	if len(args) == 2:
+	while i < len(args):
-		if args[0] == '-i' :
+		arg = args[i]
-			input = open(args[1], 'r')
+		i += 1
-		elif args[0] == '-o' :
+		if arg == '-i' :
-			output = open(args[1], 'w')
+			if i < len(args) :
-		else:
+				input = open(args[i], 'r')
 				i += 1
 				continue
 			else:
 				printSyntaxAndQuit()
 		elif arg == '-o' :
 			if i < len(args) :
 				output = open(args[i], 'w')
 				i += 1
 				continue
 			else:
 				printSyntaxAndQuit()
 		elif arg in validOptions :
 			options.append(arg)
 		else :
 			print 'Error!  Invalid argument:', arg
 			printSyntaxAndQuit()
-	# if both -o and -o are supplied, switch streams to the files
+	return (input, output, options)
 	elif len(args) == 4 :
 		if args[0] == '-i' and args[2] == '-o' :
 			input = open(args[1], 'r')
 			output = open(args[3], 'w')
 		elif args[0] == '-o' and args[2] == 'i' :
 			output = open(args[1], 'w')
 			input = open(args[3], 'r')
 		else:
 			printSyntaxAndQuit()
-	# else invalid syntax
+if __name__ == '__main__':
-	elif len(args) != 0 :
+
-		printSyntaxAndQuit()
+	(input, output, options) = parseCLA()
 	# if we are not sending to stdout, then print out app information
 	bOutputReport = False
@ -768,7 +798,7 @@ if __name__ == '__main__':
 	# do the work
 	in_string = input.read()
-	out_string = scourString(in_string)
+	out_string = scourString(in_string, options)
 	output.write(out_string)
 	# Close input and output files
--- a/testscour.py
+++ b/testscour.py
@ -133,6 +133,30 @@ class RemoveUnreferencedRadialGradient(unittest.TestCase):
 		self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'radialradient')), 0,
 			'Unreferenced radialGradient not removed' )
 class RemoveUnreferencedElementInDefs(unittest.TestCase):
 	def runTest(self):
 		doc = scour.scourXmlFile('unittests/referenced-elements-1.svg')
 		self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'rect')), 1,
 			'Unreferenced rect left in defs' )
 class KeepTitleInDefs(unittest.TestCase):
 	def runTest(self):
 		doc = scour.scourXmlFile('unittests/referenced-elements-1.svg')
 		self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'title')), 1,
 			'Title removed from in defs' )
 class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
 	def runTest(self):
 		doc = scour.scourXmlFile('unittests/ids-to-strip.svg')
 		self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), 'boo',
 			'<svg> ID stripped when it should be disabled' )
 class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
 	def runTest(self):
 		doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping'])
 		self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '',
 			'<svg> ID not stripped' )
 class RemoveUselessNestedGroups(unittest.TestCase):
 	def runTest(self):
 		doc = scour.scourXmlFile('unittests/nested-useless-groups.svg')
--- a/unittests/ids-to-strip.svg
+++ b/unittests/ids-to-strip.svg
@ -0,0 +1,10 @@
 <svg xmlns="http://www.w3.org/2000/svg" id="boo">
 <defs>
  <title id="title1">Fooey</title>
  <rect id='r1' />
  <linearGradient id="Polka_Dot_Pattern">
    <stop offset="0.5" stop-color="blue" id="stop1234"/>
  </linearGradient>
 </defs>
 <rect id='r2' fill="url(#Polka_Dot_Pattern)" />
 </svg>
--- a/unittests/referenced-elements-1.svg
+++ b/unittests/referenced-elements-1.svg
@ -0,0 +1,10 @@
 <svg xmlns="http://www.w3.org/2000/svg">
 <defs>
  <title id="title1">Fooey</title>
  <rect id='r1' />
  <linearGradient id="Polka_Dot_Pattern">
    <stop offset="0.5" stop-color="blue" id="stop1234"/>
  </linearGradient>
 </defs>
 <rect id='r2' fill="url(#Polka_Dot_Pattern)" />
 </svg>
--- a/unittests/unreferenced-linearGradient.svg
+++ b/unittests/unreferenced-linearGradient.svg
@ -1,4 +1,4 @@
-<svg xmlns="http://www.w3.org/2000/svg">
+<svg xmlns="http://www.w3.org/2000/svg" id='fooey'>
 <linearGradient id="Polka_Dot_Pattern">
  <stop offset="0.5" stop-color="blue" />
 </linearGradient>