Start 0.07: move scour functionality into a module-level function and only call when __main__. Move tests around a bit. Add starter testscour.py

This commit is contained in:
JSCHILL1 2009-04-13 10:22:27 -05:00
parent b1f861d285
commit a1f6d9e8dd
14 changed files with 146 additions and 112 deletions

View file

Before

Width:  |  Height:  |  Size: 2.3 MiB

After

Width:  |  Height:  |  Size: 2.3 MiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 94 KiB

After

Width:  |  Height:  |  Size: 94 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 274 B

After

Width:  |  Height:  |  Size: 274 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 117 B

After

Width:  |  Height:  |  Size: 117 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 69 B

After

Width:  |  Height:  |  Size: 69 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 208 KiB

After

Width:  |  Height:  |  Size: 208 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 250 KiB

After

Width:  |  Height:  |  Size: 250 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 393 B

After

Width:  |  Height:  |  Size: 393 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 144 B

After

Width:  |  Height:  |  Size: 144 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Before After
Before After

View file

@ -1,4 +1,4 @@
#!/bin/bash #!/bin/bash
SCOURVER="0.05" SCOURVER="0.06"
tar cvf scour-$SCOURVER.tar scour.py LICENSE NOTICE README.txt tar cvf scour-$SCOURVER.tar scour.py LICENSE NOTICE README.txt
gzip scour-$SCOURVER.tar gzip scour-$SCOURVER.tar

241
scour.py
View file

@ -1,6 +1,6 @@
#!/usr/local/bin/python #!/usr/local/bin/python
# Scour # Scour
# Version 0.06 # Version 0.07
# #
# Copyright 2009 Jeff Schiller # Copyright 2009 Jeff Schiller
# #
@ -20,7 +20,7 @@
# Notes: # Notes:
# rubys path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
# (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb ) # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
# Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
@ -45,13 +45,10 @@
# * Process Transformations # * Process Transformations
# * Process quadratic Bezier curves # * Process quadratic Bezier curves
# * Collapse all group based transformations # * Collapse all group based transformations
# * Output Standard SVG
# * Use viewPort instead of document width/height
# Next Up: # Next Up:
# + Prevent error when stroke-width property value has a unit # + move all functionality into a module level function named 'scour' and only call it
# + Convert width/height into a viewBox where possible # when being run as main (prepare for unit testing)
# + Convert all referenced rasters into base64 encoded URLs if the files can be found
# - Removed duplicate gradient stops # - Removed duplicate gradient stops
# - Convert all colors to #RRGGBB format # - Convert all colors to #RRGGBB format
# - # -
@ -70,7 +67,7 @@ import os.path
import urllib import urllib
APP = 'scour' APP = 'scour'
VER = '0.06' VER = '0.07'
COPYRIGHT = 'Copyright Jeff Schiller, 2009' COPYRIGHT = 'Copyright Jeff Schiller, 2009'
NS = { 'SVG': 'http://www.w3.org/2000/svg', NS = { 'SVG': 'http://www.w3.org/2000/svg',
@ -550,117 +547,131 @@ def properlySizeDoc(docElement):
docElement.removeAttribute('width') docElement.removeAttribute('width')
docElement.removeAttribute('height') docElement.removeAttribute('height')
# parse command-line arguments # this is the main method
args = sys.argv[1:] # input is a string representation of the input XML
# returns a string representation of the output XML
def scour(in_string):
global numAttrsRemoved
global numStylePropsFixed
global numElemsRemoved
doc = xml.dom.minidom.parseString(in_string)
# by default the input and output are the standard streams # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
input = sys.stdin # on the first pass, so we do it multiple times
output = sys.stdout # does it have to do with removal of children affecting the childlist?
while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
pass
while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
pass
# if -i or -o is supplied, switch the stream to the file # remove the xmlns: declarations now
if len(args) == 2: xmlnsDeclsToRemove = []
if args[0] == '-i' : attrList = doc.documentElement.attributes
input = open(args[1], 'r') for num in range(attrList.length) :
elif args[0] == '-o' : if attrList.item(num).nodeValue in unwanted_ns :
output = open(args[1], 'w') xmlnsDeclsToRemove.append(attrList.item(num).nodeName)
else:
for attr in xmlnsDeclsToRemove :
doc.documentElement.removeAttribute(attr)
numAttrsRemoved += 1
bContinueLooping = True
while bContinueLooping:
identifiedElements = findElementsWithId(doc.documentElement, {})
referencedIDs = findReferencedElements(doc.documentElement, {})
bContinueLooping = ((removeUnreferencedIDs(referencedIDs, identifiedElements) + vacuumDefs(doc)) > 0)
# repair style (remove unnecessary style properties and change them into XML attributes)
numStylePropsFixed = repairStyle(doc.documentElement)
# remove empty defs, metadata, g
# NOTE: these elements will be removed even if they have (invalid) text nodes
elemsToRemove = []
for tag in ['defs', 'metadata', 'g'] :
for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) :
removeElem = not elem.hasChildNodes()
if removeElem == False :
for child in elem.childNodes :
if child.nodeType in [1, 4, 8] :
break
else:
removeElem = True
if removeElem :
elem.parentNode.removeChild(elem)
numElemsRemoved += 1
# clean path data
for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') :
cleanPath(elem)
# convert raster references to base64-encoded strings
for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') :
embedRasters(elem)
# properly size the SVG document (ideally width/height should be 100% with a viewBox)
properlySizeDoc(doc.documentElement)
# output the document
out_string = doc.documentElement.toxml()
return out_string
if __name__ == '__main__':
# parse command-line arguments
args = sys.argv[1:]
# by default the input and output are the standard streams
input = sys.stdin
output = sys.stdout
# if -i or -o is supplied, switch the stream to the file
if len(args) == 2:
if args[0] == '-i' :
input = open(args[1], 'r')
elif args[0] == '-o' :
output = open(args[1], 'w')
else:
printSyntaxAndQuit()
# if both -i and -o are supplied, switch streams to the files
elif len(args) == 4 :
if args[0] == '-i' and args[2] == '-o' :
input = open(args[1], 'r')
output = open(args[3], 'w')
elif args[0] == '-o' and args[2] == 'i' :
output = open(args[1], 'w')
input = open(args[3], 'r')
else:
printSyntaxAndQuit()
# else invalid syntax
elif len(args) != 0 :
printSyntaxAndQuit() printSyntaxAndQuit()
# if both -i and -o are supplied, switch streams to the files # if we are not sending to stdout, then print out app information
elif len(args) == 4 : bOutputReport = False
if args[0] == '-i' and args[2] == '-o' : if output != sys.stdout :
input = open(args[1], 'r') bOutputReport = True
output = open(args[3], 'w') printHeader()
elif args[0] == '-o' and args[2] == 'i' :
output = open(args[1], 'w')
input = open(args[3], 'r')
else:
printSyntaxAndQuit()
# else invalid syntax # do the work
elif len(args) != 0 : in_string = input.read()
printSyntaxAndQuit() out_string = scour(in_string)
output.write(out_string)
# if we are not sending to stdout, then print out app information # Close input and output files
bOutputReport = False input.close()
if output != sys.stdout : output.close()
bOutputReport = True
printHeader()
# build DOM in memory # output some statistics if we are not using stdout
doc = xml.dom.minidom.parse(input) if bOutputReport :
print " Number of unreferenced id attributes removed:", numIDsRemoved
# for whatever reason this does not always remove all inkscape/sodipodi attributes/elements print " Number of elements removed:", numElemsRemoved
# on the first pass, so we do it multiple times print " Number of attributes removed:", numAttrsRemoved
# does it have to do with removal of children affecting the childlist? print " Number of style properties fixed:", numStylePropsFixed
while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 : print " Number of raster images embedded inline:", numRastersEmbedded
pass oldsize = os.path.getsize(input.name)
while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 : newsize = os.path.getsize(output.name)
pass #sizediff = (min(oldsize, newsize) / max(oldsize, newsize)) * 100;
sizediff = (newsize / oldsize);
# remove the xmlns: declarations now print " Original file size:", oldsize, "kb; new file size:", newsize, "kb (" + str(sizediff)[:5] + "x)"
xmlnsDeclsToRemove = []
attrList = doc.documentElement.attributes
for num in range(attrList.length) :
if attrList.item(num).nodeValue in unwanted_ns :
xmlnsDeclsToRemove.append(attrList.item(num).nodeName)
for attr in xmlnsDeclsToRemove :
doc.documentElement.removeAttribute(attr)
numAttrsRemoved += 1
bContinueLooping = True
while bContinueLooping:
identifiedElements = findElementsWithId(doc.documentElement, {})
referencedIDs = findReferencedElements(doc.documentElement, {})
bContinueLooping = ((removeUnreferencedIDs(referencedIDs, identifiedElements) + vacuumDefs(doc)) > 0)
# repair style (remove unnecessary style properties and change them into XML attributes)
numStylePropsFixed = repairStyle(doc.documentElement)
# remove empty defs, metadata, g
# NOTE: these elements will be removed even if they have (invalid) text nodes
elemsToRemove = []
for tag in ['defs', 'metadata', 'g'] :
for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) :
removeElem = not elem.hasChildNodes()
if removeElem == False :
for child in elem.childNodes :
if child.nodeType in [1, 4, 8] :
break
else:
removeElem = True
if removeElem :
elem.parentNode.removeChild(elem)
numElemsRemoved += 1
# clean path data
for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') :
cleanPath(elem)
# convert raster references to base64-encoded strings
for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') :
embedRasters(elem)
# properly size the SVG document (ideally width/height should be 100% with a viewBox)
properlySizeDoc(doc.documentElement)
# output the document
doc.documentElement.writexml(output)
# Close input and output files
input.close()
output.close()
# output some statistics if we are not using stdout
if bOutputReport :
print " Number of unreferenced id attributes removed:", numIDsRemoved
print " Number of elements removed:", numElemsRemoved
print " Number of attributes removed:", numAttrsRemoved
print " Number of style properties fixed:", numStylePropsFixed
print " Number of raster images embedded inline:", numRastersEmbedded
oldsize = os.path.getsize(input.name)
newsize = os.path.getsize(output.name)
#sizediff = (min(oldsize, newsize) / max(oldsize, newsize)) * 100;
sizediff = (newsize / oldsize);
print " Original file size:", oldsize, "kb; new file size:", newsize, "kb (" + str(sizediff)[:5] + "x)"

23
testscour.py Executable file
View file

@ -0,0 +1,23 @@
#!/usr/local/bin/python
# Test Harness for Scour
#
# Copyright 2009 Jeff Schiller
#
# This file is part of Scour, http://www.codedread.com/scour/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import scour
print "done"