Rewrite to use optparse for the user interface, picking up a few minor bug fixes and improvements in the process, while trying to keep behaviour otherwise unchanged.

This commit is contained in:
Martin 2009-05-20 19:22:57 +01:00
parent 4bbb5923b3
commit d8ffea56e5
4 changed files with 112 additions and 118 deletions

View file

@ -8,3 +8,4 @@ Thanks to the following contributors to scour:
* Martin:
- better methods of handling string-to-float conversions in Python
- document functions in the traditional Python way
- rewrite option parsing code

View file

@ -3,6 +3,6 @@ mkdir $1
for FILE in `ls fulltests`
do
echo Doing $FILE:
./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt
./scour.py -i fulltests/$FILE -o $1/$FILE 2>> $1/report.txt
done

218
scour.py
View file

@ -57,14 +57,11 @@ import xml.dom.minidom
import re
import math
import base64
import os.path
import urllib
from svg_regex import svg_parser
from decimal import *
import gzip
# set precision to 5 significant digits by default
getcontext().prec = 5
import optparse
APP = 'scour'
VER = '0.14'
@ -829,7 +826,7 @@ def repairStyle(node, options):
# now if any of the properties match known SVG attributes we prefer attributes
# over style so emit them and remove them from the style map
if not '--disable-style-to-xml' in options:
if options.style_to_xml:
for propName in styleMap.keys() :
if propName in svgAttributes :
node.setAttribute(propName, styleMap[propName])
@ -1313,7 +1310,10 @@ def properlySizeDoc(docElement):
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=[]):
def scourString(in_string, options=None):
if options is None:
options = _options_parser.get_default_values()
getcontext().prec = options.digits
global numAttrsRemoved
global numStylePropsFixed
global numElemsRemoved
@ -1343,7 +1343,7 @@ def scourString(in_string, options=[]):
numStylePropsFixed = repairStyle(doc.documentElement, options)
# convert colors to #RRGGBB format
if not '--disable-simplify-colors' in options:
if options.simple_colors:
numBytesSavedInColors = convertColors(doc.documentElement)
# remove empty defs, metadata, g
@ -1366,14 +1366,14 @@ def scourString(in_string, options=[]):
while removeUnreferencedElements(doc) > 0:
pass
if '--enable-id-stripping' in options:
if options.strip_ids:
bContinueLooping = True
while bContinueLooping:
identifiedElements = findElementsWithId(doc.documentElement)
referencedIDs = findReferencedElements(doc.documentElement)
bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
if not '--disable-group-collapsing' in options:
if options.group_collapse:
while removeNestedGroups(doc.documentElement) > 0:
pass
@ -1417,133 +1417,123 @@ def scourString(in_string, options=[]):
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=[]):
def scourXmlFile(filename, options=None):
in_string = open(filename).read()
# print 'IN=',in_string
out_string = scourString(in_string, options)
# print 'OUT=',out_string
return xml.dom.minidom.parseString(out_string.encode('utf-8'))
def printHeader():
print APP , VER
print COPYRIGHT
# GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
    Help formatter that puts a banner in front of the usage text.

    The banner is the application name and version on one line followed
    by the copyright statement on the next; the regular indented usage
    text comes after it.
    """

    def format_usage(self, usage):
        # The base class is called explicitly (not through super()) exactly
        # as before, so this keeps working if optparse's formatter classes
        # are old-style classes.
        base_usage = optparse.IndentedHelpFormatter.format_usage(self, usage)
        banner = "%s %s\n%s\n" % (APP, VER, COPYRIGHT)
        return banner + base_usage
def printSyntaxAndQuit():
printHeader()
print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n'
print 'If the input/output files are specified with a svgz extension, then compressed SVG is assumed.\n'
print 'If the input file is not specified, stdin is used.'
print 'If the output file is not specified, stdout is used.'
print 'If an option is not available below that means it occurs automatically'
print 'when scour is invoked. Available OPTIONS:\n'
print ' --disable-simplify-colors : Scour will not convert all colors to #RRGGBB format'
print ' --disable-style-to-xml : Scour will not convert style properties into XML attributes'
print ' --disable-group-collapsing : Scour will not collapse <g> elements'
print ' --enable-id-stripping : Scour will remove all un-referenced ID attributes'
print ' --set-precision N : Scour will set the number of significant digits (default: 6)'
print ''
quit()
# Module-level command-line parser. parse_args() uses it to parse an argument
# list, and scourString() asks it for the default option values when called
# without an options object.
# GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway
_options_parser = optparse.OptionParser(
# -i / -o are spelled out here in the usage line
usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
# the adjacent string literals below are concatenated into one description
description=("If the input/output files are specified with a svgz"
" extension, then compressed SVG is assumed. If the input file is not"
" specified, stdin is used. If the output file is not specified, "
" stdout is used. If an option is not available below that means it"
" occurs automatically."),
# HeaderedFormatter puts the application name, version and copyright above
# the usage text
formatter=HeaderedFormatter(max_help_position=30),
# supplying a version string makes optparse provide a --version option
version=VER)
# returns a tuple with:
# input stream, output stream, a list of options specified on the command-line,
# input filename, and output filename
def parseCLA():
args = sys.argv[1:]
# Option table for the command line. Each "dest" below is the attribute name
# the rest of the module reads from the parsed options object.

# read as options.simple_colors; on unless the flag is given
_options_parser.add_option("--disable-simplify-colors",
action="store_false", dest="simple_colors", default=True,
help="won't convert all colors to #RRGGBB format")
# read as options.style_to_xml; on unless the flag is given
_options_parser.add_option("--disable-style-to-xml",
action="store_false", dest="style_to_xml", default=True,
help="won't convert styles into XML attributes")
# read as options.group_collapse; on unless the flag is given
_options_parser.add_option("--disable-group-collapsing",
action="store_false", dest="group_collapse", default=True,
help="won't collapse <g> elements")
# read as options.strip_ids; off unless the flag is given
_options_parser.add_option("--enable-id-stripping",
action="store_true", dest="strip_ids", default=False,
help="remove all un-referenced ID attributes")
# read as options.digits, which scourString() assigns to the decimal
# context's precision
# GZ: this is confusing, most people will be thinking in terms of
# decimal places, which is not what decimal precision is doing
_options_parser.add_option("-p", "--set-precision",
action="store", type=int, dest="digits", default=5,
help="set number of significant digits (default: %default)")
# -i and -o already appear in the usage string, so they are kept out of the
# generated option list with SUPPRESS_HELP
_options_parser.add_option("-i",
action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("-o",
action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
# by default the input and output are the standard streams
inputfilename = ''
outputfilename = ''
input = sys.stdin
output = sys.stdout
options = []
validOptions = [
'--disable-simplify-colors',
'--disable-style-to-xml',
'--disable-group-collapsing',
'--enable-id-stripping',
'--set-precision',
]
i = 0
while i < len(args):
arg = args[i]
i += 1
if arg == '-i' :
if i < len(args) :
inputfilename = args[i]
if args[i][-5:] == '.svgz':
input = gzip.open(args[i], 'rb')
else:
input = open(args[i], 'r')
i += 1
continue
else:
printSyntaxAndQuit()
elif arg == '-o' :
if i < len(args) :
outputfilename = args[i]
if args[i][-5:] == '.svgz':
output = gzip.open(args[i], 'wb')
else:
output = open(args[i], 'w')
i += 1
continue
else:
printSyntaxAndQuit()
elif arg == '--set-precision':
if i < len(args):
getcontext().prec = int(args[i])
i += 1
continue
else:
printSyntaxAndQuit()
elif arg in validOptions :
options.append(arg)
else :
print 'Error! Invalid argument:', arg
printSyntaxAndQuit()
return (input, output, options, inputfilename, outputfilename)
def maybe_gziped_file(filename, mode="r"):
    """
    Open a file, transparently handling gzip-compressed input/output.

    filename -- path of the file to open
    mode -- mode string passed straight to the opener (default "r")

    Returns a gzip.GzipFile when the file extension is ".svgz" or ".gz"
    (compared case-insensitively), otherwise an ordinary file object.
    Errors raised while opening the file are not caught here.
    """
    extension = os.path.splitext(filename)[1].lower()
    if extension in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() is the documented way to get a file object; the file()
    # constructor used previously exists only on Python 2.
    return open(filename, mode)
def parse_args(args=None):
    """
    Parse command-line arguments and open the input and output streams.

    args -- list of argument strings; when None, optparse falls back to
            the process's own command line

    Returns a pair (options, [infile, outfile]): the parsed options object
    and the opened streams. sys.stdin / sys.stdout are used when -i / -o
    are not given.

    Invalid usage (leftover positional arguments, negative precision) is
    reported through the option parser's error() method, which prints a
    usage message and exits. Errors raised while opening the files are
    not caught here.
    """
    options, rargs = _options_parser.parse_args(args)

    # The parser object is the module-level _options_parser; the previous
    # code called error() on an undefined name "parser", so these checks
    # raised NameError instead of reporting the usage problem.
    if rargs:
        _options_parser.error("Additional arguments not handled: %r" % rargs)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits")
    # NOTE(review): a value of 0 passes this check and is later assigned to
    # the decimal context precision - confirm whether that should be rejected.

    if options.infilename:
        infile = maybe_gziped_file(options.infilename)
        # GZ: could catch a raised IOError here and report
    else:
        # GZ: could sniff for gzip compression here
        infile = sys.stdin

    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")
    else:
        outfile = sys.stdout

    return options, [infile, outfile]
if __name__ == '__main__':
if sys.platform == "win32":
from time import clock as get_tick
else:
# GZ: is this different from time.time() in any way?
def get_tick():
return os.times()[0]
startTimes = os.times()
start = get_tick()
(input, output, options, inputfilename, outputfilename) = parseCLA()
options, (input, output) = parse_args()
# if we are not sending to stdout, then print out app information
bOutputReport = False
if output != sys.stdout :
bOutputReport = True
printHeader()
print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)
# do the work
in_string = input.read()
out_string = scourString(in_string, options)
output.write(out_string.encode("utf-8"))
out_string = scourString(in_string, options).encode("UTF-8")
output.write(out_string)
# Close input and output files
input.close()
output.close()
endTimes = os.times()
end = get_tick()
# output some statistics if we are not using stdout
if bOutputReport :
if inputfilename != '':
print ' File:', inputfilename
print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's'
print ' Number of elements removed:', numElemsRemoved
print ' Number of attributes removed:', numAttrsRemoved
print ' Number of unreferenced id attributes removed:', numIDsRemoved
print ' Number of style properties fixed:', numStylePropsFixed
print ' Number of raster images embedded inline:', numRastersEmbedded
print ' Number of path segments reduced/removed:', numPathSegmentsReduced
print ' Number of bytes saved in path data:', numBytesSavedInPathData
print ' Number of bytes saved in colors:', numBytesSavedInColors
oldsize = os.path.getsize(inputfilename)
newsize = os.path.getsize(outputfilename)
sizediff = (newsize / oldsize) * 100;
print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
# GZ: unless silenced by -q or something?
# GZ: not using globals would be good too
print >>sys.stderr, ' File:', input.name, \
'\n Time taken:', str(end-start) + 's', \
'\n Number of elements removed:', numElemsRemoved, \
'\n Number of attributes removed:', numAttrsRemoved, \
'\n Number of unreferenced id attributes removed:', numIDsRemoved, \
'\n Number of style properties fixed:', numStylePropsFixed, \
'\n Number of raster images embedded inline:', numRastersEmbedded, \
'\n Number of path segments reduced/removed:', numPathSegmentsReduced, \
'\n Number of bytes saved in path data:', numBytesSavedInPathData, \
'\n Number of bytes saved in colors:', numBytesSavedInColors
oldsize = len(in_string)
newsize = len(out_string)
sizediff = (newsize / oldsize) * 100
print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'

View file

@ -156,7 +156,8 @@ class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
def runTest(self):
doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping'])
doc = scour.scourXmlFile('unittests/ids-to-strip.svg',
scour.parse_args(['--enable-id-stripping'])[0])
self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '',
'<svg> ID not stripped' )
@ -168,7 +169,8 @@ class RemoveUselessNestedGroups(unittest.TestCase):
class DoNotRemoveUselessNestedGroups(unittest.TestCase):
def runTest(self):
doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', ['--disable-group-collapsing'])
doc = scour.scourXmlFile('unittests/nested-useless-groups.svg',
scour.parse_args(['--disable-group-collapsing'])[0])
self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2,
'Useless nested groups were removed despite --disable-group-collapsing' )
@ -388,7 +390,8 @@ class RemoveFillOpacityWhenFillNone(unittest.TestCase):
class ConvertFillPropertyToAttr(unittest.TestCase):
def runTest(self):
doc = scour.scourXmlFile('unittests/fill-none.svg', '--disable-simplify-colors')
doc = scour.scourXmlFile('unittests/fill-none.svg',
scour.parse_args(['--disable-simplify-colors'])[0])
self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black',
'fill property not converted to XML attribute' )