diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index ea94cbd..91f5f24 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -8,3 +8,4 @@ Thanks to the following contributors to scour:
 * Martin:
   - better methods of handling string-to-float conversions in Python
   - document functions in the traditional Python way
+  - rewrite option parsing code
diff --git a/crunch.sh b/crunch.sh
index 86006a3..03e8b23 100755
--- a/crunch.sh
+++ b/crunch.sh
@@ -3,6 +3,6 @@ mkdir $1
 for FILE in `ls fulltests`
 do
 	echo Doing $FILE:
-	./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt
+	./scour.py -i fulltests/$FILE -o $1/$FILE 2>> $1/report.txt
 
 done
\ No newline at end of file
diff --git a/scour.py b/scour.py
index 99f71a6..39246cf 100755
--- a/scour.py
+++ b/scour.py
@@ -57,14 +57,12 @@ import xml.dom.minidom
 import re
 import math
 import base64
 import os.path
 import urllib
 from svg_regex import svg_parser
 from decimal import *
 import gzip
-
-# set precision to 5 decimal places by default
-getcontext().prec = 5
+import optparse
 
 APP = 'scour'
 VER = '0.14'
@@ -829,7 +827,7 @@ def repairStyle(node, options):
 
 	# now if any of the properties match known SVG attributes we prefer attributes
 	# over style so emit them and remove them from the style map
-	if not '--disable-style-to-xml' in options:
+	if options.style_to_xml:
 		for propName in styleMap.keys() :
 			if propName in svgAttributes :
 				node.setAttribute(propName, styleMap[propName])
@@ -1313,7 +1311,10 @@ def properlySizeDoc(docElement):
 # this is the main method
 # input is a string representation of the input XML
 # returns a string representation of the output XML
-def scourString(in_string, options=[]):
+def scourString(in_string, options=None):
+	if options is None:
+		options = _options_parser.get_default_values()
+	getcontext().prec = options.digits
 	global numAttrsRemoved
 	global numStylePropsFixed
 	global numElemsRemoved
@@ -1343,7 +1344,7 @@ def scourString(in_string, options=[]):
 	numStylePropsFixed = repairStyle(doc.documentElement, options)
 
 	# convert colors to #RRGGBB format
-	if not '--disable-simplify-colors' in options:
+	if options.simple_colors:
 		numBytesSavedInColors = convertColors(doc.documentElement)
 
 	# remove empty defs, metadata, g
@@ -1366,14 +1367,14 @@ def scourString(in_string, options=[]):
 	while removeUnreferencedElements(doc) > 0:
 		pass
 
-	if '--enable-id-stripping' in options:
+	if options.strip_ids:
 		bContinueLooping = True
 		while bContinueLooping:
 			identifiedElements = findElementsWithId(doc.documentElement)
 			referencedIDs = findReferencedElements(doc.documentElement)
 			bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
 
-	if not '--disable-group-collapsing' in options:
+	if options.group_collapse:
 		while removeNestedGroups(doc.documentElement) > 0:
 			pass
 
@@ -1417,133 +1418,123 @@ def scourString(in_string, options=[]):
 # used mostly by unit tests
 # input is a filename
 # returns the minidom doc representation of the SVG
-def scourXmlFile(filename, options=[]):
+def scourXmlFile(filename, options=None):
 	in_string = open(filename).read()
 #	print 'IN=',in_string
 	out_string = scourString(in_string, options)
 #	print 'OUT=',out_string
 	return xml.dom.minidom.parseString(out_string.encode('utf-8'))
 
-def printHeader():
-	print APP , VER
-	print COPYRIGHT
+# GZ: Seems most other commandline tools don't do this, is it really wanted?
+class HeaderedFormatter(optparse.IndentedHelpFormatter):
+	"""
+	Show application name, version number, and copyright statement
+	above usage information.
+	"""
+	def format_usage(self, usage):
+		return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT,
+			optparse.IndentedHelpFormatter.format_usage(self, usage))
 
-def printSyntaxAndQuit():
-	printHeader()
-	print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n'
-	print 'If the input/output files are specified with a svgz extension, then compressed SVG is assumed.\n'
-	print 'If the input file is not specified, stdin is used.'
-	print 'If the output file is not specified, stdout is used.'
-	print 'If an option is not available below that means it occurs automatically'
-	print 'when scour is invoked. Available OPTIONS:\n'
-	print ' --disable-simplify-colors : Scour will not convert all colors to #RRGGBB format'
-	print ' --disable-style-to-xml : Scour will not convert style properties into XML attributes'
-	print ' --disable-group-collapsing : Scour will not collapse elements'
-	print ' --enable-id-stripping : Scour will remove all un-referenced ID attributes'
-	print ' --set-precision N : Scour will set the number of significant digits (default: 6)'
-	print ''
-	quit()
+# GZ: would prefer this to be in a function or class scope, but tests etc need
+# access to the defaults anyway
+_options_parser = optparse.OptionParser(
+	usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
+	description=("If the input/output files are specified with a svgz"
+	" extension, then compressed SVG is assumed. If the input file is not"
+	" specified, stdin is used. If the output file is not specified,"
+	" stdout is used. If an option is not available below that means it"
+	" occurs automatically."),
+	formatter=HeaderedFormatter(max_help_position=30),
+	version=VER)
 
-# returns a tuple with:
-# input stream, output stream, a list of options specified on the command-line,
-# input filename, and output filename
-def parseCLA():
-	args = sys.argv[1:]
+_options_parser.add_option("--disable-simplify-colors",
+	action="store_false", dest="simple_colors", default=True,
+	help="won't convert all colors to #RRGGBB format")
+_options_parser.add_option("--disable-style-to-xml",
+	action="store_false", dest="style_to_xml", default=True,
+	help="won't convert styles into XML attributes")
+_options_parser.add_option("--disable-group-collapsing",
+	action="store_false", dest="group_collapse", default=True,
+	help="won't collapse elements")
+_options_parser.add_option("--enable-id-stripping",
+	action="store_true", dest="strip_ids", default=False,
+	help="remove all un-referenced ID attributes")
+# GZ: this is confusing, most people will be thinking in terms of
+# decimal places, which is not what decimal precision is doing
+_options_parser.add_option("-p", "--set-precision",
+	action="store", type="int", dest="digits", default=5,
+	help="set number of significant digits (default: %default)")
+_options_parser.add_option("-i",
+	action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
+_options_parser.add_option("-o",
+	action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
 
-	# by default the input and output are the standard streams
-	inputfilename = ''
-	outputfilename = ''
-	input = sys.stdin
-	output = sys.stdout
-	options = []
-	validOptions = [
-		'--disable-simplify-colors',
-		'--disable-style-to-xml',
-		'--disable-group-collapsing',
-		'--enable-id-stripping',
-		'--set-precision',
-		]
-
-	i = 0
-	while i < len(args):
-		arg = args[i]
-		i += 1
-		if arg == '-i' :
-			if i < len(args) :
-				inputfilename = args[i]
-				if args[i][-5:] == '.svgz':
-					input = gzip.open(args[i], 'rb')
-				else:
-					input = open(args[i], 'r')
-				i += 1
-				continue
-			else:
-				printSyntaxAndQuit()
-		elif arg == '-o' :
-			if i < len(args) :
-				outputfilename = args[i]
-				if args[i][-5:] == '.svgz':
-					output = gzip.open(args[i], 'wb')
-				else:
-					output = open(args[i], 'w')
-				i += 1
-				continue
-			else:
-				printSyntaxAndQuit()
-		elif arg == '--set-precision':
-			if i < len(args):
-				getcontext().prec = int(args[i])
-				i += 1
-				continue
-			else:
-				printSyntaxAndQuit()
-		elif arg in validOptions :
-			options.append(arg)
-		else :
-			print 'Error! Invalid argument:', arg
-			printSyntaxAndQuit()
-
-	return (input, output, options, inputfilename, outputfilename)
+def maybe_gziped_file(filename, mode="r"):
+	if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
+		return gzip.GzipFile(filename, mode)
+	return file(filename, mode)
+
+def parse_args(args=None):
+	options, rargs = _options_parser.parse_args(args)
+
+	if rargs:
+		_options_parser.error("Additional arguments not handled: %r" % rargs)
+	if options.digits < 1:
+		_options_parser.error("Number of significant digits must be at least 1")
+	if options.infilename:
+		infile = maybe_gziped_file(options.infilename)
+		# GZ: could catch a raised IOError here and report
+	else:
+		# GZ: could sniff for gzip compression here
+		infile = sys.stdin
+	if options.outfilename:
+		outfile = maybe_gziped_file(options.outfilename, "w")
+	else:
+		outfile = sys.stdout
+
+	return options, [infile, outfile]
 
 if __name__ == '__main__':
+	if sys.platform == "win32":
+		from time import clock as get_tick
+	else:
+		# GZ: is this different from time.time() in any way?
+		def get_tick():
+			return os.times()[0]
 
-	startTimes = os.times()
+	start = get_tick()
 
-	(input, output, options, inputfilename, outputfilename) = parseCLA()
+	options, (input, output) = parse_args()
 
-	# if we are not sending to stdout, then print out app information
-	bOutputReport = False
-	if output != sys.stdout :
-		bOutputReport = True
-		printHeader()
+	print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)
 
 	# do the work
 	in_string = input.read()
-	out_string = scourString(in_string, options)
-	output.write(out_string.encode("utf-8"))
+	out_string = scourString(in_string, options).encode("UTF-8")
+	output.write(out_string)
 
 	# Close input and output files
 	input.close()
 	output.close()
 
-	endTimes = os.times()
+	end = get_tick()
 
-	# output some statistics if we are not using stdout
-	if bOutputReport :
-		if inputfilename != '':
-			print ' File:', inputfilename
-		print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's'
-		print ' Number of elements removed:', numElemsRemoved
-		print ' Number of attributes removed:', numAttrsRemoved
-		print ' Number of unreferenced id attributes removed:', numIDsRemoved
-		print ' Number of style properties fixed:', numStylePropsFixed
-		print ' Number of raster images embedded inline:', numRastersEmbedded
-		print ' Number of path segments reduced/removed:', numPathSegmentsReduced
-		print ' Number of bytes saved in path data:', numBytesSavedInPathData
-		print ' Number of bytes saved in colors:', numBytesSavedInColors
-		oldsize = os.path.getsize(inputfilename)
-		newsize = os.path.getsize(outputfilename)
-		sizediff = (newsize / oldsize) * 100;
-		print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
+	# GZ: unless silenced by -q or something?
+	# GZ: not using globals would be good too
+	print >>sys.stderr, ' File:', input.name, \
+		'\n Time taken:', str(end-start) + 's', \
+		'\n Number of elements removed:', numElemsRemoved, \
+		'\n Number of attributes removed:', numAttrsRemoved, \
+		'\n Number of unreferenced id attributes removed:', numIDsRemoved, \
+		'\n Number of style properties fixed:', numStylePropsFixed, \
+		'\n Number of raster images embedded inline:', numRastersEmbedded, \
+		'\n Number of path segments reduced/removed:', numPathSegmentsReduced, \
+		'\n Number of bytes saved in path data:', numBytesSavedInPathData, \
+		'\n Number of bytes saved in colors:', numBytesSavedInColors
+	oldsize = len(in_string)
+	newsize = len(out_string)
+	sizediff = (newsize * 100.0) / oldsize
+	print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
+		'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
 
 
diff --git a/testscour.py b/testscour.py
index 9786b8d..2686b85 100755
--- a/testscour.py
+++ b/testscour.py
@@ -156,7 +156,8 @@ class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
 
 class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
 	def runTest(self):
-		doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping'])
+		doc = scour.scourXmlFile('unittests/ids-to-strip.svg',
+			scour.parse_args(['--enable-id-stripping'])[0])
 		self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '',
 			' ID not stripped' )
 
@@ -168,7 +169,8 @@ class RemoveUselessNestedGroups(unittest.TestCase):
 
 class DoNotRemoveUselessNestedGroups(unittest.TestCase):
 	def runTest(self):
-		doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', ['--disable-group-collapsing'])
+		doc = scour.scourXmlFile('unittests/nested-useless-groups.svg',
+			scour.parse_args(['--disable-group-collapsing'])[0])
 		self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2,
 			'Useless nested groups were removed despite --disable-group-collapsing' )
 
@@ -388,7 +390,8 @@ class RemoveFillOpacityWhenFillNone(unittest.TestCase):
 
 class ConvertFillPropertyToAttr(unittest.TestCase):
 	def runTest(self):
-		doc = scour.scourXmlFile('unittests/fill-none.svg', '--disable-simplify-colors')
+		doc = scour.scourXmlFile('unittests/fill-none.svg',
+			scour.parse_args(['--disable-simplify-colors'])[0])
 		self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black',
 			'fill property not converted to XML attribute' )
 