From a1f6d9e8dd267c35167714b6c5e263cd53a914c9 Mon Sep 17 00:00:00 2001 From: JSCHILL1 Date: Mon, 13 Apr 2009 10:22:27 -0500 Subject: [PATCH] Start 0.07: move scour functionality into a module-level function and only call when __main__. Move tests around a bit. Add starter testscour.py --- {tests => fulltests}/News_Paper.svg | 0 {tests => fulltests}/Wave.svg | 0 {tests => fulltests}/chained.svg | 0 {tests => fulltests}/emptyElems.svg | 0 {tests => fulltests}/emptyElems2.svg | 0 {tests => fulltests}/foo.xml | 0 {tests => fulltests}/header.svg | 0 {tests => fulltests}/strawberry.svg | 0 {tests => fulltests}/units.svg | 0 {tests => fulltests}/viewbox.svg | 0 {tests => fulltests}/wifi.svg | 0 package.sh | 4 +- scour.py | 231 ++++++++++++++------------- testscour.py | 23 +++ 14 files changed, 146 insertions(+), 112 deletions(-) rename {tests => fulltests}/News_Paper.svg (100%) rename {tests => fulltests}/Wave.svg (100%) rename {tests => fulltests}/chained.svg (100%) rename {tests => fulltests}/emptyElems.svg (100%) rename {tests => fulltests}/emptyElems2.svg (100%) rename {tests => fulltests}/foo.xml (100%) rename {tests => fulltests}/header.svg (100%) rename {tests => fulltests}/strawberry.svg (100%) rename {tests => fulltests}/units.svg (100%) rename {tests => fulltests}/viewbox.svg (100%) rename {tests => fulltests}/wifi.svg (100%) create mode 100755 testscour.py diff --git a/tests/News_Paper.svg b/fulltests/News_Paper.svg similarity index 100% rename from tests/News_Paper.svg rename to fulltests/News_Paper.svg diff --git a/tests/Wave.svg b/fulltests/Wave.svg similarity index 100% rename from tests/Wave.svg rename to fulltests/Wave.svg diff --git a/tests/chained.svg b/fulltests/chained.svg similarity index 100% rename from tests/chained.svg rename to fulltests/chained.svg diff --git a/tests/emptyElems.svg b/fulltests/emptyElems.svg similarity index 100% rename from tests/emptyElems.svg rename to fulltests/emptyElems.svg diff --git a/tests/emptyElems2.svg b/fulltests/emptyElems2.svg similarity index 100% rename from tests/emptyElems2.svg rename to fulltests/emptyElems2.svg diff --git a/tests/foo.xml b/fulltests/foo.xml similarity index 100% rename from tests/foo.xml rename to fulltests/foo.xml diff --git a/tests/header.svg b/fulltests/header.svg similarity index 100% rename from tests/header.svg rename to fulltests/header.svg diff --git a/tests/strawberry.svg b/fulltests/strawberry.svg similarity index 100% rename from tests/strawberry.svg rename to fulltests/strawberry.svg diff --git a/tests/units.svg b/fulltests/units.svg similarity index 100% rename from tests/units.svg rename to fulltests/units.svg diff --git a/tests/viewbox.svg b/fulltests/viewbox.svg similarity index 100% rename from tests/viewbox.svg rename to fulltests/viewbox.svg diff --git a/tests/wifi.svg b/fulltests/wifi.svg similarity index 100% rename from tests/wifi.svg rename to fulltests/wifi.svg diff --git a/package.sh b/package.sh index 41f8676..c4a8416 100755 --- a/package.sh +++ b/package.sh @@ -1,4 +1,4 @@ #!/bin/bash -SCOURVER="0.05" +SCOURVER="0.06" tar cvf scour-$SCOURVER.tar scour.py LICENSE NOTICE README.txt -gzip scour-$SCOURVER.tar \ No newline at end of file +gzip scour-$SCOURVER.tar diff --git a/scour.py b/scour.py index 8635bef..058ad40 100755 --- a/scour.py +++ b/scour.py @@ -1,6 +1,6 @@ #!/usr/local/bin/python # Scour -# Version 0.06 +# Version 0.07 # # Copyright 2009 Jeff Schiller # @@ -20,7 +20,7 @@ # Notes: -# rubys path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb +# rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb ) # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG @@ -45,13 +45,10 @@ # * Process Transformations # * Process quadratic Bezier curves # * Collapse all group based transformations -# * Output Standard SVG -# * Use viewPort instead of document width/height # Next Up: -# + Prevent error when stroke-width property value has a unit -# + Convert width/height into a viewBox where possible -# + Convert all referenced rasters into base64 encoded URLs if the files can be found +# + move all functionality into a module level function named 'scour' and only call it +# when being run as main (prepare for unit testing) # - Removed duplicate gradient stops # - Convert all colors to #RRGGBB format # - @@ -70,7 +67,7 @@ import os.path import urllib APP = 'scour' -VER = '0.06' +VER = '0.07' COPYRIGHT = 'Copyright Jeff Schiller, 2009' NS = { 'SVG': 'http://www.w3.org/2000/svg', @@ -550,117 +547,131 @@ def properlySizeDoc(docElement): docElement.removeAttribute('width') docElement.removeAttribute('height') -# parse command-line arguments -args = sys.argv[1:] +# this is the main method +# input is a string representation of the input XML +# returns a string representation of the output XML +def scour(in_string): + global numAttrsRemoved + global numStylePropsFixed + global numElemsRemoved + doc = xml.dom.minidom.parseString(in_string) -# by default the input and output are the standard streams -input = sys.stdin -output = sys.stdout - -# if -i or -o is supplied, switch the stream to the file -if len(args) == 2: - if args[0] == '-i' : - input = open(args[1], 'r') - elif args[0] == '-o' : - output = open(args[1], 'w') - else: - printSyntaxAndQuit() - -# if both -o and -o are supplied, switch streams to the files -elif len(args) == 4 : - if args[0] == '-i' and args[2] == '-o' : - input = open(args[1], 'r') - output = open(args[3], 'w') - elif args[0] == '-o' and args[2] == 'i' : - output = open(args[1], 'w') - input = open(args[3], 'r') - else: - printSyntaxAndQuit() - -# else invalid syntax -elif len(args) != 0 : - printSyntaxAndQuit() - -# if we are not sending to stdout, then print out app information -bOutputReport = False -if output != sys.stdout : - bOutputReport = True - printHeader() - -# build DOM in memory -doc = xml.dom.minidom.parse(input) - -# for whatever reason this does not always remove all inkscape/sodipodi attributes/elements -# on the first pass, so we do it multiple times -# does it have to do with removal of children affecting the childlist? -while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 : - pass -while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 : - pass + # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements + # on the first pass, so we do it multiple times + # does it have to do with removal of children affecting the childlist? + while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 : + pass + while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 : + pass -# remove the xmlns: declarations now -xmlnsDeclsToRemove = [] -attrList = doc.documentElement.attributes -for num in range(attrList.length) : - if attrList.item(num).nodeValue in unwanted_ns : - xmlnsDeclsToRemove.append(attrList.item(num).nodeName) + # remove the xmlns: declarations now + xmlnsDeclsToRemove = [] + attrList = doc.documentElement.attributes + for num in range(attrList.length) : + if attrList.item(num).nodeValue in unwanted_ns : + xmlnsDeclsToRemove.append(attrList.item(num).nodeName) -for attr in xmlnsDeclsToRemove : - doc.documentElement.removeAttribute(attr) - numAttrsRemoved += 1 + for attr in xmlnsDeclsToRemove : + doc.documentElement.removeAttribute(attr) + numAttrsRemoved += 1 -bContinueLooping = True -while bContinueLooping: - identifiedElements = findElementsWithId(doc.documentElement, {}) - referencedIDs = findReferencedElements(doc.documentElement, {}) - bContinueLooping = ((removeUnreferencedIDs(referencedIDs, identifiedElements) + vacuumDefs(doc)) > 0) + bContinueLooping = True + while bContinueLooping: + identifiedElements = findElementsWithId(doc.documentElement, {}) + referencedIDs = findReferencedElements(doc.documentElement, {}) + bContinueLooping = ((removeUnreferencedIDs(referencedIDs, identifiedElements) + vacuumDefs(doc)) > 0) -# repair style (remove unnecessary style properties and change them into XML attributes) -numStylePropsFixed = repairStyle(doc.documentElement) + # repair style (remove unnecessary style properties and change them into XML attributes) + numStylePropsFixed = repairStyle(doc.documentElement) -# remove empty defs, metadata, g -# NOTE: these elements will be removed even if they have (invalid) text nodes -elemsToRemove = [] -for tag in ['defs', 'metadata', 'g'] : - for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) : - removeElem = not elem.hasChildNodes() - if removeElem == False : - for child in elem.childNodes : - if child.nodeType in [1, 4, 8] : - break - else: - removeElem = True - if removeElem : - elem.parentNode.removeChild(elem) - numElemsRemoved += 1 + # remove empty defs, metadata, g + # NOTE: these elements will be removed even if they have (invalid) text nodes + elemsToRemove = [] + for tag in ['defs', 'metadata', 'g'] : + for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) : + removeElem = not elem.hasChildNodes() + if removeElem == False : + for child in elem.childNodes : + if child.nodeType in [1, 4, 8] : + break + else: + removeElem = True + if removeElem : + elem.parentNode.removeChild(elem) + numElemsRemoved += 1 -# clean path data -for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') : - cleanPath(elem) + # clean path data + for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') : + cleanPath(elem) -# convert rasters refereces to base64-encoded strings -for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') : - embedRasters(elem) + # convert rasters refereces to base64-encoded strings + for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') : + embedRasters(elem) -# properly size the SVG document (ideally width/height should be 100% with a viewBox) -properlySizeDoc(doc.documentElement) + # properly size the SVG document (ideally width/height should be 100% with a viewBox) + properlySizeDoc(doc.documentElement) -# output the document -doc.documentElement.writexml(output) + # output the document + out_string = doc.documentElement.toxml() + return out_string + +if __name__ == '__main__': -# Close input and output files -input.close() -output.close() + # parse command-line arguments + args = sys.argv[1:] -# output some statistics if we are not using stdout -if bOutputReport : - print " Number of unreferenced id attributes removed:", numIDsRemoved - print " Number of elements removed:", numElemsRemoved - print " Number of attributes removed:", numAttrsRemoved - print " Number of style properties fixed:", numStylePropsFixed - print " Number of raster images embedded inline:", numRastersEmbedded - oldsize = os.path.getsize(input.name) - newsize = os.path.getsize(output.name) - #sizediff = (min(oldsize, newsize) / max(oldsize, newsize)) * 100; - sizediff = (newsize / oldsize); - print " Original file size:", oldsize, "kb; new file size:", newsize, "kb (" + str(sizediff)[:5] + "x)" + # by default the input and output are the standard streams + input = sys.stdin + output = sys.stdout + + # if -i or -o is supplied, switch the stream to the file + if len(args) == 2: + if args[0] == '-i' : + input = open(args[1], 'r') + elif args[0] == '-o' : + output = open(args[1], 'w') + else: + printSyntaxAndQuit() + + # if both -o and -o are supplied, switch streams to the files + elif len(args) == 4 : + if args[0] == '-i' and args[2] == '-o' : + input = open(args[1], 'r') + output = open(args[3], 'w') + elif args[0] == '-o' and args[2] == 'i' : + output = open(args[1], 'w') + input = open(args[3], 'r') + else: + printSyntaxAndQuit() + + # else invalid syntax + elif len(args) != 0 : + printSyntaxAndQuit() + + # if we are not sending to stdout, then print out app information + bOutputReport = False + if output != sys.stdout : + bOutputReport = True + printHeader() + + # do the work + in_string = input.read() + out_string = scour(in_string) + output.write(out_string) + + # Close input and output files + input.close() + output.close() + + # output some statistics if we are not using stdout + if bOutputReport : + print " Number of unreferenced id attributes removed:", numIDsRemoved + print " Number of elements removed:", numElemsRemoved + print " Number of attributes removed:", numAttrsRemoved + print " Number of style properties fixed:", numStylePropsFixed + print " Number of raster images embedded inline:", numRastersEmbedded + oldsize = os.path.getsize(input.name) + newsize = os.path.getsize(output.name) + #sizediff = (min(oldsize, newsize) / max(oldsize, newsize)) * 100; + sizediff = (newsize / oldsize); + print " Original file size:", oldsize, "kb; new file size:", newsize, "kb (" + str(sizediff)[:5] + "x)" diff --git a/testscour.py b/testscour.py new file mode 100755 index 0000000..ad77f07 --- /dev/null +++ b/testscour.py @@ -0,0 +1,23 @@ +#!/usr/local/bin/python +# Test Harness for Scour +# +# Copyright 2009 Jeff Schiller +# +# This file is part of Scour, http://www.codedread.com/scour/ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import scour + +print "done" \ No newline at end of file