Add hand-rolled XML serialization function to improve XML output. Added --indent option to choose indentation mechanism (space, tab, none)
This commit is contained in:
parent
04487ed1ec
commit
bac229dd14
3 changed files with 102 additions and 8 deletions
|
|
@ -13,10 +13,12 @@
|
||||||
<header>
|
<header>
|
||||||
<h2><a href="#0.18">Version 0.18</a></h2>
|
<h2><a href="#0.18">Version 0.18</a></h2>
|
||||||
</header>
|
</header>
|
||||||
<p>Aug 3rd, 2009</p>
|
<p>Aug 5th, 2009</p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Remove attributes of gradients if they contain default values</li>
|
<li>Remove attributes of gradients if they contain default values</li>
|
||||||
<li>Reduce bezier/quadratic (c/q) segments to their shorthand equivalents (s/t)</li>
|
<li>Reduce bezier/quadratic (c/q) segments to their shorthand equivalents (s/t)</li>
|
||||||
|
<li>Custom XML serialization such that id/xml:id is printed first (Thanks to Richard Hutch for the suggestion)</li>
|
||||||
|
<li>Added --indent option to specify indentation type (default='space', other options: 'none', 'tab')</li>
|
||||||
</ul>
|
</ul>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
|
|
||||||
95
scour.py
95
scour.py
|
|
@ -51,13 +51,12 @@
|
||||||
# <rect />
|
# <rect />
|
||||||
# </g>
|
# </g>
|
||||||
|
|
||||||
# Suggestion from Richard Hutch:
|
|
||||||
# * Put id attributes first in the serialization (or make the d attribute last)
|
|
||||||
# This would require my own serialization of the DOM objects (not impossible)
|
|
||||||
|
|
||||||
# Next Up:
|
# Next Up:
|
||||||
# + Remove some attributes that have default values
|
# + Remove some attributes that have default values
|
||||||
# + Convert c/q path segments into shorthand equivalents where possible:
|
# + Convert c/q path segments into shorthand equivalents where possible:
|
||||||
|
# + custom serialization of SVG that prints out id/xml:id first (suggestion by Richard Hutch)
|
||||||
|
# + --indent option to specify how indent should work: space, tab, none
|
||||||
|
# - option to remove metadata
|
||||||
# - parse transform attribute
|
# - parse transform attribute
|
||||||
# - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
|
# - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
|
||||||
# - remove id if it matches the Inkscape-style of IDs (also provide a switch to disable this)
|
# - remove id if it matches the Inkscape-style of IDs (also provide a switch to disable this)
|
||||||
|
|
@ -1876,6 +1875,82 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
|
||||||
for child in node.childNodes :
|
for child in node.childNodes :
|
||||||
remapNamespacePrefix(child, oldprefix, newprefix)
|
remapNamespacePrefix(child, oldprefix, newprefix)
|
||||||
|
|
||||||
|
# hand-rolled serialization function that has the following benefits:
|
||||||
|
# - pretty printing
|
||||||
|
# - somewhat judicious use of whitespace
|
||||||
|
# - ensure id attributes are first
|
||||||
|
def serializeXML(element, options, ind = 0):
    """Serialize a DOM element (and its subtree) to an XML string.

    Compared to minidom's toxml()/toprettyxml() this:
      - pretty-prints child elements, one per line, indented by depth
      - uses whitespace somewhat judiciously
      - always writes the id and xml:id attributes first

    element -- the DOM element node to serialize
    options -- object whose indent_type attribute selects the indentation
               unit: 'tab', 'space', or anything else for no indentation
    ind     -- nesting depth of element (0 for the outermost element)

    Returns the serialized markup.  Every element serialized at depth > 0
    is terminated by a newline; the outermost element is not.
    """
    # local import so this function stays self-contained in the file
    from xml.sax.saxutils import escape

    def quoteValue(value):
        # DOM attribute values are already unescaped, so markup characters
        # have to be escaped again or the output is not well-formed
        value = escape(value)
        if '"' not in value:
            return '"' + value + '"'
        # value contains a double-quote: prefer single-quotes around it
        if "'" not in value:
            return "'" + value + "'"
        # both kinds of quote are present, so one of them must be escaped
        return '"' + value.replace('"', '&quot;') + '"'

    I = ''
    if options.indent_type == 'tab': I = '\t'
    elif options.indent_type == 'space': I = ' '

    # collect the pieces and join once at the end
    parts = [I * ind, '<', element.nodeName]

    # always serialize the id or xml:id attributes first
    # (an id whose value is the empty string is dropped)
    for idName in ('id', 'xml:id'):
        idValue = element.getAttribute(idName)
        if idValue != '':
            parts.append(' ' + idName + '=' + quoteValue(idValue))

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length):
        attr = attrList.item(num)
        if attr.nodeName in ('id', 'xml:id'): continue
        parts.append(' ' + attr.nodeName + '=' + quoteValue(attr.nodeValue))

    # if no children, self-close
    children = element.childNodes
    if children.length > 0:
        parts.append('>')

        onNewLine = False
        for child in children:
            # element node
            if child.nodeType == 1:
                parts.append('\n' + serializeXML(child, options, ind + 1))
                onNewLine = True
            # text node
            elif child.nodeType == 3:
                # trim it only in the case of not being a child of an element
                # where whitespace might be important
                if element.nodeName in ["text", "tspan", "textPath", "tref", "title", "desc", "textArea"]:
                    parts.append(escape(child.nodeValue))
                else:
                    parts.append(escape(child.nodeValue.strip()))
            # CDATA node: content is literal, so it must not be escaped
            elif child.nodeType == 4:
                parts.append('<![CDATA[' + child.nodeValue + ']]>')
            # Comment node
            elif child.nodeType == 8:
                parts.append('<!--' + child.nodeValue + '-->')
            # TODO: entities, processing instructions, what else?
            # (all other node types are ignored)

        # a child element left us at the start of a fresh line, so the
        # closing tag needs this element's own indentation
        if onNewLine: parts.append(I * ind)
        parts.append('</' + element.nodeName + '>')
    else:
        parts.append('/>')

    if ind > 0: parts.append('\n')

    return ''.join(parts)
|
||||||
|
|
||||||
# this is the main method
|
# this is the main method
|
||||||
# input is a string representation of the input XML
|
# input is a string representation of the input XML
|
||||||
# returns a string representation of the output XML
|
# returns a string representation of the output XML
|
||||||
|
|
@ -2004,7 +2079,6 @@ def scourString(in_string, options=None):
|
||||||
elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))
|
elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))
|
||||||
|
|
||||||
# remove default values of attributes
|
# remove default values of attributes
|
||||||
# print doc.documentElement.toxml()
|
|
||||||
numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)
|
numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)
|
||||||
|
|
||||||
# convert rasters references to base64-encoded strings
|
# convert rasters references to base64-encoded strings
|
||||||
|
|
@ -2018,8 +2092,9 @@ def scourString(in_string, options=None):
|
||||||
# output the document as a pretty string with a single space for indent
|
# output the document as a pretty string with a single space for indent
|
||||||
# NOTE: removed pretty printing because of this problem:
|
# NOTE: removed pretty printing because of this problem:
|
||||||
# http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
|
# http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
|
||||||
|
# rolled our own serialize function here to save on space, put id first, customize indentation, etc
|
||||||
# out_string = doc.documentElement.toprettyxml(' ')
|
# out_string = doc.documentElement.toprettyxml(' ')
|
||||||
out_string = doc.documentElement.toxml()
|
out_string = serializeXML(doc.documentElement, options)
|
||||||
|
|
||||||
# now strip out empty lines
|
# now strip out empty lines
|
||||||
lines = []
|
lines = []
|
||||||
|
|
@ -2096,6 +2171,9 @@ _options_parser.add_option("-i",
|
||||||
action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
|
action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
|
||||||
_options_parser.add_option("-o",
|
_options_parser.add_option("-o",
|
||||||
action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
|
action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
|
||||||
|
_options_parser.add_option("--indent",
|
||||||
|
action="store", type="string", dest="indent_type", default="space",
|
||||||
|
help="indentation of the output: none, space, tab (default: %default)")
|
||||||
|
|
||||||
def maybe_gziped_file(filename, mode="r"):
|
def maybe_gziped_file(filename, mode="r"):
|
||||||
if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
|
if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
|
||||||
|
|
@ -2109,6 +2187,9 @@ def parse_args(args=None):
|
||||||
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
|
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
|
||||||
if options.digits < 0:
|
if options.digits < 0:
|
||||||
_options_parser.error("Can't have negative significant digits, see --help")
|
_options_parser.error("Can't have negative significant digits, see --help")
|
||||||
|
if not options.indent_type in ["tab", "space", "none"]:
|
||||||
|
_options_parser.error("Invalid value for --indent, see --help")
|
||||||
|
|
||||||
if options.infilename:
|
if options.infilename:
|
||||||
infile = maybe_gziped_file(options.infilename)
|
infile = maybe_gziped_file(options.infilename)
|
||||||
# GZ: could catch a raised IOError here and report
|
# GZ: could catch a raised IOError here and report
|
||||||
|
|
@ -2119,7 +2200,7 @@ def parse_args(args=None):
|
||||||
outfile = maybe_gziped_file(options.outfilename, "w")
|
outfile = maybe_gziped_file(options.outfilename, "w")
|
||||||
else:
|
else:
|
||||||
outfile = sys.stdout
|
outfile = sys.stdout
|
||||||
|
|
||||||
return options, [infile, outfile]
|
return options, [infile, outfile]
|
||||||
|
|
||||||
def getReport():
|
def getReport():
|
||||||
|
|
|
||||||
11
testscour.py
11
testscour.py
|
|
@ -820,6 +820,17 @@ class RemoveDefaultGradFYValue(unittest.TestCase):
|
||||||
self.assertEquals( g.getAttribute('fy'), '',
|
self.assertEquals( g.getAttribute('fy'), '',
|
||||||
'fy matching cy not removed')
|
'fy matching cy not removed')
|
||||||
|
|
||||||
|
class CDATAInXml(unittest.TestCase):
|
||||||
|
def runTest(self):
|
||||||
|
self.assertEquals( scour.scourString(open('unittests/cdata.svg').read()),
|
||||||
|
'''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<script type="application/ecmascript"><![CDATA[
|
||||||
|
alert('pb&j');
|
||||||
|
]]></script>
|
||||||
|
</svg>''',
|
||||||
|
'Improperly serialized the cdata unit tests')
|
||||||
|
|
||||||
# TODO; write a test for embedding rasters
|
# TODO; write a test for embedding rasters
|
||||||
# TODO: write a test for --disable-embed-rasters
|
# TODO: write a test for --disable-embed-rasters
|
||||||
# TODO: write tests for --keep-editor-data
|
# TODO: write tests for --keep-editor-data
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue