Fix bug when DOCTYPE is present. Big performance improvement to makeWellFormed() function

This commit is contained in:
JSCHILL1 2010-02-05 16:28:13 -06:00
parent c835423e8f
commit c00bc8b70c
5 changed files with 37 additions and 14 deletions

View file

@ -13,11 +13,11 @@
<header>
<h2><a href="#0.24">Version 0.24</a></h2>
</header>
<p>2010-02-04</p>
<p>2010-02-05</p>
<ul>
<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/517064">Bug 517064</a> to make XML well-formed again</li>
<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/503750">Bug 503750</a> fix Inkscape extension to correctly pass --enable-viewboxing</li>
<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/511186">Bug 511186</a> fix stripping of comments outside of the root &lt;svg&gt; node</li>
<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/511186">Bug 511186</a> to allow comments outside of the root &lt;svg&gt; node</li>
</ul>
</section>

View file

@ -2021,15 +2021,17 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
remapNamespacePrefix(child, oldprefix, newprefix)
def makeWellFormed(str):
    """Return *str* with the five XML special characters escaped.

    The characters ``<``, ``>``, ``&``, ``'`` and ``"`` are replaced by the
    entities ``&lt;``, ``&gt;``, ``&amp;``, ``&apos;`` and ``&quot;``.  Every
    other character is copied through unchanged.

    Note: the parameter name shadows the builtin ``str``; it is kept so that
    existing callers (including any that pass it by keyword) keep working.
    """
    xml_ents = {'<': '&lt;', '>': '&gt;', '&': '&amp;', "'": '&apos;', '"': '&quot;'}
    # Collect the pieces and join them once: repeated ``+=`` on a string can
    # copy the growing result on every step, which is quadratic on large input.
    return ''.join([xml_ents.get(c, c) for c in str])
# hand-rolled serialization function that has the following benefits:
# - pretty printing
@ -2295,12 +2297,11 @@ def scourString(in_string, options=None):
else:
total_output = ""
# Find all comments before and after the root node and print them
for child in doc.childNodes:
if child.nodeType == 8:
total_output += ('<!--' + child.nodeValue + '-->' + os.linesep)
else:
if child.nodeType == 1:
total_output += "".join(lines)
else: # doctypes, entities, comments
total_output += child.toxml() + os.linesep
return total_output

View file

@ -997,6 +997,15 @@ class DoNotStripCommentsOutsideOfRoot(unittest.TestCase):
self.assertEquals( doc.childNodes[1].nodeType, 8, 'Second node not a comment')
self.assertEquals( doc.childNodes[3].nodeType, 8, 'Fourth node not a comment')
class DoNotStripDoctype(unittest.TestCase):
    """Check that scouring 'unittests/doctype.svg' keeps its DOCTYPE node."""

    def runTest(self):
        doc = scour.scourXmlFile('unittests/doctype.svg')
        # The fixture contains a comment, a DOCTYPE and the root <svg>
        # element, so three top-level nodes are expected in that order.
        self.assertEqual(doc.childNodes.length, 3,
            'Did not include the DOCTYPE')
        # DOM nodeType codes: 8 = comment, 10 = document type, 1 = element.
        self.assertEqual(doc.childNodes[0].nodeType, 8, 'First node not a comment')
        self.assertEqual(doc.childNodes[1].nodeType, 10, 'Second node not a doctype')
        self.assertEqual(doc.childNodes[2].nodeType, 1, 'Third node not the root node')
# TODO: write tests for --enable-viewboxing
# TODO: write a test for embedding rasters
# TODO: write a test for --disable-embed-rasters

6
unittests/comments.svg Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" ?>
<!-- Empty -->
<!-- Comment #2 -->
<svg xmlns="http://www.w3.org/2000/svg">
</svg>
<!-- After -->

After

Width:  |  Height:  |  Size: 120 B

7
unittests/doctype.svg Normal file
View file

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- comment -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
<!ENTITY ns_svg "http://www.w3.org/2000/svg">
<!ENTITY ns_xlink "http://www.w3.org/1999/xlink">
]>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"/>

After

Width:  |  Height:  |  Size: 350 B