From c00bc8b70cbe626d13848fe05a6b39730e422ac8 Mon Sep 17 00:00:00 2001 From: JSCHILL1 Date: Fri, 5 Feb 2010 16:28:13 -0600 Subject: [PATCH] Fix bug when DOCTYPE is present. Big performance improvement to makeWellFormed() function --- release-notes.html | 4 ++-- scour.py | 23 ++++++++++++----------- testscour.py | 11 ++++++++++- unittests/comments.svg | 6 ++++++ unittests/doctype.svg | 7 +++++++ 5 files changed, 37 insertions(+), 14 deletions(-) create mode 100644 unittests/comments.svg create mode 100644 unittests/doctype.svg diff --git a/release-notes.html b/release-notes.html index 7bee3c6..d62af29 100644 --- a/release-notes.html +++ b/release-notes.html @@ -13,11 +13,11 @@

Version 0.24

-

2010-02-04

+

2010-02-05

diff --git a/scour.py b/scour.py index a269c14..b851e48 100755 --- a/scour.py +++ b/scour.py @@ -2021,15 +2021,17 @@ def remapNamespacePrefix(node, oldprefix, newprefix): remapNamespacePrefix(child, oldprefix, newprefix) def makeWellFormed(str): - newstr = '' xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'} - for c in str: - if c in xml_ents: - newstr += xml_ents[c] - else: - newstr += c + +# starr = [] +# for c in str: +# if c in xml_ents: +# starr.append(xml_ents[c]) +# else: +# starr.append(c) - return newstr + # this list comprehension is short-form for the above for-loop: + return ''.join([xml_ents[c] if c in xml_ents else c for c in str]) # hand-rolled serialization function that has the following benefits: # - pretty printing @@ -2295,12 +2297,11 @@ def scourString(in_string, options=None): else: total_output = "" - # Find all comments before and after the root node and print them for child in doc.childNodes: - if child.nodeType == 8: - total_output += ('<!--' + child.data + '-->' + os.linesep) - else: + if child.nodeType == 1: total_output += "".join(lines) + else: # doctypes, entities, comments + total_output += child.toxml() + os.linesep return total_output diff --git a/testscour.py b/testscour.py index 880687e..e4293ae 100755 --- a/testscour.py +++ b/testscour.py @@ -996,7 +996,16 @@ class DoNotStripCommentsOutsideOfRoot(unittest.TestCase): self.assertEquals( doc.childNodes[0].nodeType, 8, 'First node not a comment') self.assertEquals( doc.childNodes[1].nodeType, 8, 'Second node not a comment') self.assertEquals( doc.childNodes[3].nodeType, 8, 'Fourth node not a comment') - + +class DoNotStripDoctype(unittest.TestCase): + def runTest(self): + doc = scour.scourXmlFile('unittests/doctype.svg') + self.assertEquals( doc.childNodes.length, 3, + 'Did not include the DOCROOT') + self.assertEquals( doc.childNodes[0].nodeType, 8, 'First node not a comment') + self.assertEquals( doc.childNodes[1].nodeType, 10, 'Second node not a doctype') + self.assertEquals( 
doc.childNodes[2].nodeType, 1, 'Third node not the root node') + # TODO: write tests for --enable-viewboxing # TODO; write a test for embedding rasters # TODO: write a test for --disable-embed-rasters diff --git a/unittests/comments.svg b/unittests/comments.svg new file mode 100644 index 0000000..a588593 --- /dev/null +++ b/unittests/comments.svg @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/unittests/doctype.svg b/unittests/doctype.svg new file mode 100644 index 0000000..d19e074 --- /dev/null +++ b/unittests/doctype.svg @@ -0,0 +1,7 @@ + + + + +]> +