From c00bc8b70cbe626d13848fe05a6b39730e422ac8 Mon Sep 17 00:00:00 2001
From: JSCHILL1 <jschill1@Lithium-3.local>
Date: Fri, 5 Feb 2010 16:28:13 -0600
Subject: [PATCH] Fix bug when DOCTYPE is present.  Big performance improvement
 to makeWellFormed() function

---
 release-notes.html     |  4 ++--
 scour.py               | 23 ++++++++++++-----------
 testscour.py           | 11 ++++++++++-
 unittests/comments.svg |  6 ++++++
 unittests/doctype.svg  |  7 +++++++
 5 files changed, 37 insertions(+), 14 deletions(-)
 create mode 100644 unittests/comments.svg
 create mode 100644 unittests/doctype.svg
diff --git a/release-notes.html b/release-notes.html
index 7bee3c6..d62af29 100644
--- a/release-notes.html
+++ b/release-notes.html
@@ -13,11 +13,11 @@
 	<header>
 		<h2><a href="#0.24">Version 0.24</a></h2>
 	</header>
-	<p>2010-02-04</p>
+	<p>2010-02-05</p>
 	<ul>
 		<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/517064">Bug 517064</a> to make XML well-formed again</li>
 		<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/503750">Bug 503750</a> fix Inkscape extension to correctly pass --enable-viewboxing</li>
-		<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/511186">Bug 511186</a> fix stripping of comments outside of the root &lt;svg&gt; node</li>
+		<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/511186">Bug 511186</a> to allow comments outside of the root &lt;svg&gt; node</li>
 	</ul>
 </section>
 
diff --git a/scour.py b/scour.py
index a269c14..b851e48 100755
--- a/scour.py
+++ b/scour.py
@@ -2021,15 +2021,17 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
 		remapNamespacePrefix(child, oldprefix, newprefix)	
 
 def makeWellFormed(str):
-	newstr = ''
 	xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'}
-	for c in str:
-		if c in xml_ents:
-			newstr += xml_ents[c]
-		else:
-			newstr += c
+	
+#	starr = []
+#	for c in str:
+#		if c in xml_ents:
+#			starr.append(xml_ents[c])
+#		else:
+#			starr.append(c)
 			
-	return newstr
+	# this list comprehension is equivalent to the commented-out for-loop above:
+	return ''.join([xml_ents[c] if c in xml_ents else c for c in str])
 
 # hand-rolled serialization function that has the following benefits:
 # - pretty printing
@@ -2295,12 +2297,11 @@ def scourString(in_string, options=None):
 	else:
 		total_output = ""
 	
-	# Find all comments before and after the root node and print them
 	for child in doc.childNodes:
-		if child.nodeType == 8:
-			total_output += ('<!--' + child.nodeValue + '-->' + os.linesep)
-		else:
+		if child.nodeType == 1:
 			total_output += "".join(lines)
+		else: # doctypes, entities, comments
+			total_output += child.toxml() + os.linesep
 		
 	return total_output
 
diff --git a/testscour.py b/testscour.py
index 880687e..e4293ae 100755
--- a/testscour.py
+++ b/testscour.py
@@ -996,7 +996,16 @@ class DoNotStripCommentsOutsideOfRoot(unittest.TestCase):
 		self.assertEquals( doc.childNodes[0].nodeType, 8, 'First node not a comment')
 		self.assertEquals( doc.childNodes[1].nodeType, 8, 'Second node not a comment')
 		self.assertEquals( doc.childNodes[3].nodeType, 8, 'Fourth node not a comment')
-			
+
+class DoNotStripDoctype(unittest.TestCase):
+	def runTest(self):
+		doc = scour.scourXmlFile('unittests/doctype.svg')
+		self.assertEquals( doc.childNodes.length, 3, 
+			'Did not include the DOCROOT')
+		self.assertEquals( doc.childNodes[0].nodeType, 8, 'First node not a comment')
+		self.assertEquals( doc.childNodes[1].nodeType, 10, 'Second node not a doctype')
+		self.assertEquals( doc.childNodes[2].nodeType, 1, 'Third node not the root node')
+
 # TODO: write tests for --enable-viewboxing
 # TODO; write a test for embedding rasters
 # TODO: write a test for --disable-embed-rasters
diff --git a/unittests/comments.svg b/unittests/comments.svg
new file mode 100644
index 0000000..a588593
--- /dev/null
+++ b/unittests/comments.svg
@@ -0,0 +1,6 @@
+<?xml version="1.0" ?>
+<!-- Empty -->
+<!-- Comment #2 -->
+<svg xmlns="http://www.w3.org/2000/svg">
+</svg>
+<!-- After -->
\ No newline at end of file
diff --git a/unittests/doctype.svg b/unittests/doctype.svg
new file mode 100644
index 0000000..d19e074
--- /dev/null
+++ b/unittests/doctype.svg
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- comment -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+	<!ENTITY ns_svg "http://www.w3.org/2000/svg">
+	<!ENTITY ns_xlink "http://www.w3.org/1999/xlink">
+]>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"/>