diff --git a/scour/scour.py b/scour/scour.py
index 8feb15c..a46af61 100644
--- a/scour/scour.py
+++ b/scour/scour.py
@@ -3334,8 +3334,6 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
children = element.childNodes
if children.length == 0:
outParts.append('/>')
- if indent_depth > 0:
- outParts.append(newline)
else:
outParts.append('>')
@@ -3343,34 +3341,47 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
for child in element.childNodes:
# element node
if child.nodeType == Node.ELEMENT_NODE:
- if preserveWhitespace:
+ # do not indent inside text content elements as in SVG there's a difference between
+ # "text1\ntext2" and
+ # "text1\n text2"
+ # see https://www.w3.org/TR/SVG/text.html#WhiteSpace
+ if preserveWhitespace or element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']:
outParts.append(serializeXML(child, options, 0, preserveWhitespace))
else:
outParts.extend([newline, serializeXML(child, options, indent_depth + 1, preserveWhitespace)])
onNewLine = True
# text node
elif child.nodeType == Node.TEXT_NODE:
- # trim it only in the case of not being a child of an element
- # where whitespace might be important
- if preserveWhitespace:
- outParts.append(makeWellFormed(child.nodeValue))
- else:
- outParts.append(makeWellFormed(child.nodeValue.strip()))
+ text_content = child.nodeValue
+ if not preserveWhitespace:
+ # strip / consolidate whitespace according to spec, see
+ # https://www.w3.org/TR/SVG/text.html#WhiteSpace
+ if element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']:
+ text_content = text_content.replace('\n', '')
+ text_content = text_content.replace('\t', ' ')
+ if child == element.firstChild:
+ text_content = text_content.lstrip()
+ elif child == element.lastChild:
+ text_content = text_content.rstrip()
+ while ' ' in text_content:
+ text_content = text_content.replace(' ', ' ')
+ else:
+ text_content = text_content.strip()
+ outParts.append(makeWellFormed(text_content))
# CDATA node
elif child.nodeType == Node.CDATA_SECTION_NODE:
outParts.extend([''])
# Comment node
elif child.nodeType == Node.COMMENT_NODE:
- outParts.extend([''])
+ outParts.extend([newline, indent_type * (indent_depth+1), ''])
# TODO: entities, processing instructions, what else?
else: # ignore the rest
pass
if onNewLine:
+ outParts.append(newline)
outParts.append(indent_type * indent_depth)
outParts.extend(['', element.nodeName, '>'])
- if indent_depth > 0:
- outParts.append(newline)
return "".join(outParts)
@@ -3632,13 +3643,6 @@ def scourString(in_string, options=None):
# out_string = doc.documentElement.toprettyxml(' ')
out_string = serializeXML(doc.documentElement, options) + '\n'
- # now strip out empty lines
- lines = []
- # Get rid of empty lines
- for line in out_string.splitlines(True):
- if line.strip():
- lines.append(line)
-
# return the string with its XML prolog and surrounding comments
if options.strip_xml_prolog is False:
total_output = '
-
-'''.splitlines()
- for i in range(4):
- self.assertEqual(s[i], c[i],
- 'Whitespace not preserved for line ' + str(i))
+ def setUp(self):
+ self.doc = scourXmlFile('unittests/whitespace.svg')
+ def test_basic(self):
+ text = self.doc.getElementById('txt_a1')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Multiple spaces not stripped from text element')
+ text = self.doc.getElementById('txt_a2')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Tab not replaced with space in text element')
+ text = self.doc.getElementById('txt_a3')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Multiple spaces not stripped from text element with xml:space="default"')
+ text = self.doc.getElementById('txt_a4')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Tab not replaced with space in text element with xml:space="default"')
+ text = self.doc.getElementById('txt_a5')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Multiple spaces not preserved in text element with xml:space="preserve"')
+ text = self.doc.getElementById('txt_a6')
+ self.assertIn('text1\ttext2', text.toxml(),
+ 'Tab not preserved in text element with xml:space="preserve"')
-class DoNotPrettyPrintWhenNestedWhitespacePreserved(unittest.TestCase):
+ def test_newlines(self):
+ text = self.doc.getElementById('txt_b1')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Newline not replaced with space in text element')
+ text = self.doc.getElementById('txt_b2')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Newline not replaced with space in text element with xml:space="default"')
+ text = self.doc.getElementById('txt_b3')
+ self.assertIn('text1\n text2', text.toxml(),
+ 'Newline not preserved in text element with xml:space="preserve"')
- def runTest(self):
- with open('unittests/whitespace-nested.svg') as f:
- s = scourString(f.read()).splitlines()
- c = '''
-
-'''.splitlines()
- for i in range(4):
- self.assertEqual(s[i], c[i],
- 'Whitespace not preserved when nested for line ' + str(i))
+ def test_inheritance(self):
+ text = self.doc.getElementById('txt_c1')
+ self.assertIn('text1 text2', text.toxml(),
+ ' does not inherit xml:space="preserve" of parent text element')
+ text = self.doc.getElementById('txt_c2')
+ self.assertIn('text1 text2', text.toxml(),
+ 'xml:space="default" of does not overwrite xml:space="preserve" of parent text element')
+ text = self.doc.getElementById('txt_c3')
+ self.assertIn('text1 text2', text.toxml(),
+ 'xml:space="preserve" of does not overwrite xml:space="default" of parent text element')
+ text = self.doc.getElementById('txt_c4')
+ self.assertIn('text1 text2', text.toxml(),
+ ' does not inherit xml:space="preserve" of parent group')
+ text = self.doc.getElementById('txt_c5')
+ self.assertIn('text1 text2', text.toxml(),
+ 'xml:space="default" of text element does not overwrite xml:space="preserve" of parent group')
+ text = self.doc.getElementById('txt_c6')
+ self.assertIn('text1 text2', text.toxml(),
+ 'xml:space="preserve" of text element does not overwrite xml:space="default" of parent group')
+
+ def test_important_whitespace(self):
+ text = self.doc.getElementById('txt_d1')
+ self.assertIn('text1 text2', text.toxml(),
+ 'Newline with whitespace collapsed in text element')
+ text = self.doc.getElementById('txt_d2')
+ self.assertIn('text1 tspan1 text2', text.toxml(),
+ 'Whitespace stripped from the middle of a text element')
+ text = self.doc.getElementById('txt_d3')
+ self.assertIn('text1 tspan1 tspan2 text2', text.toxml(),
+ 'Whitespace stripped from the middle of a text element')
+
+ def test_incorrect_whitespace(self):
+ text = self.doc.getElementById('txt_e1')
+ self.assertIn('text1text2', text.toxml(),
+ 'Whitespace introduced in text element with newline')
+ text = self.doc.getElementById('txt_e2')
+ self.assertIn('text1tspantext2', text.toxml(),
+ 'Whitespace introduced in text element with ')
+ text = self.doc.getElementById('txt_e3')
+ self.assertIn('text1tspantext2', text.toxml(),
+ 'Whitespace introduced in text element with and newlines')
class GetAttrPrefixRight(unittest.TestCase):
@@ -1807,10 +1856,10 @@ class HandleEmptyStyleElement(unittest.TestCase):
class EnsureLineEndings(unittest.TestCase):
def runTest(self):
- with open('unittests/whitespace-important.svg') as f:
+ with open('unittests/newlines.svg') as f:
s = scourString(f.read())
- self.assertEqual(len(s.splitlines()), 4,
- 'Did not output line ending character correctly')
+ self.assertEqual(len(s.splitlines()), 24,
+ 'Did handle reading or outputting line ending characters correctly')
class XmlEntities(unittest.TestCase):
diff --git a/unittests/newlines.svg b/unittests/newlines.svg
new file mode 100644
index 0000000..a909603
--- /dev/null
+++ b/unittests/newlines.svg
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/unittests/whitespace-important.svg b/unittests/whitespace-important.svg
deleted file mode 100644
index 6918044..0000000
--- a/unittests/whitespace-important.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
diff --git a/unittests/whitespace-nested.svg b/unittests/whitespace-nested.svg
deleted file mode 100644
index 3b99356..0000000
--- a/unittests/whitespace-nested.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
diff --git a/unittests/whitespace.svg b/unittests/whitespace.svg
new file mode 100644
index 0000000..2bb48a6
--- /dev/null
+++ b/unittests/whitespace.svg
@@ -0,0 +1,40 @@
+
+