Merge pull request #199 from Ede123/newline_handling
Several improvements for handling whitespace including newlines, especially in text nodes
This commit is contained in:
commit
718748ff22
6 changed files with 190 additions and 55 deletions
|
|
@ -3334,8 +3334,6 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
|
||||||
children = element.childNodes
|
children = element.childNodes
|
||||||
if children.length == 0:
|
if children.length == 0:
|
||||||
outParts.append('/>')
|
outParts.append('/>')
|
||||||
if indent_depth > 0:
|
|
||||||
outParts.append(newline)
|
|
||||||
else:
|
else:
|
||||||
outParts.append('>')
|
outParts.append('>')
|
||||||
|
|
||||||
|
|
@ -3343,34 +3341,47 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
|
||||||
for child in element.childNodes:
|
for child in element.childNodes:
|
||||||
# element node
|
# element node
|
||||||
if child.nodeType == Node.ELEMENT_NODE:
|
if child.nodeType == Node.ELEMENT_NODE:
|
||||||
if preserveWhitespace:
|
# do not indent inside text content elements as in SVG there's a difference between
|
||||||
|
# "text1\ntext2" and
|
||||||
|
# "text1\n text2"
|
||||||
|
# see https://www.w3.org/TR/SVG/text.html#WhiteSpace
|
||||||
|
if preserveWhitespace or element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']:
|
||||||
outParts.append(serializeXML(child, options, 0, preserveWhitespace))
|
outParts.append(serializeXML(child, options, 0, preserveWhitespace))
|
||||||
else:
|
else:
|
||||||
outParts.extend([newline, serializeXML(child, options, indent_depth + 1, preserveWhitespace)])
|
outParts.extend([newline, serializeXML(child, options, indent_depth + 1, preserveWhitespace)])
|
||||||
onNewLine = True
|
onNewLine = True
|
||||||
# text node
|
# text node
|
||||||
elif child.nodeType == Node.TEXT_NODE:
|
elif child.nodeType == Node.TEXT_NODE:
|
||||||
# trim it only in the case of not being a child of an element
|
text_content = child.nodeValue
|
||||||
# where whitespace might be important
|
if not preserveWhitespace:
|
||||||
if preserveWhitespace:
|
# strip / consolidate whitespace according to spec, see
|
||||||
outParts.append(makeWellFormed(child.nodeValue))
|
# https://www.w3.org/TR/SVG/text.html#WhiteSpace
|
||||||
|
if element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']:
|
||||||
|
text_content = text_content.replace('\n', '')
|
||||||
|
text_content = text_content.replace('\t', ' ')
|
||||||
|
if child == element.firstChild:
|
||||||
|
text_content = text_content.lstrip()
|
||||||
|
elif child == element.lastChild:
|
||||||
|
text_content = text_content.rstrip()
|
||||||
|
while '  ' in text_content:
|
||||||
|
text_content = text_content.replace('  ', ' ')
|
||||||
else:
|
else:
|
||||||
outParts.append(makeWellFormed(child.nodeValue.strip()))
|
text_content = text_content.strip()
|
||||||
|
outParts.append(makeWellFormed(text_content))
|
||||||
# CDATA node
|
# CDATA node
|
||||||
elif child.nodeType == Node.CDATA_SECTION_NODE:
|
elif child.nodeType == Node.CDATA_SECTION_NODE:
|
||||||
outParts.extend(['<![CDATA[', child.nodeValue, ']]>'])
|
outParts.extend(['<![CDATA[', child.nodeValue, ']]>'])
|
||||||
# Comment node
|
# Comment node
|
||||||
elif child.nodeType == Node.COMMENT_NODE:
|
elif child.nodeType == Node.COMMENT_NODE:
|
||||||
outParts.extend(['<!--', child.nodeValue, '-->'])
|
outParts.extend([newline, indent_type * (indent_depth+1), '<!--', child.nodeValue, '-->'])
|
||||||
# TODO: entities, processing instructions, what else?
|
# TODO: entities, processing instructions, what else?
|
||||||
else: # ignore the rest
|
else: # ignore the rest
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if onNewLine:
|
if onNewLine:
|
||||||
|
outParts.append(newline)
|
||||||
outParts.append(indent_type * indent_depth)
|
outParts.append(indent_type * indent_depth)
|
||||||
outParts.extend(['</', element.nodeName, '>'])
|
outParts.extend(['</', element.nodeName, '>'])
|
||||||
if indent_depth > 0:
|
|
||||||
outParts.append(newline)
|
|
||||||
|
|
||||||
return "".join(outParts)
|
return "".join(outParts)
|
||||||
|
|
||||||
|
|
@ -3632,13 +3643,6 @@ def scourString(in_string, options=None):
|
||||||
# out_string = doc.documentElement.toprettyxml(' ')
|
# out_string = doc.documentElement.toprettyxml(' ')
|
||||||
out_string = serializeXML(doc.documentElement, options) + '\n'
|
out_string = serializeXML(doc.documentElement, options) + '\n'
|
||||||
|
|
||||||
# now strip out empty lines
|
|
||||||
lines = []
|
|
||||||
# Get rid of empty lines
|
|
||||||
for line in out_string.splitlines(True):
|
|
||||||
if line.strip():
|
|
||||||
lines.append(line)
|
|
||||||
|
|
||||||
# return the string with its XML prolog and surrounding comments
|
# return the string with its XML prolog and surrounding comments
|
||||||
if options.strip_xml_prolog is False:
|
if options.strip_xml_prolog is False:
|
||||||
total_output = '<?xml version="1.0" encoding="UTF-8"'
|
total_output = '<?xml version="1.0" encoding="UTF-8"'
|
||||||
|
|
@ -3650,7 +3654,7 @@ def scourString(in_string, options=None):
|
||||||
|
|
||||||
for child in doc.childNodes:
|
for child in doc.childNodes:
|
||||||
if child.nodeType == Node.ELEMENT_NODE:
|
if child.nodeType == Node.ELEMENT_NODE:
|
||||||
total_output += "".join(lines)
|
total_output += out_string
|
||||||
else: # doctypes, entities, comments
|
else: # doctypes, entities, comments
|
||||||
total_output += child.toxml() + '\n'
|
total_output += child.toxml() + '\n'
|
||||||
|
|
||||||
|
|
|
||||||
103
testscour.py
103
testscour.py
|
|
@ -1744,34 +1744,83 @@ class DoNotRemoveGradientsWhenReferencedInStyleCss(unittest.TestCase):
|
||||||
'Gradients removed when referenced in CSS')
|
'Gradients removed when referenced in CSS')
|
||||||
|
|
||||||
|
|
||||||
class DoNotPrettyPrintWhenWhitespacePreserved(unittest.TestCase):
|
class Whitespace(unittest.TestCase):
|
||||||
|
|
||||||
def runTest(self):
|
def setUp(self):
|
||||||
with open('unittests/whitespace-important.svg') as f:
|
self.doc = scourXmlFile('unittests/whitespace.svg')
|
||||||
s = scourString(f.read()).splitlines()
|
|
||||||
c = '''<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<svg xmlns="http://www.w3.org/2000/svg">
|
|
||||||
<text xml:space="preserve">This is some <tspan font-style="italic">messed-up</tspan> markup</text>
|
|
||||||
</svg>
|
|
||||||
'''.splitlines()
|
|
||||||
for i in range(4):
|
|
||||||
self.assertEqual(s[i], c[i],
|
|
||||||
'Whitespace not preserved for line ' + str(i))
|
|
||||||
|
|
||||||
|
def test_basic(self):
|
||||||
|
text = self.doc.getElementById('txt_a1')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Multiple spaces not stripped from text element')
|
||||||
|
text = self.doc.getElementById('txt_a2')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Tab not replaced with space in text element')
|
||||||
|
text = self.doc.getElementById('txt_a3')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Multiple spaces not stripped from text element with xml:space="default"')
|
||||||
|
text = self.doc.getElementById('txt_a4')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Tab not replaced with space in text element with xml:space="default"')
|
||||||
|
text = self.doc.getElementById('txt_a5')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Multiple spaces not preserved in text element with xml:space="preserve"')
|
||||||
|
text = self.doc.getElementById('txt_a6')
|
||||||
|
self.assertIn('text1\ttext2', text.toxml(),
|
||||||
|
'Tab not preserved in text element with xml:space="preserve"')
|
||||||
|
|
||||||
class DoNotPrettyPrintWhenNestedWhitespacePreserved(unittest.TestCase):
|
def test_newlines(self):
|
||||||
|
text = self.doc.getElementById('txt_b1')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Newline not replaced with space in text element')
|
||||||
|
text = self.doc.getElementById('txt_b2')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Newline not replaced with space in text element with xml:space="default"')
|
||||||
|
text = self.doc.getElementById('txt_b3')
|
||||||
|
self.assertIn('text1\n text2', text.toxml(),
|
||||||
|
'Newline not preserved in text element with xml:space="preserve"')
|
||||||
|
|
||||||
def runTest(self):
|
def test_inheritance(self):
|
||||||
with open('unittests/whitespace-nested.svg') as f:
|
text = self.doc.getElementById('txt_c1')
|
||||||
s = scourString(f.read()).splitlines()
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
c = '''<?xml version="1.0" encoding="UTF-8"?>
|
'<tspan> does not inherit xml:space="preserve" of parent text element')
|
||||||
<svg xmlns="http://www.w3.org/2000/svg">
|
text = self.doc.getElementById('txt_c2')
|
||||||
<text xml:space="preserve"><tspan font-style="italic">Use <tspan font-style="bold">bold</tspan> text</tspan></text>
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
</svg>
|
'xml:space="default" of <tspan> does not overwrite xml:space="preserve" of parent text element')
|
||||||
'''.splitlines()
|
text = self.doc.getElementById('txt_c3')
|
||||||
for i in range(4):
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
self.assertEqual(s[i], c[i],
|
'xml:space="preserve" of <tspan> does not overwrite xml:space="default" of parent text element')
|
||||||
'Whitespace not preserved when nested for line ' + str(i))
|
text = self.doc.getElementById('txt_c4')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'<text> does not inherit xml:space="preserve" of parent group')
|
||||||
|
text = self.doc.getElementById('txt_c5')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'xml:space="default" of text element does not overwrite xml:space="preserve" of parent group')
|
||||||
|
text = self.doc.getElementById('txt_c6')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'xml:space="preserve" of text element does not overwrite xml:space="default" of parent group')
|
||||||
|
|
||||||
|
def test_important_whitespace(self):
|
||||||
|
text = self.doc.getElementById('txt_d1')
|
||||||
|
self.assertIn('text1 text2', text.toxml(),
|
||||||
|
'Newline with whitespace collapsed in text element')
|
||||||
|
text = self.doc.getElementById('txt_d2')
|
||||||
|
self.assertIn('text1 <tspan>tspan1</tspan> text2', text.toxml(),
|
||||||
|
'Whitespace stripped from the middle of a text element')
|
||||||
|
text = self.doc.getElementById('txt_d3')
|
||||||
|
self.assertIn('text1 <tspan>tspan1 <tspan>tspan2</tspan> text2</tspan>', text.toxml(),
|
||||||
|
'Whitespace stripped from the middle of a text element')
|
||||||
|
|
||||||
|
def test_incorrect_whitespace(self):
|
||||||
|
text = self.doc.getElementById('txt_e1')
|
||||||
|
self.assertIn('text1text2', text.toxml(),
|
||||||
|
'Whitespace introduced in text element with newline')
|
||||||
|
text = self.doc.getElementById('txt_e2')
|
||||||
|
self.assertIn('text1<tspan>tspan</tspan>text2', text.toxml(),
|
||||||
|
'Whitespace introduced in text element with <tspan>')
|
||||||
|
text = self.doc.getElementById('txt_e3')
|
||||||
|
self.assertIn('text1<tspan>tspan</tspan>text2', text.toxml(),
|
||||||
|
'Whitespace introduced in text element with <tspan> and newlines')
|
||||||
|
|
||||||
|
|
||||||
class GetAttrPrefixRight(unittest.TestCase):
|
class GetAttrPrefixRight(unittest.TestCase):
|
||||||
|
|
@ -1807,10 +1856,10 @@ class HandleEmptyStyleElement(unittest.TestCase):
|
||||||
class EnsureLineEndings(unittest.TestCase):
|
class EnsureLineEndings(unittest.TestCase):
|
||||||
|
|
||||||
def runTest(self):
|
def runTest(self):
|
||||||
with open('unittests/whitespace-important.svg') as f:
|
with open('unittests/newlines.svg') as f:
|
||||||
s = scourString(f.read())
|
s = scourString(f.read())
|
||||||
self.assertEqual(len(s.splitlines()), 4,
|
self.assertEqual(len(s.splitlines()), 24,
|
||||||
'Did not output line ending character correctly')
|
'Did not handle reading or outputting line ending characters correctly')
|
||||||
|
|
||||||
|
|
||||||
class XmlEntities(unittest.TestCase):
|
class XmlEntities(unittest.TestCase):
|
||||||
|
|
|
||||||
50
unittests/newlines.svg
Normal file
50
unittests/newlines.svg
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg"
|
||||||
|
|
||||||
|
>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- this file has pretty messed up formatting --> <rect width="100" height="100"/>
|
||||||
|
<rect width="100" height="100"/>
|
||||||
|
<rect width="100" height="100"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<rect width="100" height="100"/>
|
||||||
|
<rect width="100" height="100"/>
|
||||||
|
<rect width="100" height="100"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- we have mixed newline
|
||||||
|
characters, carriage returns
and both of them
|
||||||
|
as well as tabs and spaces
|
||||||
|
-->
|
||||||
|
|
||||||
|
<rect width="100" height="100"/><rect width="100" height="100"/> <rect width="100" height="100"/>
|
||||||
|
|
||||||
|
<rect width="100" height="100"/>
<rect width="100" height="100"/> <rect width="100" height="100"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<rect width="100" height="100"/> <rect width="100" height="100"/>
|
||||||
|
|
||||||
|
|
||||||
|
</svg>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- OMG, really? -->
|
||||||
|
After Width: | Height: | Size: 889 B |
|
|
@ -1,4 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<svg xmlns="http://www.w3.org/2000/svg">
|
|
||||||
<text xml:space="preserve">This is some <tspan font-style="italic">messed-up</tspan> markup</text>
|
|
||||||
</svg>
|
|
||||||
|
Before Width: | Height: | Size: 203 B |
|
|
@ -1,4 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<svg xmlns="http://www.w3.org/2000/svg">
|
|
||||||
<text xml:space="preserve"><tspan font-style="italic">Use <tspan font-style="bold">bold</tspan> text</tspan></text>
|
|
||||||
</svg>
|
|
||||||
|
Before Width: | Height: | Size: 220 B |
40
unittests/whitespace.svg
Normal file
40
unittests/whitespace.svg
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<!-- basic tests -->
|
||||||
|
<text id="txt_a1">text1 text2</text> <!-- multiple spaces -->
|
||||||
|
<text id="txt_a2">text1 text2</text> <!-- tab -->
|
||||||
|
<text id="txt_a3" xml:space="default">text1 text2</text> <!-- multiple spaces -->
|
||||||
|
<text id="txt_a4" xml:space="default">text1 text2</text> <!-- tab -->
|
||||||
|
<text id="txt_a5" xml:space="preserve">text1 text2</text> <!-- multiple spaces -->
|
||||||
|
<text id="txt_a6" xml:space="preserve">text1 text2</text> <!-- tab -->
|
||||||
|
|
||||||
|
<!-- newlines -->
|
||||||
|
<text id="txt_b1">text1
|
||||||
|
text2</text>
|
||||||
|
<text id="txt_b2" xml:space="default">text1
|
||||||
|
text2</text>
|
||||||
|
<text id="txt_b3" xml:space="preserve">text1
|
||||||
|
text2</text>
|
||||||
|
|
||||||
|
<!-- inheritance -->
|
||||||
|
<text id="txt_c1" xml:space="preserve"><tspan>text1 text2</tspan></text>
|
||||||
|
<text id="txt_c2" xml:space="preserve"><tspan xml:space="default">text1 text2</tspan></text>
|
||||||
|
<text id="txt_c3" xml:space="default"><tspan xml:space="preserve">text1 text2</tspan></text>
|
||||||
|
<g xml:space="preserve"><text id="txt_c4">text1 text2</text></g>
|
||||||
|
<g xml:space="preserve"><text id="txt_c5" xml:space="default">text1 text2</text></g>
|
||||||
|
<g xml:space="default"><text id="txt_c6" xml:space="preserve">text1 text2</text></g>
|
||||||
|
|
||||||
|
<!-- important whitespace that must not be stripped -->
|
||||||
|
<text id="txt_d1">text1
|
||||||
|
text2</text>
|
||||||
|
<text id="txt_d2">text1 <tspan>tspan1</tspan> text2</text>
|
||||||
|
<text id="txt_d3">text1 <tspan>tspan1 <tspan>tspan2</tspan> text2</tspan></text>
|
||||||
|
|
||||||
|
<!-- whitespace must not be introduced -->
|
||||||
|
<text id="txt_e1">text1
|
||||||
|
text2</text>
|
||||||
|
<text id="txt_e2">text1<tspan>tspan</tspan>text2</text>
|
||||||
|
<text id="txt_e3">text1
|
||||||
|
<tspan>tspan</tspan>
|
||||||
|
text2</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.7 KiB |
Loading…
Add table
Add a link
Reference in a new issue