Improve whitespace handling in text content elements
SVG specifies special logic for handling whitespace (see https://www.w3.org/TR/SVG/text.html#WhiteSpace). By implementing it we can even shave off some unneeded bytes here and there (e.g. consecutive spaces). Unfortunately, handling of newlines by renderers is inconsistent: sometimes they are replaced by a single space, sometimes they are removed from the output. As we cannot know the expected behavior, work around this by keeping newlines inside text content elements intact. Fixes #160.
This commit is contained in:
parent
7d28f5e051
commit
e1c2699f07
1 changed file with 18 additions and 7 deletions
|
|
@ -3341,19 +3341,30 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
|
|||
for child in element.childNodes:
|
||||
# element node
|
||||
if child.nodeType == Node.ELEMENT_NODE:
|
||||
if preserveWhitespace:
|
||||
# do not indent inside text content elements as in SVG there's a difference between
|
||||
# "text1\ntext2" and
|
||||
# "text1\n text2"
|
||||
# see https://www.w3.org/TR/SVG/text.html#WhiteSpace
|
||||
if preserveWhitespace or element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']:
|
||||
outParts.append(serializeXML(child, options, 0, preserveWhitespace))
|
||||
else:
|
||||
outParts.extend([newline, serializeXML(child, options, indent_depth + 1, preserveWhitespace)])
|
||||
onNewLine = True
|
||||
# text node
|
||||
elif child.nodeType == Node.TEXT_NODE:
|
||||
# trim it only in the case of not being a child of an element
|
||||
# where whitespace might be important
|
||||
if preserveWhitespace:
|
||||
outParts.append(makeWellFormed(child.nodeValue))
|
||||
text_content = child.nodeValue
|
||||
if not preserveWhitespace:
|
||||
# strip / consolidate whitespace according to spec, see
|
||||
# https://www.w3.org/TR/SVG/text.html#WhiteSpace
|
||||
# As a workaround for inconsistent handling of renderers keep newlines if they were in the original
|
||||
if element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']:
|
||||
text_content = text_content.replace('\t', ' ')
|
||||
text_content = text_content.strip(' ')
|
||||
while ' ' in text_content:
|
||||
text_content = text_content.replace(' ', ' ')
|
||||
else:
|
||||
outParts.append(makeWellFormed(child.nodeValue.strip()))
|
||||
text_content = text_content.strip()
|
||||
outParts.append(makeWellFormed(text_content))
|
||||
# CDATA node
|
||||
elif child.nodeType == Node.CDATA_SECTION_NODE:
|
||||
outParts.extend(['<![CDATA[', child.nodeValue, ']]>'])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue