scour.py: Escape quote characters in attribute values, as necessary and minimally
Either double quotes or single quotes are escaped; the choice is made so as to minimize the length of the escaped string.
This commit is contained in:
parent
f14784b01f
commit
7e14cd352f
1 changed files with 20 additions and 19 deletions
|
|
@@ -3178,23 +3178,25 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
|
||||||
remapNamespacePrefix(child, oldprefix, newprefix)
|
remapNamespacePrefix(child, oldprefix, newprefix)
|
||||||
|
|
||||||
|
|
||||||
def makeWellFormed(str):
|
def makeWellFormed(str, quote=''):
|
||||||
# Don't escape quotation marks for now as they are fine in text nodes
|
|
||||||
# as well as in attributes if used reciprocally
|
|
||||||
# xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'}
|
|
||||||
xml_ents = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
|
xml_ents = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
|
||||||
|
if quote:
|
||||||
# starr = []
|
xml_ents[quote] = '&apos;' if (quote == "'") else "&quot;"
|
||||||
# for c in str:
|
|
||||||
# if c in xml_ents:
|
|
||||||
# starr.append(xml_ents[c])
|
|
||||||
# else:
|
|
||||||
# starr.append(c)
|
|
||||||
|
|
||||||
# this list comprehension is short-form for the above for-loop:
|
|
||||||
return ''.join([xml_ents[c] if c in xml_ents else c for c in str])
|
return ''.join([xml_ents[c] if c in xml_ents else c for c in str])
|
||||||
|
|
||||||
|
|
||||||
|
def chooseQuoteCharacter(str):
|
||||||
|
quotCount = str.count('"')
|
||||||
|
aposCount = str.count("'")
|
||||||
|
if quotCount > aposCount:
|
||||||
|
quote = "'"
|
||||||
|
hasEmbeddedQuote = aposCount
|
||||||
|
else:
|
||||||
|
quote = '"'
|
||||||
|
hasEmbeddedQuote = quotCount
|
||||||
|
return (quote, hasEmbeddedQuote)
|
||||||
|
|
||||||
|
|
||||||
# hand-rolled serialization function that has the following benefits:
|
# hand-rolled serialization function that has the following benefits:
|
||||||
# - pretty printing
|
# - pretty printing
|
||||||
# - somewhat judicious use of whitespace
|
# - somewhat judicious use of whitespace
|
||||||
|
|
@@ -3239,12 +3241,11 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False):
|
||||||
attrIndices += [attrName2Index[name] for name in sorted(attrName2Index.keys())]
|
attrIndices += [attrName2Index[name] for name in sorted(attrName2Index.keys())]
|
||||||
for index in attrIndices:
|
for index in attrIndices:
|
||||||
attr = attrList.item(index)
|
attr = attrList.item(index)
|
||||||
# if the attribute value contains a double-quote, use single-quotes
|
|
||||||
quot = '"'
|
|
||||||
if attr.nodeValue.find('"') != -1:
|
|
||||||
quot = "'"
|
|
||||||
|
|
||||||
attrValue = makeWellFormed(attr.nodeValue)
|
attrValue = attr.nodeValue
|
||||||
|
(quote, hasEmbeddedQuote) = chooseQuoteCharacter(attrValue)
|
||||||
|
attrValue = makeWellFormed(attrValue, quote if hasEmbeddedQuote else '')
|
||||||
|
|
||||||
if attr.nodeName == 'style':
|
if attr.nodeName == 'style':
|
||||||
# sort declarations
|
# sort declarations
|
||||||
attrValue = ';'.join([p for p in sorted(attrValue.split(';'))])
|
attrValue = ';'.join([p for p in sorted(attrValue.split(';'))])
|
||||||
|
|
@@ -3258,7 +3259,7 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False):
|
||||||
outParts.append('xmlns:')
|
outParts.append('xmlns:')
|
||||||
elif attr.namespaceURI == 'http://www.w3.org/1999/xlink':
|
elif attr.namespaceURI == 'http://www.w3.org/1999/xlink':
|
||||||
outParts.append('xlink:')
|
outParts.append('xlink:')
|
||||||
outParts.extend([attr.localName, '=', quot, attrValue, quot])
|
outParts.extend([attr.localName, '=', quote, attrValue, quote])
|
||||||
|
|
||||||
if attr.nodeName == 'xml:space':
|
if attr.nodeName == 'xml:space':
|
||||||
if attrValue == 'preserve':
|
if attrValue == 'preserve':
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue