From b007d75d1c0dbb74aa9208dc8b6f1bd3eb405f34 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 24 Aug 2017 21:40:44 +0000 Subject: [PATCH] scour.py: Escape quote characters in attribute values, as necessary and minimally Either double quotes or single quotes are escaped; the choice is made so as to minimize the length of the escaped string. --- scour/scour.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 99c2f47..15e0fa0 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -3178,20 +3178,10 @@ def remapNamespacePrefix(node, oldprefix, newprefix): remapNamespacePrefix(child, oldprefix, newprefix) -def makeWellFormed(str): - # Don't escape quotation marks for now as they are fine in text nodes - # as well as in attributes if used reciprocally - # xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'} +def makeWellFormed(str, quote=''): xml_ents = {'<': '&lt;', '>': '&gt;', '&': '&amp;'} - -# starr = [] -# for c in str: -# if c in xml_ents: -# starr.append(xml_ents[c]) -# else: -# starr.append(c) - - # this list comprehension is short-form for the above for-loop: + if quote: + xml_ents[quote] = '&apos;' if (quote == "'") else "&quot;" return ''.join([xml_ents[c] if c in xml_ents else c for c in str]) @@ -3239,12 +3229,24 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False): attrIndices += [attrName2Index[name] for name in sorted(attrName2Index.keys())] for index in attrIndices: attr = attrList.item(index) - # if the attribute value contains a double-quote, use single-quotes - quot = '"' - if attr.nodeValue.find('"') != -1: - quot = "'" - attrValue = makeWellFormed(attr.nodeValue) + attrValue = attr.nodeValue + + quot_count = 0 + apos_count = 0 + + for c in attrValue: + if c == '"': + quot_count += 1 + elif c == "'": + apos_count += 1 + + if quot_count > apos_count: + quote = "'" + else: + quote = '"' + + attrValue = makeWellFormed(attrValue, quote if
(quot_count or apos_count) else '') if attr.nodeName == 'style': # sort declarations attrValue = ';'.join([p for p in sorted(attrValue.split(';'))]) @@ -3258,7 +3260,7 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False): outParts.append('xmlns:') elif attr.namespaceURI == 'http://www.w3.org/1999/xlink': outParts.append('xlink:') - outParts.extend([attr.localName, '=', quot, attrValue, quot]) + outParts.extend([attr.localName, '=', quote, attrValue, quote]) if attr.nodeName == 'xml:space': if attrValue == 'preserve':