From 5be6b03d7cb48c4354e5f7f313b98292cb88d72d Mon Sep 17 00:00:00 2001
From: Niels Thykier
Date: Tue, 19 May 2020 21:36:58 +0000
Subject: [PATCH] Serialization: Avoid creating a single-use dict in each call to make_well_formed

Signed-off-by: Niels Thykier
---
 scour/scour.py | 40 +++++++++++++++++++++++-----------------
 test_scour.py  | 21 +++++++++++----------
 2 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/scour/scour.py b/scour/scour.py
index 150b8e4..15de2bd 100644
--- a/scour/scour.py
+++ b/scour/scour.py
@@ -74,6 +74,12 @@ VER = __version__
 COPYRIGHT = u'Copyright Jeff Schiller, Louis Simard, 2010'
 
 
+XML_ENTS_NO_QUOTES = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
+XML_ENTS_ESCAPE_APOS = XML_ENTS_NO_QUOTES.copy()
+XML_ENTS_ESCAPE_APOS["'"] = '&apos;'
+XML_ENTS_ESCAPE_QUOT = XML_ENTS_NO_QUOTES.copy()
+XML_ENTS_ESCAPE_QUOT['"'] = '&quot;'
+
 NS = {'SVG': 'http://www.w3.org/2000/svg',
       'XLINK': 'http://www.w3.org/1999/xlink',
       'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
@@ -3404,23 +3410,23 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
         remapNamespacePrefix(child, oldprefix, newprefix)
 
 
-def makeWellFormed(str, quote=''):
-    xml_ents = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
-    if quote:
-        xml_ents[quote] = '&apos;' if (quote == "'") else "&quot;"
-    return ''.join(xml_ents[c] if c in xml_ents else c for c in str)
+def make_well_formed(text, quote_dict=None):
+    if quote_dict is None:
+        quote_dict = XML_ENTS_NO_QUOTES
+    return ''.join(quote_dict[c] if c in quote_dict else c for c in text)
 
 
-def chooseQuoteCharacter(str):
-    quotCount = str.count('"')
-    aposCount = str.count("'")
-    if quotCount > aposCount:
-        quote = "'"
-        hasEmbeddedQuote = aposCount
-    else:
+def choose_quote_character(value):
+    quot_count = value.count('"')
+    if quot_count == 0 or quot_count <= value.count("'"):
+        # Fewest "-symbols (if there are 0, we pick this to avoid spending
+        # time counting the '-symbols as it won't matter)
         quote = '"'
-        hasEmbeddedQuote = quotCount
-    return (quote, hasEmbeddedQuote)
+        xml_ent = XML_ENTS_ESCAPE_QUOT
+    else:
+        quote = "'"
+        xml_ent = XML_ENTS_ESCAPE_APOS
+    return quote, xml_ent
 
 
 TEXT_CONTENT_ELEMENTS = ['text', 'tspan', 'tref', 'textPath', 'altGlyph',
@@ -3472,8 +3478,8 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
         attr = attrList.item(index)
 
         attrValue = attr.nodeValue
-        (quote, hasEmbeddedQuote) = chooseQuoteCharacter(attrValue)
-        attrValue = makeWellFormed(attrValue, quote if hasEmbeddedQuote else '')
+        quote, xml_ent = choose_quote_character(attrValue)
+        attrValue = make_well_formed(attrValue, xml_ent)
 
         if attr.nodeName == 'style':
             # sort declarations
@@ -3532,7 +3538,7 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
                             text_content = text_content.replace('  ', ' ')
                     else:
                         text_content = text_content.strip()
-                outParts.append(makeWellFormed(text_content))
+                outParts.append(make_well_formed(text_content))
             # CDATA node
             elif child.nodeType == Node.CDATA_SECTION_NODE:
                 outParts.extend(['<![CDATA[', child.nodeValue, ']]>'])
diff --git a/test_scour.py b/test_scour.py
index 6c4c7ce..e55b9db 100755
--- a/test_scour.py
+++ b/test_scour.py
@@ -30,7 +30,8 @@ import unittest
 import six
 from six.moves import map, range
 
-from scour.scour import makeWellFormed, parse_args, scourString, scourXmlFile, start, run
+from scour.scour import (make_well_formed, parse_args, scourString, scourXmlFile, start, run,
+                         XML_ENTS_ESCAPE_APOS, XML_ENTS_ESCAPE_QUOT)
 from scour.svg_regex import svg_parser
 from scour import __version__
 
@@ -1893,26 +1894,26 @@ class EnsureLineEndings(unittest.TestCase):
 class XmlEntities(unittest.TestCase):
 
     def runTest(self):
-        self.assertEqual(makeWellFormed('<>&'), '&lt;&gt;&amp;',
+        self.assertEqual(make_well_formed('<>&'), '&lt;&gt;&amp;',
                          'Incorrectly translated unquoted XML entities')
-        self.assertEqual(makeWellFormed('<>&', "'"), '&lt;&gt;&amp;',
+        self.assertEqual(make_well_formed('<>&', XML_ENTS_ESCAPE_APOS), '&lt;&gt;&amp;',
                          'Incorrectly translated single-quoted XML entities')
-        self.assertEqual(makeWellFormed('<>&', '"'), '&lt;&gt;&amp;',
+        self.assertEqual(make_well_formed('<>&', XML_ENTS_ESCAPE_QUOT), '&lt;&gt;&amp;',
                          'Incorrectly translated double-quoted XML entities')
 
-        self.assertEqual(makeWellFormed("'"), "'",
+        self.assertEqual(make_well_formed("'"), "'",
                          'Incorrectly translated unquoted single quote')
-        self.assertEqual(makeWellFormed('"'), '"',
+        self.assertEqual(make_well_formed('"'), '"',
                          'Incorrectly translated unquoted double quote')
 
-        self.assertEqual(makeWellFormed("'", '"'), "'",
+        self.assertEqual(make_well_formed("'", XML_ENTS_ESCAPE_QUOT), "'",
                          'Incorrectly translated double-quoted single quote')
-        self.assertEqual(makeWellFormed('"', "'"), '"',
+        self.assertEqual(make_well_formed('"', XML_ENTS_ESCAPE_APOS), '"',
                          'Incorrectly translated single-quoted double quote')
 
-        self.assertEqual(makeWellFormed("'", "'"), '&apos;',
+        self.assertEqual(make_well_formed("'", XML_ENTS_ESCAPE_APOS), '&apos;',
                          'Incorrectly translated single-quoted single quote')
-        self.assertEqual(makeWellFormed('"', '"'), '&quot;',
+        self.assertEqual(make_well_formed('"', XML_ENTS_ESCAPE_QUOT), '&quot;',
                          'Incorrectly translated double-quoted double quote')
 
 