From 975a84f50ebe2031b8e0a2536aec5b53b177d571 Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Tue, 10 Apr 2018 05:53:21 +0000 Subject: [PATCH] Optimize removeDefaultAttributeValues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid looping over DefaultAttribute(s) that are not relevant for a given node. This skips a lot of calls to removeDefaultAttributeValue but more importantly, it avoids the "node.nodeName not in attribute.elements" line in removeDefaultAttributeValue. As attribute.elements is a list, this becomes expensive for "larger lists" (or in this case when there are a lot of attributes). This seems to remove about 1½-2 minutes of runtime (out of ~8) on the 1_42_polytope_7-cube.svg test case provided in #184. Signed-off-by: Niels Thykier --- scour/scour.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 4c13e9f..8f53fc8 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1910,9 +1910,6 @@ def removeDefaultAttributeValue(node, attribute): if not node.hasAttribute(attribute.name): return 0 - if (attribute.elements is not None) and (node.nodeName not in attribute.elements): - return 0 - # differentiate between text and numeric values if isinstance(attribute.value, str): if node.getAttribute(attribute.name) == attribute.value: @@ -1941,9 +1938,17 @@ def removeDefaultAttributeValues(node, options, tainted=set()): if node.nodeType != Node.ELEMENT_NODE: return 0 - # Conditionally remove all default attributes defined in 'default_attributes' (a list of 'DefaultAttribute's) - for attribute in default_attributes: + # Remove all default attributes. The removeDefaultAttributeValue + # function deals with "if/when" we are allowed to remove the + # attribute as long as we supply it only with attributes that are + # applicable for this given node. 
That part is handled by using + # default_attributes_unrestricted and + # default_attributes_restricted_by_tag + for attribute in default_attributes_unrestricted: num += removeDefaultAttributeValue(node, attribute) + if node.nodeName in default_attributes_restricted_by_tag: + for attribute in default_attributes_restricted_by_tag[node.nodeName]: + num += removeDefaultAttributeValue(node, attribute) # Summarily get rid of default properties attributes = [node.attributes.item(i).nodeName for i in range(node.attributes.length)]