From e8e104d8b802a4c799e68fd1fa2254a24ff98ddb Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 17 May 2020 20:20:59 +0000 Subject: [PATCH] Add optimization that prunes nested -tags An optimization that prunes nested -tags when they contain exactly one and nothing else (except whitespace nodes). This looks a bit like `removeNestedGroups` except it only touches tags without attributes (but can remove -tags completely from a tree, whereas this optimization always leaves at least one tag behind). Closes: #215 Signed-off-by: Niels Thykier --- scour/scour.py | 73 +++++++++++++++++++++++ test_scour.py | 98 +++++++++++++++++++++++++++++++ unittests/group-parent-merge.svg | 15 +++++ unittests/group-parent-merge2.svg | 15 +++++ unittests/group-parent-merge3.svg | 15 +++++ 5 files changed, 216 insertions(+) create mode 100644 unittests/group-parent-merge.svg create mode 100644 unittests/group-parent-merge2.svg create mode 100644 unittests/group-parent-merge3.svg diff --git a/scour/scour.py b/scour/scour.py index 8f0d532..204dc70 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1143,6 +1143,78 @@ def moveCommonAttributesToParentGroup(elem, referencedElements): return num +def mergeSingleChildGroupIntoParent(elem): + """ + Merge ... into ... + + Optimize the pattern: + + .../ + + into + + ... + + This is only possible when: + + * The parent has exactly one -child node (ignoring whitespace) + * The child node is mergeable (per :func:`g_tag_is_unmergeable`) + + Note that this function overlaps to some extend with `removeNestedGroups`. + However, `removeNestedGroups` work entirely on empty and can completely + remove empty `` tags. This works on any tag containing tags + (regardless of their attributes) and as such this function can never + completely eliminate all -tags (but it does deal with attributes). + + This function acts recursively on the given element. + """ + num = 0 + + # Depth first - fix child notes first + for childNode in elem.childNodes: + if childNode.nodeType == Node.ELEMENT_NODE: + num += mergeSingleChildGroupIntoParent(childNode) + + # The elem node must be a tag for this to be relevant. + if elem.nodeType != Node.ELEMENT_NODE or elem.nodeName != 'g' or \ + elem.namespaceURI != NS['SVG']: + return num + + if g_tag_is_unmergeable(elem): + # elem itself is protected, then leave it alone. + return num + + g_node_idx = None + for idx, node in enumerate(elem.childNodes): + if node.nodeType == Node.TEXT_NODE and not node.nodeValue.strip(): + # whitespace-only node; ignore + continue + if node.nodeType != Node.ELEMENT_NODE or node.nodeName != 'g' or \ + node.namespaceURI != NS['SVG']: + # The elem node has something other than tag; then this + # optimization does not apply + return num + if g_tag_is_unmergeable(node) or g_node_idx is not None: + # The elem node has two or more tags or the node it has + # is unmergeable; then this optimization does not apply + return num + g_node_idx = idx + + # We got a optimization candidate + g_node = elem.childNodes[g_node_idx] + elem.childNodes.remove(g_node) + + elem.childNodes[g_node_idx:g_node_idx] = g_node.childNodes + g_node.childNodes = [] + + for a in g_node.attributes.values(): + elem.setAttribute(a.nodeName, a.nodeValue) + + num += 1 + + return num + + def mergeSiblingGroupsWithCommonAttributes(elem): """ Merge two or more sibling elements with the identical attributes. @@ -3754,6 +3826,7 @@ def scourString(in_string, options=None): # Collapse groups LAST, because we've created groups. If done before # moveAttributesToParentGroup, empty 's may remain. if options.group_collapse: + _num_elements_removed += mergeSingleChildGroupIntoParent(doc.documentElement) while removeNestedGroups(doc.documentElement) > 0: pass diff --git a/test_scour.py b/test_scour.py index 6c4c7ce..6adfc1f 100755 --- a/test_scour.py +++ b/test_scour.py @@ -2075,6 +2075,104 @@ class MustKeepGInSwitch2(unittest.TestCase): 'Erroneously removed a in a ') +class GroupParentMerge(unittest.TestCase): + + def test_parent_merge(self): + doc = scourXmlFile('unittests/group-parent-merge.svg', + parse_args([])) + g_tags = doc.getElementsByTagName('g') + attrs = { + 'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif', + 'text-anchor': 'middle', + 'font-weight': '400', + 'font-size': '24', + } + self.assertEqual(g_tags.length, 1, + 'Inline single-child node tags into parent -tags') + + g_tag = g_tags[0] + for attr_name, attr_value in attrs.items(): + + self.assertEqual(g_tag.getAttribute(attr_name), attr_value, + 'Parent now has inherited attributes of obsolete -tags') + + def test_parent_merge_disabled(self): + doc = scourXmlFile('unittests/group-parent-merge.svg', + parse_args(['--disable-group-collapsing'])) + g_tags = doc.getElementsByTagName('g') + attrs = { + 'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif', + 'text-anchor': '', + 'font-weight': '', + 'font-size': '', + } + self.assertEqual(g_tags.length, 4, + 'Inline single-child node tags into parent -tags') + + # There should be exactly one tag in the top of the document + # Note that the order returned by getElementsByTagName is not specified + # so we do not rely on its return value + g_tags = [g for g in doc.documentElement.childNodes if g.nodeName == 'g'] + self.assertEqual(len(g_tags), 1, + 'Optimization must not move the up to the root') + g_tag = g_tags[0] + for attr_name, attr_value in attrs.items(): + self.assertEqual(g_tag.getAttribute(attr_name), attr_value, + 'Parent now has inherited attributes of obsolete -tags') + + def test_parent_merge2(self): + doc = scourXmlFile('unittests/group-parent-merge2.svg', + parse_args([])) + attrs = { + 'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif', + 'text-anchor': 'middle', + 'font-weight': '400', + 'font-size': '', # The top-level g-node cannot have gotten this. + } + # There is one inner that cannot be optimized, so there must be 2 + # tags in total + self.assertEqual(doc.getElementsByTagName('g').length, 2, + 'Inline single-child node tags into parent -tags') + + # There should be exactly one tag in the top of the document + # Note that the order returned by getElementsByTagName is not specified + # so we do not rely on its return value + g_tags = [g for g in doc.documentElement.childNodes if g.nodeName == 'g'] + self.assertEqual(len(g_tags), 1, + 'Optimization must not move the up to the root') + g_tag = g_tags[0] + for attr_name, attr_value in attrs.items(): + self.assertEqual(g_tag.getAttribute(attr_name), attr_value, + 'Parent now has inherited attributes of obsolete -tags') + + def test_parent_merge3(self): + doc = scourXmlFile('unittests/group-parent-merge3.svg', + parse_args(['--protect-ids-list=foo'])) + attrs = { + 'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif', + 'text-anchor': 'middle', + 'font-weight': '400', + 'font-size': '', # The top-level g-node cannot have gotten this. + } + # There is one inner that cannot be optimized, so there must be 2 + # tags in total + self.assertEqual(doc.getElementsByTagName('g').length, 2, + 'Inline single-child node tags into parent -tags') + + self.assertIsNotNone(doc.getElementById('foo'), 'The inner was left untouched') + + # There should be exactly one tag in the top of the document + # Note that the order returned by getElementsByTagName is not specified + # so we do not rely on its return value + g_tags = [g for g in doc.documentElement.childNodes if g.nodeName == 'g'] + self.assertEqual(len(g_tags), 1, + 'Optimization must not move the up to the root') + g_tag = g_tags[0] + for attr_name, attr_value in attrs.items(): + self.assertEqual(g_tag.getAttribute(attr_name), attr_value, + 'Parent now has inherited attributes of obsolete -tags') + + class GroupSiblingMerge(unittest.TestCase): def test_sibling_merge(self): diff --git a/unittests/group-parent-merge.svg b/unittests/group-parent-merge.svg new file mode 100644 index 0000000..59fa1a2 --- /dev/null +++ b/unittests/group-parent-merge.svg @@ -0,0 +1,15 @@ + + + + + + + + Text1 + Text2 + Text3 + + + + + diff --git a/unittests/group-parent-merge2.svg b/unittests/group-parent-merge2.svg new file mode 100644 index 0000000..3349be0 --- /dev/null +++ b/unittests/group-parent-merge2.svg @@ -0,0 +1,15 @@ + + + + + + + Text1 + + Text2 + Text3 + + + + + diff --git a/unittests/group-parent-merge3.svg b/unittests/group-parent-merge3.svg new file mode 100644 index 0000000..eeb1a08 --- /dev/null +++ b/unittests/group-parent-merge3.svg @@ -0,0 +1,15 @@ + + + + + + + + Text1 + Text2 + Text3 + + + + +