From 36ee0932a4595f5a26e3db720672bd9b45dd47bf Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Tue, 19 May 2020 18:04:48 +0000 Subject: [PATCH] removeDuplicateGradients: Compile at most one regex per master gradient Regex compilation is by far the most expensive part of removeDuplicateGradients. This commit reduces the pain a bit by trading "many small regexes" for "few larger regexes", which avoids some of the compilation overhead. Signed-off-by: Niels Thykier --- scour/scour.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scour/scour.py b/scour/scour.py index 1b9c150..fb8f9d1 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1576,6 +1576,7 @@ def removeDuplicateGradients(doc): # get a collection of all elements that are referenced and their referencing elements referencedIDs = findReferencedElements(doc.documentElement) for master_id, duplicates_ids, duplicates in gradients_to_remove: + funcIRI = None for dup_id, dupGrad in zip(duplicates_ids, duplicates): # if the duplicate gradient no longer has a parent that means it was # already re-mapped to another master gradient @@ -1585,7 +1586,10 @@ def removeDuplicateGradients(doc): # With --keep-unreferenced-defs, we can end up with # unreferenced gradients. See GH#156. if dup_id in referencedIDs: - funcIRI = re.compile('url\\([\'"]?#' + dup_id + '[\'"]?\\)') # matches url(#a), url('#a') and url("#a") + if funcIRI is None: + # matches url(#), url('#') and url("#") + dup_id_regex = "|".join(duplicates_ids) + funcIRI = re.compile('url\\([\'"]?#(?:' + dup_id_regex + ')[\'"]?\\)') for elem in referencedIDs[dup_id]: # find out which attribute referenced the duplicate gradient for attr in ['fill', 'stroke']: