From 36ee0932a4595f5a26e3db720672bd9b45dd47bf Mon Sep 17 00:00:00 2001
From: Niels Thykier <niels@thykier.net>
Date: Tue, 19 May 2020 18:04:48 +0000
Subject: [PATCH] removeDuplicateGradients: Compile at most one regex per
 master gradient

Regex compilation is by far the most expensive part of
removeDuplicateGradients.  This commit reduces the pain a bit by
trading "many small regexes" for "a few larger regexes", which avoids
some of the compilation overhead.

Signed-off-by: Niels Thykier <niels@thykier.net>
---
 scour/scour.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scour/scour.py b/scour/scour.py
index 1b9c150..fb8f9d1 100644
--- a/scour/scour.py
+++ b/scour/scour.py
@@ -1576,6 +1576,7 @@ def removeDuplicateGradients(doc):
     # get a collection of all elements that are referenced and their referencing elements
     referencedIDs = findReferencedElements(doc.documentElement)
     for master_id, duplicates_ids, duplicates in gradients_to_remove:
+        funcIRI = None
         for dup_id, dupGrad in zip(duplicates_ids, duplicates):
             # if the duplicate gradient no longer has a parent that means it was
             # already re-mapped to another master gradient
@@ -1585,7 +1586,10 @@ def removeDuplicateGradients(doc):
             # With --keep-unreferenced-defs, we can end up with
             # unreferenced gradients.  See GH#156.
             if dup_id in referencedIDs:
-                funcIRI = re.compile('url\\([\'"]?#' + dup_id + '[\'"]?\\)')  # matches url(#a), url('#a') and url("#a")
+                if funcIRI is None:
+                    # matches url(#<ANY_DUP_ID>), url('#<ANY_DUP_ID>') and url("#<ANY_DUP_ID>")
+                    dup_id_regex = "|".join(duplicates_ids)
+                    funcIRI = re.compile('url\\([\'"]?#(?:' + dup_id_regex + ')[\'"]?\\)')
                 for elem in referencedIDs[dup_id]:
                     # find out which attribute referenced the duplicate gradient
                     for attr in ['fill', 'stroke']: