removeDuplicateGradients: Maintain referenced_ids

This avoids calling `findReferencedElements` more than once per
`removeDuplicateGradients` invocation. This is good for performance,
as `findReferencedElements` is one of the slowest functions in scour.

Signed-off-by: Niels Thykier <niels@thykier.net>
This commit is contained in:
Niels Thykier 2020-05-21 14:14:25 +00:00
parent 3d29029c72
commit ca2b32c0b3
No known key found for this signature in database
GPG key ID: A65B78DBE67C7AAC

View file

@ -1615,19 +1615,39 @@ def dedup_gradient(master_id, duplicates_ids, duplicates, referenced_ids):
# it is safe to remove this gradient from the document
dup_grad.parentNode.removeChild(dup_grad)
# If the gradients have an ID, we update referenced_ids to match the newly remapped IDs.
# This enable us to avoid calling findReferencedElements once per loop, which is helpful as it is
# one of the slowest functions in scour.
if master_id:
try:
master_references = referenced_ids[master_id]
except KeyError:
master_references = set()
for dup_id in duplicates_ids:
references = referenced_ids.pop(dup_id, None)
if references is None:
continue
master_references.update(references)
# Only necessary but needed if the master gradient did
# not have any references originally
referenced_ids[master_id] = master_references
def removeDuplicateGradients(doc):
    """Collapse duplicate linearGradient/radialGradient elements in *doc*.

    Repeatedly detects groups of identical gradients and merges each group
    into a single "master" gradient via ``dedup_gradient``, looping until a
    full pass removes nothing (``num`` stops growing).

    :param doc: the SVG DOM document to deduplicate in place.
    """
    prev_num = -1
    num = 0
    # Build the id -> referencing-elements map exactly once.  dedup_gradient
    # keeps this map in sync as gradients are merged and remapped, so we
    # avoid re-running findReferencedElements on every pass of the loop --
    # it is one of the slowest functions in scour.
    referenced_ids = findReferencedElements(doc.documentElement)
    while prev_num != num:
        prev_num = num
        # Re-query the live NodeLists each pass: earlier passes may have
        # removed gradient elements from the document.
        linear_gradients = doc.getElementsByTagName('linearGradient')
        radial_gradients = doc.getElementsByTagName('radialGradient')
        for master_id, duplicates_ids, duplicates in detect_duplicate_gradients(linear_gradients, radial_gradients):
            dedup_gradient(master_id, duplicates_ids, duplicates, referenced_ids)
            num += len(duplicates)