From 9e4b9d6f5ed680d8c269c97641fae5ad68c5c9c2 Mon Sep 17 00:00:00 2001 From: Eduard Braun Date: Wed, 31 Aug 2016 00:04:19 +0200 Subject: [PATCH 1/4] Improve behaviour of numerial precision reduction * Previously all calculations were immediately done with the precision specified by the user, resulting in accumulation of numerical errors and in some cases very discernible abberations from the initial image (e.g. #80) * Now all calculations are done with default precision (the module "decimal" uses a whopping 28 signifigant digits initially!) and only at the very end coordinates are rounded to the desired precision. --- scour/scour.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 31cf1e7..17168ef 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -64,12 +64,7 @@ import optparse from scour.yocto_css import parseCssString import six from six.moves import range - -# Python 2.3- did not have Decimal -try: - from decimal import Decimal, InvalidOperation, getcontext -except ImportError: - sys.stderr.write("Scour requires at least Python 2.7 or Python 3.3+.") +from decimal import Context, Decimal, InvalidOperation, getcontext # select the most precise walltime measurement function available on the platform if sys.platform.startswith('win'): @@ -2474,9 +2469,13 @@ def scourUnitlessLength(length, needsRendererWorkaround=False): # length is of a This is faster than scourLength on elements guaranteed not to contain units. """ - # reduce to the proper number of digits if not isinstance(length, Decimal): length = getcontext().create_decimal(str(length)) + + # reduce numeric precision + # plus() corresponds to the unary prefix plus operator and applies context precision and rounding + length = scouringContext.plus(length) + # if the value is an integer, it may still have .0[...] attached to it for some reason # remove those if int(length) == length: @@ -3068,7 +3067,11 @@ def scourString(in_string, options=None): # sanitize options (take missing attributes from defaults, discard unknown attributes) options = sanitizeOptions(options) - getcontext().prec = options.digits + # create decimal context with reduced precision for scouring numbers + # calculations should be done in the default context (precision defaults to 28 significant digits) to minimize errors + global scouringContext + scouringContext = Context(prec = options.digits) + global numAttrsRemoved global numStylePropsFixed global numElemsRemoved From 29e005bf7b7212fb68b9058879c73a3e88bdb07f Mon Sep 17 00:00:00 2001 From: Eduard Braun Date: Wed, 31 Aug 2016 06:29:01 +0200 Subject: [PATCH 2/4] Improve handling of scientific vs. non-scientific notation * Negative exponents were not handled in a reasonable way (e.g. 0.000001 remained unchanged) * Trailing zeroes were not always properly removed --- scour/scour.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 17168ef..bd0ced5 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2476,10 +2476,12 @@ def scourUnitlessLength(length, needsRendererWorkaround=False): # length is of a # plus() corresponds to the unary prefix plus operator and applies context precision and rounding length = scouringContext.plus(length) - # if the value is an integer, it may still have .0[...] attached to it for some reason - # remove those - if int(length) == length: - length = getcontext().create_decimal(int(length)) + # remove trailing zeroes as we do not care for significance + intLength = length.to_integral_value() + if length == intLength: + length = Decimal(intLength) + else: + length = length.normalize() # gather the non-scientific notation version of the coordinate. # this may actually be in scientific notation if the value is @@ -2491,14 +2493,22 @@ def scourUnitlessLength(length, needsRendererWorkaround=False): # length is of a elif len(nonsci) > 3 and nonsci[:3] == '-0.': nonsci = '-' + nonsci[2:] # remove the 0, leave the minus and dot - if len(nonsci) > 3: # avoid calling normalize unless strictly necessary - # and then the scientific notation version, with E+NUMBER replaced with - # just eNUMBER, since SVG accepts this. - sci = six.text_type(length.normalize()).lower().replace("e+", "e") + # Gather the scientific notation version of the coordinate which + # can only be shorter if the length of the number is at least 4 characters (e.g. 1000 = 1e3). + if len(nonsci) > 3: + # We have to implement this ourselves since both 'normalize()' and 'to_sci_string()' + # don't handle negative exponents in a reasonable way (e.g. 0.000001 remains unchanged) + exponent = length.adjusted() # how far do we have to shift the dot? + length = length.scaleb(-exponent).normalize() # shift the dot and remove potential trailing zeroes - if len(sci) < len(nonsci): return sci - else: return nonsci - else: return nonsci + sci = six.text_type(length) + 'e' + six.text_type(exponent) + + if len(sci) < len(nonsci): + return sci + else: + return nonsci + else: + return nonsci From 21e6c7491b158e01327ae2d368a0cfa7f23a1740 Mon Sep 17 00:00:00 2001 From: Eduard Braun Date: Wed, 31 Aug 2016 06:32:05 +0200 Subject: [PATCH 3/4] Fix a unittest that failed due to the increased accuracy of paths after 29e005bf7b7212fb68b9058879c73a3e88bdb07f --- testscour.py | 2 +- unittests/collapse-same-path-points.svg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/testscour.py b/testscour.py index ba0ceba..ad02ec3 100755 --- a/testscour.py +++ b/testscour.py @@ -877,7 +877,7 @@ class RereferenceForRadialGradient(unittest.TestCase): class CollapseSamePathPoints(unittest.TestCase): def runTest(self): p = scour.scourXmlFile('unittests/collapse-same-path-points.svg').getElementsByTagNameNS(SVGNS, 'path')[0]; - self.assertEqual(p.getAttribute('d'), "m100 100l100.12 100.12c14.88 4.88-15.12-5.12 0 0z", + self.assertEqual(p.getAttribute('d'), "m100 100l100.12 100.12c14.877 4.8766-15.123-5.1234 0 0z", 'Did not collapse same path points') class ScourUnitlessLengths(unittest.TestCase): diff --git a/unittests/collapse-same-path-points.svg b/unittests/collapse-same-path-points.svg index bda0fff..b05f4d1 100644 --- a/unittests/collapse-same-path-points.svg +++ b/unittests/collapse-same-path-points.svg @@ -1,4 +1,4 @@ - + From 0c63344ea431a1f677b3eb66bacff1fc63689563 Mon Sep 17 00:00:00 2001 From: Eduard Braun Date: Wed, 31 Aug 2016 07:06:42 +0200 Subject: [PATCH 4/4] Add a check to prevent we make path data longer by "optimization" --- scour/scour.py | 8 ++++++-- testscour.py | 8 ++++++++ unittests/path-no-optimize.svg | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 unittests/path-no-optimize.svg diff --git a/scour/scour.py b/scour/scour.py index bd0ced5..4af6384 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2286,8 +2286,12 @@ def cleanPath(element, options) : path = newPath newPathStr = serializePath(path, options) - numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) ) - element.setAttribute('d', newPathStr) + + # if for whatever reason we actually made the path longer don't use it + # TODO: maybe we could compare path lengths after each optimization step and use the shortest + if len(newPathStr) <= len(oldPathStr): + numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) ) + element.setAttribute('d', newPathStr) diff --git a/testscour.py b/testscour.py index ad02ec3..f409225 100755 --- a/testscour.py +++ b/testscour.py @@ -685,6 +685,14 @@ class ChangeQuadToShorthandInPath(unittest.TestCase): self.assertEqual(path.getAttribute('d'), 'm10 100q50-50 100 0t100 0', 'Did not change quadratic curves into shorthand curve segments in path') +class DoNotOptimzePathIfLarger(unittest.TestCase): + def runTest(self): + p = scour.scourXmlFile('unittests/path-no-optimize.svg').getElementsByTagNameNS(SVGNS, 'path')[0]; + self.assertTrue(len(p.getAttribute('d')) <= len("M100,100 L200.12345,200.12345 C215,205 185,195 200.12,200.12 Z"), + 'Made path data longer during optimization') + # this was the scoured path data as of 2016-08-31 without the length check in cleanPath(): + # d="m100 100l100.12 100.12c14.877 4.8766-15.123-5.1234-0.00345-0.00345z" + class HandleEncodingUTF8(unittest.TestCase): def runTest(self): doc = scour.scourXmlFile('unittests/encoding-utf8.svg') diff --git a/unittests/path-no-optimize.svg b/unittests/path-no-optimize.svg new file mode 100644 index 0000000..bda0fff --- /dev/null +++ b/unittests/path-no-optimize.svg @@ -0,0 +1,4 @@ + + + +