diff --git a/fulltests/gimp.svg b/fulltests/gimp.svg new file mode 100644 index 0000000..435665f --- /dev/null +++ b/fulltests/gimp.svg @@ -0,0 +1,199 @@ + + + + version="1.0" + x="0.0000000" + y="0.0000000" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + Wilber, the GIMP + 10/23/05 + + + worthawholebean + + + + + Inkscape + + + This is an SVG version of the original Wilber. + + + worthawholebean + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fulltests/header.svg b/fulltests/header.svg deleted file mode 100644 index 67fd2d8..0000000 --- a/fulltests/header.svg +++ /dev/null @@ -1,3662 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# This method will do the following:
# - parse the path data and reserialize
def cleanPath(element):
    """Parse the ``d`` attribute of *element* and write it back reserialized.

    *element* must provide ``getAttribute`` and ``setAttribute`` (both are
    called with the attribute name ``'d'``).  The attribute text is parsed
    with ``svg_parser.parse`` and the result is written back through
    ``serializePath``.

    Raises:
        SyntaxError: propagated from ``svg_parser.parse`` when the path data
            is malformed.
    """
    # Imported at function scope so the block carries its own dependency on
    # the path parser.
    from svg_regex import svg_parser

    path = svg_parser.parse(element.getAttribute('d'))
    element.setAttribute('d', serializePath(path))


def _flattenPathData(data):
    """Yield every scalar contained in *data*, depth first.

    Argument groups arrive in several shapes: a bare number (H/V), a pair
    (M/L/T), a tuple of pairs (C/S/Q) or a mix of pairs, numbers and flags
    (A).  Flattening gives all of them one uniform representation.
    """
    if isinstance(data, (tuple, list)):
        for item in data:
            for leaf in _flattenPathData(item):
                yield leaf
    else:
        yield data


def _formatPathNumber(value):
    """Return the shortest plain-decimal text for one path number or flag."""
    # If the value can be an integer without loss of precision, go for it.
    # Boolean arc flags also take this branch and become '0' / '1'.
    if int(value) == value:
        return str(int(value))
    text = str(value)
    if 'e' in text or 'E' in text:
        # Only non-integral values reach this point, and str() switches to
        # exponent notation for those only at small magnitudes, so a
        # fixed-point rendering with trailing zeros removed is enough.
        text = ('%.15f' % value).rstrip('0').rstrip('.')
        if text == '-0':
            text = '0'
    return text


# - reserialize the path data with some cleanups:
#   - removes scientific notation (exponents)
#   - removes trailing zeros after the decimal
#   - removes extraneous whitespace
#   - adds commas between all values in a subcommand
def serializePath(pathObj):
    """Serialize parsed path data back into an SVG ``d`` attribute string.

    Args:
        pathObj: iterable of ``(command, dataset)`` tuples.  ``dataset`` is
            ``None`` for commands without arguments (closepath), otherwise a
            list of argument groups.  A group may be a single number, a
            tuple of numbers, or a nested tuple of tuples/numbers/booleans.

    Returns:
        The path string.  Each command letter is immediately followed by its
        argument groups; the values of one group are joined with commas,
        groups and commands are separated by a single space, and there is no
        trailing whitespace.
    """
    segments = []
    for cmd, dataset in pathObj:
        if dataset is None:
            segments.append(cmd)
            continue
        groups = []
        for data in dataset:
            # Nested groups (curves, arcs) are flattened rather than being
            # written out as a Python tuple repr, which is not path syntax.
            values = [_formatPathNumber(v) for v in _flattenPathData(data)]
            groups.append(','.join(values))
        segments.append(cmd + ' '.join(groups))
    return ' '.join(segments)


# svg_regex.py
#
# This software is OSI Certified Open Source Software.
# OSI Certified is a certification mark of the Open Source Initiative.
#
# Copyright (c) 2006, Enthought, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#  * Neither the name of Enthought, Inc. nor the names of its contributors may
#    be used to endorse or promote products derived from this software without
#    specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

""" Small hand-written recursive descent parser for SVG path data.

>>> from svg_regex import svg_parser

>>> svg_parser.parse('M 10,20 30,40V50 60 70')
[('M', [(10.0, 20.0), (30.0, 40.0)]), ('V', [50.0, 60.0, 70.0])]

>>> svg_parser.parse('M 0.6051.5')  # An edge case
[('M', [(0.6051, 0.5)])]

>>> svg_parser.parse('M 100-200')  # Another edge case
[('M', [(100.0, -200.0)])]

>>> svg_parser.parse('M 1e2,3')  # Exponent without a decimal point
[('M', [(100.0, 3.0)])]
"""

import re


# Sentinel.
class _EOF(object):
    """Type of the end-of-input marker; its only instance is ``EOF``."""

    def __repr__(self):
        return 'EOF'


EOF = _EOF()

# Ordered (token name, regex) pairs.  Order matters: the alternatives are
# tried left to right, so 'float' has to come before 'int'.
lexicon = [
    # A float is either digits containing a decimal point with an optional
    # exponent, or plain digits with a mandatory exponent ("1e5").  The second
    # form is needed so that "1e5" is not split into the ints 1 and 5.
    ('float', r'[-\+]?(?:(?:[0-9]*\.[0-9]+|[0-9]+\.)(?:[Ee][-\+]?[0-9]+)?'
              r'|[0-9]+[Ee][-\+]?[0-9]+)'),
    ('int', r'[-\+]?[0-9]+'),
    ('command', r'[AaCcHhLlMmQqSsTtVvZz]'),
]


class Lexer(object):
    """ Break SVG path data into tokens.

    The SVG spec requires that tokens are greedy. This lexer relies on Python's
    regexes defaulting to greediness.

    Text that matches none of the lexicon entries (whitespace, commas, but
    also any other character) is skipped without an error.

    This style of implementation was inspired by this article:

        http://www.gooli.org/blog/a-simple-lexer-in-python/
    """

    def __init__(self, lexicon):
        """ Compile *lexicon*, a sequence of (name, regex) pairs, into one
        alternation of named groups.
        """
        self.lexicon = lexicon
        parts = ['(?P<%s>%s)' % (name, regex) for name, regex in lexicon]
        self.regex_string = '|'.join(parts)
        self.regex = re.compile(self.regex_string)

    def lex(self, text):
        """ Yield (token_type, str_data) tokens.

        The last token will be (EOF, None) where EOF is the singleton object
        defined in this module.
        """
        for match in self.regex.finditer(text):
            # Report the first lexicon entry whose named group took part in
            # this match.
            for name, _ in self.lexicon:
                m = match.group(name)
                if m is not None:
                    yield (name, m)
                    break
        yield (EOF, None)


svg_lexer = Lexer(lexicon)


class SVGPathParser(object):
    """ Parse SVG path data into a list of commands.

    Each distinct command will take the form of a tuple (command, data). The
    `command` is just the character string that starts the command group in the
    path data, so 'M' for absolute moveto, 'm' for relative moveto, 'Z' for
    closepath, etc. The kind of data it carries with it depends on the command.
    For 'Z' (closepath), it's just None. The others are lists of individual
    argument groups. Multiple elements in these lists usually mean to repeat the
    command. The notable exception is 'M' (moveto) where only the first element
    is truly a moveto. The remainder are implicit linetos.

    See the SVG documentation for the interpretation of the individual elements
    for each command.

    The main method is `parse(text)`. It can only consume actual strings, not
    filelike objects or iterators.

    Every `rule_*` method takes `next`, a zero-argument callable returning the
    following token, and `token`, the current token. Each returns a
    `(result, token)` pair where `token` is the first token the rule did not
    consume. The parameter keeps its historical name `next`, which shadows the
    builtin inside those methods.
    """

    def __init__(self, lexer=svg_lexer):
        self.lexer = lexer

        # Maps each command letter to the rule that parses its arguments.
        self.command_dispatch = {
            'Z': self.rule_closepath,
            'z': self.rule_closepath,
            'M': self.rule_moveto_or_lineto,
            'm': self.rule_moveto_or_lineto,
            'L': self.rule_moveto_or_lineto,
            'l': self.rule_moveto_or_lineto,
            'H': self.rule_orthogonal_lineto,
            'h': self.rule_orthogonal_lineto,
            'V': self.rule_orthogonal_lineto,
            'v': self.rule_orthogonal_lineto,
            'C': self.rule_curveto3,
            'c': self.rule_curveto3,
            'S': self.rule_curveto2,
            's': self.rule_curveto2,
            'Q': self.rule_curveto2,
            'q': self.rule_curveto2,
            'T': self.rule_curveto1,
            't': self.rule_curveto1,
            'A': self.rule_elliptical_arc,
            'a': self.rule_elliptical_arc,
        }

        self.number_tokens = set(['int', 'float'])

    def parse(self, text):
        """ Parse a string of SVG path data.

        Returns a list of (command, data) tuples as described on the class.
        Raises SyntaxError when the token stream does not form valid commands.
        """
        tokens = self.lexer.lex(text)

        def advance():
            # The builtin next() works on Python 2.6+ and 3.x alike; the
            # generator's own .next attribute exists only on Python 2.
            return next(tokens)

        return self.rule_svg_path(advance, advance())

    def rule_svg_path(self, next, token):
        """ Parse commands until EOF and return them as a list. """
        commands = []
        while token[0] is not EOF:
            if token[0] != 'command':
                raise SyntaxError("expecting a command; got %r" % (token,))
            rule = self.command_dispatch[token[1]]
            command_group, token = rule(next, token)
            commands.append(command_group)
        return commands

    def rule_closepath(self, next, token):
        """ Z/z: no arguments; the data is None. """
        command = token[1]
        token = next()
        return (command, None), token

    def rule_moveto_or_lineto(self, next, token):
        """ M/m/L/l: a list of (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair, token = self.rule_coordinate_pair(next, token)
            coordinates.append(pair)
        return (command, coordinates), token

    def rule_orthogonal_lineto(self, next, token):
        """ H/h/V/v: a list of single coordinates. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            coord, token = self.rule_coordinate(next, token)
            coordinates.append(coord)
        return (command, coordinates), token

    def rule_curveto3(self, next, token):
        """ C/c: a list of (pair, pair, pair) triples. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            pair2, token = self.rule_coordinate_pair(next, token)
            pair3, token = self.rule_coordinate_pair(next, token)
            coordinates.append((pair1, pair2, pair3))
        return (command, coordinates), token

    def rule_curveto2(self, next, token):
        """ S/s/Q/q: a list of (pair, pair) tuples. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            pair2, token = self.rule_coordinate_pair(next, token)
            coordinates.append((pair1, pair2))
        return (command, coordinates), token

    def rule_curveto1(self, next, token):
        """ T/t: a list of (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            coordinates.append(pair1)
        return (command, coordinates), token

    def rule_elliptical_arc(self, next, token):
        """ A/a: a list of ((rx, ry), axis_rotation, large_arc_flag,
        sweep_flag, (x, y)) tuples.

        The radii must be nonnegative and each flag must be its own token,
        exactly '0' or '1'; both flags are returned as bools.
        """
        command = token[1]
        token = next()
        arguments = []
        while token[0] in self.number_tokens:
            rx, token = self._rule_nonnegative(next, token)
            ry, token = self._rule_nonnegative(next, token)
            axis_rotation, token = self.rule_coordinate(next, token)
            large_arc_flag, token = self._rule_flag(next, token)
            sweep_flag, token = self._rule_flag(next, token)
            x, token = self.rule_coordinate(next, token)
            y, token = self.rule_coordinate(next, token)
            arguments.append(((rx, ry), axis_rotation, large_arc_flag, sweep_flag, (x, y)))

        return (command, arguments), token

    def _rule_nonnegative(self, next, token):
        """ Consume one number token, rejecting negative values. """
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        value = float(token[1])
        if value < 0.0:
            raise SyntaxError("expecting a nonnegative number; got %r" % (token,))
        return value, next()

    def _rule_flag(self, next, token):
        """ Consume one '0'/'1' token and return it as a bool. """
        if token[1] not in ('0', '1'):
            raise SyntaxError("expecting a boolean flag; got %r" % (token,))
        return bool(int(token[1])), next()

    def rule_coordinate(self, next, token):
        """ Consume one number token and return it as a float. """
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        x = float(token[1])
        token = next()
        return x, token

    def rule_coordinate_pair(self, next, token):
        """ Consume two number tokens and return them as an (x, y) tuple. """
        # Inline these since this rule is so common.
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        x = float(token[1])
        token = next()
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        y = float(token[1])
        token = next()
        return (x, y), token


svg_parser = SVGPathParser()