initial source import (v0.26) and packaging

2013-10-22 16:31:42 +02:00 · 2013-10-22 16:31:42 +02:00 · f89b6bbf6a
commit f89b6bbf6a
parent f3a7507d82
11 changed files with 4134 additions and 37 deletions
--- a/scour/__init__.py
+++ b/scour/__init__.py
@ -0,0 +1,22 @@
+###############################################################################
+##
+##  Copyright (C) 2013 Tavendo GmbH
+##
+##  Licensed under the Apache License, Version 2.0 (the "License");
+##  you may not use this file except in compliance with the License.
+##  You may obtain a copy of the License at
+##
+##      http://www.apache.org/licenses/LICENSE-2.0
+##
+##  Unless required by applicable law or agreed to in writing, software
+##  distributed under the License is distributed on an "AS IS" BASIS,
+##  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+##  See the License for the specific language governing permissions and
+##  limitations under the License.
+##
+###############################################################################
+
+import scour
+import svg_regex
+import svg_transform
+import yocto_css
--- a/scour/scour.py
+++ b/scour/scour.py
--- a/scour/svg_regex.py
+++ b/scour/svg_regex.py
@ -0,0 +1,285 @@
+# This software is OSI Certified Open Source Software.
+# OSI Certified is a certification mark of the Open Source Initiative.
+# 
+# Copyright (c) 2006, Enthought, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#  * Neither the name of Enthought, Inc. nor the names of its contributors may
+#    be used to endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+""" Small hand-written recursive descent parser for SVG <path> data.
+
+
+In [1]: from svg_regex import svg_parser
+
+In [3]: svg_parser.parse('M 10,20 30,40V50 60 70')
+Out[3]:
+[('M', [Decimal('10'), Decimal('20'), Decimal('30'), Decimal('40')]),
+ ('V', [Decimal('50'), Decimal('60'), Decimal('70')])]
+
+In [4]: svg_parser.parse('M 0.6051.5')  # An edge case
+Out[4]: [('M', [Decimal('0.6051'), Decimal('0.5')])]
+
+In [5]: svg_parser.parse('M 100-200')  # Another edge case
+Out[5]: [('M', [Decimal('100'), Decimal('-200')])]
+"""
+
+import re
+from decimal import *
+
+
+# Sentinel type; its single instance below is yielded by Lexer.lex() as the
+# type of the last token.
+class _EOF(object):
+    """ Marker class whose only instance, `EOF`, stands for end of input. """
+
+    def __repr__(self):
+        return "EOF"
+
+
+EOF = _EOF()
+
+# Token definitions as (name, regex) pairs. Lexer joins them, in this order,
+# into a single alternation, so an earlier entry wins whenever several
+# entries could match at the same position.
+lexicon = [
+    # Optionally signed decimal number with an optional exponent. This pattern
+    # also accepts a plain run of digits and is tried first, so integers are
+    # reported as 'float' tokens too.
+    ('float', r'[-+]?(?:(?:[0-9]*\.[0-9]+)|(?:[0-9]+\.?))(?:[Ee][-+]?[0-9]+)?'),
+    # Optionally signed run of digits.
+    ('int', r'[-+]?[0-9]+'),
+    # A single path command letter, upper or lower case.
+    ('command', r'[AaCcHhLlMmQqSsTtVvZz]'),
+]
+
+
+class Lexer(object):
+    """ Tokenizer for SVG path data, driven by a list of (name, regex) pairs.
+
+    All patterns are merged into one alternation of named groups. The SVG spec
+    requires tokens to be greedy, and this lexer simply relies on Python's
+    regexes being greedy by default. Text that matches none of the patterns is
+    skipped.
+
+    The approach follows this article:
+
+        http://www.gooli.org/blog/a-simple-lexer-in-python/
+    """
+    def __init__(self, lexicon):
+        self.lexicon = lexicon
+        # One named group per lexicon entry: (?P<name1>...)|(?P<name2>...)|...
+        self.regex_string = '|'.join(
+            '(?P<%s>%s)' % (name, regex) for name, regex in lexicon
+        )
+        self.regex = re.compile(self.regex_string)
+
+    def lex(self, text):
+        """ Generate (token_type, str_data) tuples for `text`.
+
+        Once the input is exhausted a final (EOF, None) tuple is produced, EOF
+        being the singleton object defined in this module.
+        """
+        for found in self.regex.finditer(text):
+            # Report the first lexicon entry whose group took part in the match.
+            for kind, _pattern in self.lexicon:
+                value = found.group(kind)
+                if value is None:
+                    continue
+                yield (kind, value)
+                break
+        yield (EOF, None)
+
+# Module-level lexer built from the path-data lexicon defined above;
+# SVGPathParser uses it when no other lexer is passed in.
+svg_lexer = Lexer(lexicon)
+
+
+class SVGPathParser(object):
+    """ Parse SVG <path> data into a list of commands.
+
+    Each distinct command takes the form of a tuple (command, data). The
+    `command` is just the character string that starts the command group in the
+    <path> data, so 'M' for absolute moveto, 'm' for relative moveto, 'Z' for
+    closepath, etc. `data` is always a flat list: empty for 'Z' (closepath),
+    otherwise the Decimal arguments of the command in the order they were
+    written. When the arguments of a command are repeated, every repetition is
+    appended to the same list, which usually means the command itself is
+    repeated. The notable exception is 'M' (moveto) where only the first
+    coordinate pair is truly a moveto. The remainder are implicit linetos.
+
+    See the SVG documentation for the interpretation of the individual elements
+    for each command.
+
+    The main method is `parse(text)`. It can only consume actual strings, not
+    filelike objects or iterators.
+
+    Every `rule_*` method takes `next`, a zero-argument callable returning the
+    following token, and `token`, the current (type, text) token. Each returns
+    a (result, token) pair where `token` is the first token the rule did not
+    consume, and raises SyntaxError on malformed input.
+    """
+
+    def __init__(self, lexer=svg_lexer):
+        self.lexer = lexer
+
+        # Maps each command letter to the rule that parses its arguments.
+        self.command_dispatch = {
+            'Z': self.rule_closepath,
+            'z': self.rule_closepath,
+            'M': self.rule_moveto_or_lineto,
+            'm': self.rule_moveto_or_lineto,
+            'L': self.rule_moveto_or_lineto,
+            'l': self.rule_moveto_or_lineto,
+            'H': self.rule_orthogonal_lineto,
+            'h': self.rule_orthogonal_lineto,
+            'V': self.rule_orthogonal_lineto,
+            'v': self.rule_orthogonal_lineto,
+            'C': self.rule_curveto3,
+            'c': self.rule_curveto3,
+            'S': self.rule_curveto2,
+            's': self.rule_curveto2,
+            'Q': self.rule_curveto2,
+            'q': self.rule_curveto2,
+            'T': self.rule_curveto1,
+            't': self.rule_curveto1,
+            'A': self.rule_elliptical_arc,
+            'a': self.rule_elliptical_arc,
+        }
+
+        # Token types that are accepted wherever a number is expected.
+        self.number_tokens = ['int', 'float']
+
+    def parse(self, text):
+        """ Parse a string of SVG <path> data.
+
+        Returns the list of (command, data) tuples described in the class
+        docstring. Raises SyntaxError when the data is malformed.
+        """
+        token_stream = self.lexer.lex(text)
+
+        # The builtin next() works on Python 2.6+ and on Python 3, whereas the
+        # generator's own .next method only exists on Python 2.
+        def advance():
+            return next(token_stream)
+
+        token = advance()
+        return self.rule_svg_path(advance, token)
+
+    def rule_svg_path(self, next, token):
+        """ Parse command groups until the EOF token; return their list. """
+        commands = []
+        while token[0] is not EOF:
+            if token[0] != 'command':
+                raise SyntaxError("expecting a command; got %r" % (token,))
+            rule = self.command_dispatch[token[1]]
+            command_group, token = rule(next, token)
+            commands.append(command_group)
+        return commands
+
+    def rule_closepath(self, next, token):
+        """ Parse 'Z' or 'z', which takes no arguments. """
+        command = token[1]
+        token = next()
+        return (command, []), token
+
+    def _rule_pair_groups(self, next, token, pairs_per_group):
+        """ Parse a command followed by groups of `pairs_per_group` pairs.
+
+        Zero or more groups are accepted, but a group that has been started
+        must be complete. The x and y values of all pairs end up in one flat
+        list.
+        """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            for _ in range(pairs_per_group):
+                pair, token = self.rule_coordinate_pair(next, token)
+                coordinates.extend(pair)
+        return (command, coordinates), token
+
+    def rule_moveto_or_lineto(self, next, token):
+        """ Parse 'M', 'm', 'L' or 'l': any number of coordinate pairs. """
+        return self._rule_pair_groups(next, token, 1)
+
+    def rule_orthogonal_lineto(self, next, token):
+        """ Parse 'H', 'h', 'V' or 'v': any number of single coordinates. """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            coord, token = self.rule_coordinate(next, token)
+            coordinates.append(coord)
+        return (command, coordinates), token
+
+    def rule_curveto3(self, next, token):
+        """ Parse 'C' or 'c': groups of three coordinate pairs. """
+        return self._rule_pair_groups(next, token, 3)
+
+    def rule_curveto2(self, next, token):
+        """ Parse 'S', 's', 'Q' or 'q': groups of two coordinate pairs. """
+        return self._rule_pair_groups(next, token, 2)
+
+    def rule_curveto1(self, next, token):
+        """ Parse 'T' or 't': any number of coordinate pairs. """
+        return self._rule_pair_groups(next, token, 1)
+
+    def _arc_number(self, token, nonnegative=False):
+        """ Return the Decimal value of a number token of an arc command.
+
+        Raises SyntaxError if `token` is not a number, or if `nonnegative` is
+        set and the value is below zero.
+        """
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        # Multiplying by 1 rounds the value to the current decimal context;
+        # the Decimal constructor alone would keep every digit of the text.
+        value = Decimal(token[1]) * 1
+        if nonnegative and value < Decimal("0.0"):
+            raise SyntaxError("expecting a nonnegative number; got %r" % (token,))
+        return value
+
+    def _arc_flag(self, token):
+        """ Return the Decimal value of an arc flag, which must be 0 or 1. """
+        if token[1] not in ('0', '1'):
+            raise SyntaxError("expecting a boolean flag; got %r" % (token,))
+        return Decimal(token[1]) * 1
+
+    def rule_elliptical_arc(self, next, token):
+        """ Parse 'A' or 'a': groups of seven arguments.
+
+        Each group is rx, ry, axis rotation, large-arc flag, sweep flag, x, y.
+        The radii must not be negative and the flags must be '0' or '1'.
+        """
+        command = token[1]
+        token = next()
+        arguments = []
+        while token[0] in self.number_tokens:
+            rx = self._arc_number(token, nonnegative=True)
+            token = next()
+            ry = self._arc_number(token, nonnegative=True)
+            token = next()
+            axis_rotation = self._arc_number(token)
+            token = next()
+            large_arc_flag = self._arc_flag(token)
+            token = next()
+            sweep_flag = self._arc_flag(token)
+            token = next()
+            x = self._arc_number(token)
+            token = next()
+            y = self._arc_number(token)
+            token = next()
+            arguments.extend(
+                [rx, ry, axis_rotation, large_arc_flag, sweep_flag, x, y])
+
+        return (command, arguments), token
+
+    def rule_coordinate(self, next, token):
+        """ Parse one number; return it as a Decimal with the next token. """
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        x = getcontext().create_decimal(token[1])
+        token = next()
+        return x, token
+
+    def rule_coordinate_pair(self, next, token):
+        """ Parse two numbers; return [x, y] with the next token. """
+        # Inline these since this rule is so common.
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        x = getcontext().create_decimal(token[1])
+        token = next()
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        y = getcontext().create_decimal(token[1])
+        token = next()
+        return [x, y], token
+
+
+# Ready-to-use parser instance: call svg_parser.parse(path_data).
+svg_parser = SVGPathParser()
--- a/scour/svg_transform.py
+++ b/scour/svg_transform.py
@ -0,0 +1,233 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#  SVG transformation list parser
+#
+#  Copyright 2010 Louis Simard
+#
+#  This file is part of Scour, http://www.codedread.com/scour/
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+""" Small recursive descent parser for SVG transform="" data.
+
+
+In [1]: from svg_transform import svg_transform_parser
+
+In [3]: svg_transform_parser.parse('translate(50, 50)')
+Out[3]: [('translate', [Decimal('50'), Decimal('50')])]
+
+In [4]: svg_transform_parser.parse('translate(50)')
+Out[4]: [('translate', [Decimal('50')])]
+
+In [5]: svg_transform_parser.parse('rotate(36 50,50)')
+Out[5]: [('rotate', [Decimal('36'), Decimal('50'), Decimal('50')])]
+
+In [6]: svg_transform_parser.parse('rotate(36)')
+Out[6]: [('rotate', [Decimal('36')])]
+
+In [7]: svg_transform_parser.parse('skewX(20)')
+Out[7]: [('skewX', [Decimal('20')])]
+
+In [8]: svg_transform_parser.parse('skewY(40)')
+Out[8]: [('skewY', [Decimal('40')])]
+
+In [9]: svg_transform_parser.parse('scale(2 .5)')
+Out[9]: [('scale', [Decimal('2'), Decimal('0.5')])]
+
+In [10]: svg_transform_parser.parse('scale(.5)')
+Out[10]: [('scale', [Decimal('0.5')])]
+
+In [11]: svg_transform_parser.parse('matrix(1 0 50 0 1 80)')
+Out[11]:
+[('matrix',
+  [Decimal('1'), Decimal('0'), Decimal('50'),
+   Decimal('0'), Decimal('1'), Decimal('80')])]
+
+Multiple transformations are supported:
+
+In [12]: svg_transform_parser.parse('translate(30 -30) rotate(36)')
+Out[12]: [('translate', [Decimal('30'), Decimal('-30')]), ('rotate', [Decimal('36')])]
+"""
+
+import re
+from decimal import *
+
+
+# Sentinel type; its single instance below is yielded by Lexer.lex() as the
+# type of the last token.
+class _EOF(object):
+    """ Marker class whose only instance, `EOF`, stands for end of input. """
+
+    def __repr__(self):
+        return "EOF"
+
+
+EOF = _EOF()
+
+# Token definitions as (name, regex) pairs. Lexer joins them, in this order,
+# into a single alternation, so an earlier entry wins whenever several
+# entries could match at the same position.
+lexicon = [
+    # Optionally signed decimal number with an optional exponent. This pattern
+    # also accepts a plain run of digits and is tried first, so integers are
+    # reported as 'float' tokens too.
+    ('float', r'[-+]?(?:(?:[0-9]*\.[0-9]+)|(?:[0-9]+\.?))(?:[Ee][-+]?[0-9]+)?'),
+    # Optionally signed run of digits.
+    ('int', r'[-+]?[0-9]+'),
+    # Name of a transformation function.
+    ('command', r'(?:matrix|translate|scale|rotate|skew[XY])'),
+    # Parentheses that open and close a transformation's argument list.
+    ('coordstart', r'\('),
+    ('coordend', r'\)'),
+]
+
+
+class Lexer(object):
+    """ Tokenizer for SVG transform data, driven by (name, regex) pairs.
+
+    All patterns are merged into one alternation of named groups. The SVG spec
+    requires tokens to be greedy, and this lexer simply relies on Python's
+    regexes being greedy by default. Text that matches none of the patterns is
+    skipped.
+
+    The approach follows this article:
+
+        http://www.gooli.org/blog/a-simple-lexer-in-python/
+    """
+    def __init__(self, lexicon):
+        self.lexicon = lexicon
+        # One named group per lexicon entry: (?P<name1>...)|(?P<name2>...)|...
+        self.regex_string = '|'.join(
+            '(?P<%s>%s)' % (name, regex) for name, regex in lexicon
+        )
+        self.regex = re.compile(self.regex_string)
+
+    def lex(self, text):
+        """ Generate (token_type, str_data) tuples for `text`.
+
+        Once the input is exhausted a final (EOF, None) tuple is produced, EOF
+        being the singleton object defined in this module.
+        """
+        for found in self.regex.finditer(text):
+            # Report the first lexicon entry whose group took part in the match.
+            for kind, _pattern in self.lexicon:
+                value = found.group(kind)
+                if value is None:
+                    continue
+                yield (kind, value)
+                break
+        yield (EOF, None)
+
+# Module-level lexer built from the transform lexicon defined above;
+# SVGTransformationParser uses it when no other lexer is passed in.
+svg_lexer = Lexer(lexicon)
+
+
+class SVGTransformationParser(object):
+    """ Parse SVG transform="" data into a list of commands.
+
+    Each distinct command will take the form of a tuple (type, data). The
+    `type` is the character string that defines the type of transformation in the
+    transform data, so either of "translate", "rotate", "scale", "matrix",
+    "skewX" and "skewY". Data is always a list of the numbers (as Decimal
+    values) contained within the transformation's parentheses.
+
+    See the SVG documentation for the interpretation of the individual elements
+    for each transformation.
+
+    The main method is `parse(text)`. It can only consume actual strings, not
+    filelike objects or iterators.
+
+    Every `rule_*` method takes `next`, a zero-argument callable returning the
+    following token, and `token`, the current (type, text) token. Each returns
+    a (result, token) pair where `token` is the first token the rule did not
+    consume, and raises SyntaxError on malformed input.
+    """
+
+    def __init__(self, lexer=svg_lexer):
+        self.lexer = lexer
+
+        # Maps each transformation name to the rule that parses its arguments.
+        self.command_dispatch = {
+            'translate': self.rule_1or2numbers,
+            'scale': self.rule_1or2numbers,
+            'skewX': self.rule_1number,
+            'skewY': self.rule_1number,
+            'rotate': self.rule_1or3numbers,
+            'matrix': self.rule_6numbers,
+        }
+
+        # Token types that are accepted wherever a number is expected.
+        self.number_tokens = ['int', 'float']
+
+    def parse(self, text):
+        """ Parse a string of SVG transform="" data.
+
+        Returns the list of (type, data) tuples described in the class
+        docstring. Raises SyntaxError when the data is malformed.
+        """
+        token_stream = self.lexer.lex(text)
+
+        # The builtin next() works on Python 2.6+ and on Python 3, whereas the
+        # generator's own .next method only exists on Python 2.
+        def advance():
+            return next(token_stream)
+
+        commands = []
+        token = advance()
+        while token[0] is not EOF:
+            command, token = self.rule_svg_transform(advance, token)
+            commands.append(command)
+        return commands
+
+    def rule_svg_transform(self, next, token):
+        """ Parse one transformation of the form `name ( numbers )`. """
+        if token[0] != 'command':
+            raise SyntaxError("expecting a transformation type; got %r" % (token,))
+        command = token[1]
+        rule = self.command_dispatch[command]
+        token = next()
+        if token[0] != 'coordstart':
+            raise SyntaxError("expecting '('; got %r" % (token,))
+        # The number rules are handed the '(' token and step past it themselves.
+        numbers, token = rule(next, token)
+        if token[0] != 'coordend':
+            raise SyntaxError("expecting ')'; got %r" % (token,))
+        token = next()
+        return (command, numbers), token
+
+    def rule_1or2numbers(self, next, token):
+        """ Parse one mandatory number and an optional second one. """
+        numbers = []
+        # 1st number is mandatory
+        token = next()
+        number, token = self.rule_number(next, token)
+        numbers.append(number)
+        # 2nd number is optional
+        number, token = self.rule_optional_number(next, token)
+        if number is not None:
+            numbers.append(number)
+
+        return numbers, token
+
+    def rule_1number(self, next, token):
+        """ Parse exactly one mandatory number. """
+        # this number is mandatory
+        token = next()
+        number, token = self.rule_number(next, token)
+        numbers = [number]
+        return numbers, token
+
+    def rule_1or3numbers(self, next, token):
+        """ Parse either one number or three numbers, never two. """
+        numbers = []
+        # 1st number is mandatory
+        token = next()
+        number, token = self.rule_number(next, token)
+        numbers.append(number)
+        # 2nd number is optional
+        number, token = self.rule_optional_number(next, token)
+        if number is not None:
+            # but, if the 2nd number is provided, the 3rd is mandatory.
+            # we can't have just 2.
+            numbers.append(number)
+
+            number, token = self.rule_number(next, token)
+            numbers.append(number)
+
+        return numbers, token
+
+    def rule_6numbers(self, next, token):
+        """ Parse exactly six mandatory numbers. """
+        numbers = []
+        token = next()
+        # all numbers are mandatory
+        for _ in range(6):
+            number, token = self.rule_number(next, token)
+            numbers.append(number)
+        return numbers, token
+
+    def rule_number(self, next, token):
+        """ Parse a mandatory number; return it as a Decimal. """
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        # Multiplying by 1 rounds the value to the current decimal context;
+        # the Decimal constructor alone would keep every digit of the text.
+        x = Decimal(token[1]) * 1
+        token = next()
+        return x, token
+
+    def rule_optional_number(self, next, token):
+        """ Parse a number if one is present.
+
+        Returns (None, token) without consuming anything when `token` is not
+        a number.
+        """
+        if token[0] not in self.number_tokens:
+            return None, token
+        return self.rule_number(next, token)
+
+
+# Ready-to-use parser instance: call svg_transform_parser.parse(transform_data).
+svg_transform_parser = SVGTransformationParser()
--- a/scour/yocto_css.py
+++ b/scour/yocto_css.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#  yocto-css, an extremely bare minimum CSS parser
+#
+#  Copyright 2009 Jeff Schiller
+#
+#  This file is part of Scour, http://www.codedread.com/scour/
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# In order to resolve Bug 368716 (https://bugs.launchpad.net/scour/+bug/368716)
+# scour needed a bare-minimum CSS parser in order to determine if some elements
+# were still referenced by CSS properties.
+
+# I looked at css-py (a CSS parser built in Python), but that library 
+# is about 35k of Python and requires ply to be installed.  I just need 
+# something very basic to suit scour's needs.
+
+# yocto-css takes a string of CSS and tries to spit out a list of rules
+# A rule is an associative array (dictionary) with the following keys:
+# - selector: contains the string of the selector (see CSS grammar)
+# - properties: contains an associative array of CSS properties for this rule
+
+# TODO: need to build up some unit tests for yocto_css
+
+# stylesheet  : [ CDO | CDC | S | statement ]*;
+# statement   : ruleset | at-rule;
+# at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
+# block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
+# ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
+# selector    : any+;
+# declaration : property S* ':' S* value;
+# property    : IDENT;
+# value       : [ any | block | ATKEYWORD S* ]+;
+# any         : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
+#               | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
+#               | DASHMATCH | FUNCTION S* any* ')' 
+#               | '(' S* any* ')' | '[' S* any* ']' ] S*;
+
+def parseCssString(str):
+    """Split a string of CSS into a list of very loosely parsed rules.
+
+    `str` is the CSS text (the parameter name shadows the builtin; it is kept
+    so that existing keyword callers continue to work).
+
+    Returns a list of dictionaries, one per rule, with the keys:
+    - 'selector': the stripped text in front of the '{'
+    - 'properties': a dictionary mapping property names to values
+
+    Chunks that do not contain exactly one '{' (for example nested at-rule
+    blocks) and declarations without a ':' are skipped silently.
+    """
+    rules = []
+    # first, split on } to get the rule chunks
+    for chunk in str.split('}'):
+        # second, split on { to get the selector and the list of properties
+        bits = chunk.split('{')
+        if len(bits) != 2:
+            continue
+        selector, body = bits
+        props = {}
+        # third, split on ; to get the property declarations
+        for declaration in body.split(';'):
+            # fourth, split on the first : only, so that values which contain
+            # a colon themselves, such as url(http://...), are kept
+            name, colon, value = declaration.partition(':')
+            if not colon:
+                continue
+            props[name.strip()] = value.strip()
+        rules.append({'selector': selector.strip(), 'properties': props})
+    return rules