Fix bug with polygon point parsing. Fix bug 368716: tiny CSS parser to find referenced elements in style element

2009-08-31 00:11:34 -05:00 · 2009-08-31 00:11:34 -05:00 · 7fcc125286
commit 7fcc125286
parent d9e3e2436b
6 changed files with 159 additions and 35 deletions
--- a/package.sh
+++ b/package.sh
@ -1,6 +1,6 @@
 #!/bin/bash
 SCOURVER="0.20"
 cd ..
-zip scour/tarballs/scour-$SCOURVER.zip scour/scour.py scour/svg_regex.py scour/LICENSE scour/NOTICE scour/README.txt scour/release-notes.html
+zip scour/tarballs/scour-$SCOURVER.zip scour/scour.py scour/yocto_css.py scour/svg_regex.py scour/LICENSE scour/NOTICE scour/README.txt scour/release-notes.html
 cd scour
-zip tarballs/scour-inkscape-extension-$SCOURVER.zip scour.inx scour.inkscape.py scour.py svg_regex.py
+# We are inside the scour directory at this point, so paths carry no scour/ prefix
+zip tarballs/scour-inkscape-extension-$SCOURVER.zip scour.inx scour.inkscape.py scour.py svg_regex.py yocto_css.py
--- a/release-notes.html
+++ b/release-notes.html
@ -13,9 +13,11 @@
 	<header>
 		<h2><a href="#0.20">Version 0.20</a></h2>
 	</header>
-	<p>Aug ??th, 2009</p>
+	<p>Aug 31st, 2009</p>
 	<ul>
+		<li>Fix <a href="https://bugs.launchpad.net/scour/+bug/368716">Bug 368716</a> by implementing a really tiny CSS parser to find out if any style elements have rules referencing gradients, filters, etc.</li>
 		<li>Remove unused attributes from parent elements</li>
+		<li>Fix a bug with polygon/polyline point parsing if there was whitespace at the end</li>
 	</ul>
 </section>

--- a/scour.py
+++ b/scour.py
@ -35,13 +35,13 @@

 # Next Up:
 # + remove unused attributes in parent elements
+# + prevent elements from being stripped if they are referenced in a <style> element
+#   (for instance, filter, marker, pattern) - need a crude CSS parser
 # - add an option to remove ids if they match the Inkscape-style of IDs
 # - investigate point-reducing algorithms
 # - parse transform attribute
 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
 # - option to remove metadata
-# - prevent elements from being stripped if they are referenced in a <style> element
-#   (for instance, filter, marker, pattern) - need a crude CSS parser

 # necessary to get true division
 from __future__ import division
@ -56,6 +56,7 @@ import urllib
 from svg_regex import svg_parser
 import gzip
 import optparse
+from yocto_css import parseCssString

 # Python 2.3- did not have Decimal
 try:
@ -401,6 +402,9 @@ def findElementsWithId(node, elems=None):
 				findElementsWithId(child, elems)
 	return elems

+# Properties whose value may point at another element through url(#id)
+referencingProps = [
+	'fill', 'stroke', 'filter', 'clip-path', 'mask',
+	'marker-start', 'marker-end', 'marker-mid',
+]
+
 def findReferencedElements(node, ids=None):
 	"""
 	Returns the number of times an ID is referenced as well as all elements
@ -409,13 +413,25 @@ def findReferencedElements(node, ids=None):
 	Currently looks at fill, stroke, clip-path, mask, marker, and
 	xlink:href attributes.
 	"""
+	global referencingProps
 	if ids is None:
 		ids = {}
 	# TODO: input argument ids is clunky here (see below how it is called)
 	# GZ: alternative to passing dict, use **kwargs
-	href = node.getAttributeNS(NS['XLINK'],'href')

-	# if xlink:href is set, then grab the id
+	# if this node is a style element, parse its text into CSS
+	if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
+		# node.firstChild will be either a CDATA or a Text node
+		cssRules = parseCssString(node.firstChild.nodeValue)
+		for rule in cssRules:
+			for propname in rule['properties']:
+				propval = rule['properties'][propname]
+				findReferencingProperty(node, propname, propval, ids)
+		
+		return ids
+	
+	# else if xlink:href is set, then grab the id
+	href = node.getAttributeNS(NS['XLINK'],'href')	
 	if href != '' and len(href) > 1 and href[0] == '#':
 		# we remove the hash mark from the beginning of the id
 		id = href[1:]
@ -427,8 +443,6 @@ def findReferencedElements(node, ids=None):

 	# now get all style properties and the fill, stroke, filter attributes
 	styles = node.getAttribute('style').split(';')
-	referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',  'marker-start', 
-						'marker-end', 'marker-mid']
 	for attr in referencingProps:
 		styles.append(':'.join([attr, node.getAttribute(attr)]))
 			
@ -437,29 +451,7 @@ def findReferencedElements(node, ids=None):
 		if len(propval) == 2 :
 			prop = propval[0].strip()
 			val = propval[1].strip()
-			if prop in referencingProps and val != '' :
-				if len(val) >= 7 and val[0:5] == 'url(#' :
-					id = val[5:val.find(')')]
-					if ids.has_key(id) :
-						ids[id][0] += 1
-						ids[id][1].append(node)
-					else:
-						ids[id] = [1,[node]]
-				# if the url has a quote in it, we need to compensate
-				elif len(val) >= 8 :
-					id = None
-					# double-quote
-					if val[0:6] == 'url("#' :
-						id = val[6:val.find('")')]
-					# single-quote
-					elif val[0:6] == "url('#" :
-						id = val[6:val.find("')")]
-					if id != None:
-						if ids.has_key(id) :
-							ids[id][0] += 1
-							ids[id][1].append(node)
-						else:
-							ids[id] = [1,[node]]
+			findReferencingProperty(node, prop, val, ids)

 	if node.hasChildNodes() :
 		for child in node.childNodes:
@ -467,6 +459,32 @@ def findReferencedElements(node, ids=None):
 				findReferencedElements(child, ids)
 	return ids

+def findReferencingProperty(node, prop, val, ids):
+	"""
+	Records in ids the element id that the value of a referencing property
+	points at, if any.
+	node is the element (or style element) carrying the property, prop the
+	property name and val its value.  ids maps an id to a two-item list of
+	[reference count, list of referencing nodes] and is updated in place.
+	Recognized value forms are url(#id), url("#id") and url('#id').
+	"""
+	global referencingProps
+	# only non-empty values of the known referencing properties are of interest
+	if prop not in referencingProps or val == '' :
+		return
+
+	refId = None
+	if len(val) >= 7 and val[0:5] == 'url(#' :
+		# unquoted form
+		refId = val[5:val.find(')')]
+	elif len(val) >= 8 :
+		# if the url has a quote in it, we need to compensate
+		opener = val[0:6]
+		if opener == 'url("#' :
+			refId = val[6:val.find('")')]
+		elif opener == "url('#" :
+			refId = val[6:val.find("')")]
+
+	if refId is None:
+		return
+
+	# bump the count for an id seen before, otherwise start a new entry
+	if refId in ids :
+		ids[refId][0] += 1
+		ids[refId][1].append(node)
+	else:
+		ids[refId] = [1,[node]]
+
+
 numIDsRemoved = 0
 numElemsRemoved = 0
 numAttrsRemoved = 0
@ -741,6 +759,7 @@ def removeUnusedAttributesOnParent(elem):
 	# unusedAttrs now has all the parent attributes that are unused
 	for name in unusedAttrs.keys():
 		elem.removeAttribute(name)
+		num += 1
 	
 	return num
 	
@ -1749,11 +1768,10 @@ def parseListOfPoints(s):
 	
 		Returns a list of containing an even number of coordinate strings
 	"""
-	
 	# (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
 	# coordinate-pair = coordinate comma-or-wsp coordinate
 	# coordinate = sign? integer
-	nums = re.split("\\s*\\,?\\s*", s)
+	nums = re.split("\\s*\\,?\\s*", s.strip())
 	i = 0
 	points = []
 	while i < len(nums):
--- a/testscour.py
+++ b/testscour.py
@ -915,6 +915,11 @@ class DoNotRemoveCommonAttributesOnParentIfAtLeastOneUsed(unittest.TestCase):
 		self.assertEquals( g.getAttribute('fill'), '#0F0',
 			'Used attributes on group were removed')

+class DoNotRemoveGradientsWhenReferencedInStyleCss(unittest.TestCase):
+	# The fixture references its two gradients only from <style> rules, so both
+	# linearGradient elements must still be present after scouring.
+	def runTest(self):
+		doc = scour.scourXmlFile('unittests/css-reference.svg')
+		gradients = doc.getElementsByTagNameNS(SVGNS, 'linearGradient')
+		self.assertEquals( gradients.length, 2,
+			'Gradients removed when referenced in CSS')
 		
 # TODO; write a test for embedding rasters
 # TODO: write a test for --disable-embed-rasters
--- a/unittests/css-reference.svg
+++ b/unittests/css-reference.svg
@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+
+<defs>
+	<linearGradient id="g1">
+		<stop offset="0" stop-color="red"/>
+		<stop offset="1" stop-color="blue"/>
+	</linearGradient>
+	<linearGradient id="g2">
+		<stop offset="0" stop-color="green"/>
+		<stop offset="1" stop-color="yellow"/>
+	</linearGradient>
+</defs>
+<style type="text/css"><![CDATA[
+	rect {
+		stroke: red;
+		stroke-width: 10;
+		fill:url(#g1)
+	}
+]]></style>
+
+<style type="text/css">.circ { fill: none; stroke: url("#g2"); stroke-width: 15 }</style>
+
+<rect height="300" width="300"/>
+<circle class="circ" cx="350" cy="350" r="40"/>
+
+</svg>
--- a/yocto_css.py
+++ b/yocto_css.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#  yocto-css, an extremely bare minimum CSS parser
+#
+#  Copyright 2009 Jeff Schiller
+#
+#  This file is part of Scour, http://www.codedread.com/scour/
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# In order to resolve Bug 368716 (https://bugs.launchpad.net/scour/+bug/368716)
+# scour needed a bare-minimum CSS parser in order to determine if some elements
+# were still referenced by CSS properties.
+
+# I looked at css-py (a CSS parser built in Python), but that library 
+# is about 35k of Python and requires ply to be installed.  I just need 
+# something very basic to suit scour's needs.
+
+# yocto-css takes a string of CSS and tries to spit out a list of rules
+# A rule is an associative array (dictionary) with the following keys:
+# - selector: contains the string of the selector (see CSS grammar)
+# - properties: contains an associative array of CSS properties for this rule
+
+# TODO: need to build up some unit tests for yocto_css
+
+# stylesheet  : [ CDO | CDC | S | statement ]*;
+# statement   : ruleset | at-rule;
+# at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
+# block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
+# ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
+# selector    : any+;
+# declaration : property S* ':' S* value;
+# property    : IDENT;
+# value       : [ any | block | ATKEYWORD S* ]+;
+# any         : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
+#               | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
+#               | DASHMATCH | FUNCTION S* any* ')' 
+#               | '(' S* any* ')' | '[' S* any* ']' ] S*;
+
+def parseCssString(str):
+	"""
+	Parses a string of CSS into a list of rules.
+	Each rule is a dictionary with two keys:
+	- 'selector': the stripped selector text that precedes the '{'
+	- 'properties': a dictionary mapping each property name to its value
+	This is deliberately crude: a chunk that does not contain exactly one '{'
+	(for instance a nested at-rule block) is skipped, and CSS comments are
+	not recognized.
+	"""
+	rules = []
+	# first, split on } to get the rule chunks
+	for chunk in str.split('}'):
+		# second, split on { to get the selector and the list of properties
+		bits = chunk.split('{')
+		if len(bits) != 2: continue
+		props = {}
+		# third, split on ; to get the property declarations
+		for bite in bits[1].strip().split(';'):
+			# fourth, split on the first : only, so that a value which itself
+			# contains a colon (such as url(http://...)) is not thrown away
+			nibbles = bite.strip().split(':', 1)
+			if len(nibbles) != 2: continue
+			props[nibbles[0].strip()] = nibbles[1].strip()
+		rules.append({'selector': bits[0].strip(), 'properties': props})
+	return rules