scour/scour.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#  Scour
#
#  Copyright 2009 Jeff Schiller
#
#  This file is part of Scour, http://www.codedread.com/scour/
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

# Notes:

# rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
# (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )

# Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
#
# * Process Transformations
#  * Collapse all group based transformations

# Even more ideas here: http://esw.w3.org/topic/SvgTidy
#  * analysis of path elements to see if rect can be used instead? (would also need to look
#    at rounded corners)
#  * removal of unused attributes in groups:
#    <g fill="blue" ...>
#      <rect fill="red" ... />
#      <rect fill="red" ... />
#      <rect fill="red" ... />
#    </g>
#    in this case, fill="blue" should be removed
#  * Move common attributes up to a parent group:
#    <g>
#      <rect fill="white"/>
#      <rect fill="white"/>
#      <rect fill="white"/>
#    </g>
#    becomes:
#    <g fill="white">
#      <rect />
#      <rect />
#      <rect />
#    </g>

# Next Up:
# + Remove some attributes that have default values
# + Convert c/q path segments into shorthand equivalents where possible:
# + custom serialization of SVG that prints out id/xml:id first (suggestion by Richard Hutch)
# + --indent option to specify how indent should work: space, tab, none
# - option to remove metadata
# - parse transform attribute
# - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
# - remove id if it matches the Inkscape-style of IDs (also provide a switch to disable this)
# - prevent elements from being stripped if they are referenced in a <style> element
#   (for instance, filter, marker, pattern) - need a crude CSS parser
# - Remove any unused glyphs from font elements?
# - add an option for svgweb compatible markup (no self-closing tags)?

# necessary to get true division
from __future__ import division

import os
import sys
import xml.dom.minidom
import re
import math
import base64
import urllib
from svg_regex import svg_parser
import gzip
import optparse

# Python 2.3- did not have Decimal
try:
	from decimal import *
except ImportError:
	from fixedpoint import *
	Decimal = FixedPoint

# Program name, version string and copyright notice for this script.
APP = 'scour'
VER = '0.18'
COPYRIGHT = 'Copyright Jeff Schiller, 2009'

# XML namespace URIs, keyed by a short symbolic name.
NS = { 	'SVG': 		'http://www.w3.org/2000/svg',
		'XLINK': 	'http://www.w3.org/1999/xlink',
		'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
		'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
		'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
		'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
		'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
		'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
		'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
		'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
		'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
		'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
		'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
		'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
		}

# Every namespace above except SVG and XLINK (the editor-specific ones).
# NOTE(review): presumably the list handed to removeNamespacedAttributes() /
# removeNamespacedElements(); the call site is not in this part of the file.
unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
				NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
				NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
				NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]

# Style property names that repairStyle() moves out of the style attribute and
# into a same-named XML attribute when options.style_to_xml is set.
svgAttributes = [
				'clip-rule',
				'display',
				'fill',
				'fill-opacity',
				'fill-rule',
				'filter',
				'font-family',
				'font-size',
				'font-stretch',
				'font-style',
				'font-variant',
				'font-weight',
				'line-height',
				'marker',
				'opacity',
				'overflow',
				'stop-color',
				'stop-opacity',
				'stroke',
				'stroke-dashoffset',
				'stroke-linecap',
				'stroke-linejoin',
				'stroke-miterlimit',
				'stroke-opacity',
				'stroke-width',
				'visibility'
				]

# Color keyword names mapped to an 'rgb(r, g, b)' string.
# Some values carry a space directly after 'rgb(' (e.g. 'rgb( 0, 0, 0)'), so
# code comparing against these strings must expect that exact spelling.
# NOTE(review): the table is not referenced within this part of the file.
colors = {
	'aliceblue': 'rgb(240, 248, 255)',
	'antiquewhite': 'rgb(250, 235, 215)',
	'aqua': 'rgb( 0, 255, 255)',
	'aquamarine': 'rgb(127, 255, 212)',
	'azure': 'rgb(240, 255, 255)',
	'beige': 'rgb(245, 245, 220)',
	'bisque': 'rgb(255, 228, 196)',
	'black': 'rgb( 0, 0, 0)',
	'blanchedalmond': 'rgb(255, 235, 205)',
	'blue': 'rgb( 0, 0, 255)',
	'blueviolet': 'rgb(138, 43, 226)',
	'brown': 'rgb(165, 42, 42)',
	'burlywood': 'rgb(222, 184, 135)',
	'cadetblue': 'rgb( 95, 158, 160)',
	'chartreuse': 'rgb(127, 255, 0)',
	'chocolate': 'rgb(210, 105, 30)',
	'coral': 'rgb(255, 127, 80)',
	'cornflowerblue': 'rgb(100, 149, 237)',
	'cornsilk': 'rgb(255, 248, 220)',
	'crimson': 'rgb(220, 20, 60)',
	'cyan': 'rgb( 0, 255, 255)',
	'darkblue': 'rgb( 0, 0, 139)',
	'darkcyan': 'rgb( 0, 139, 139)',
	'darkgoldenrod': 'rgb(184, 134, 11)',
	'darkgray': 'rgb(169, 169, 169)',
	'darkgreen': 'rgb( 0, 100, 0)',
	'darkgrey': 'rgb(169, 169, 169)',
	'darkkhaki': 'rgb(189, 183, 107)',
	'darkmagenta': 'rgb(139, 0, 139)',
	'darkolivegreen': 'rgb( 85, 107, 47)',
	'darkorange': 'rgb(255, 140, 0)',
	'darkorchid': 'rgb(153, 50, 204)',
	'darkred': 'rgb(139, 0, 0)',
	'darksalmon': 'rgb(233, 150, 122)',
	'darkseagreen': 'rgb(143, 188, 143)',
	'darkslateblue': 'rgb( 72, 61, 139)',
	'darkslategray': 'rgb( 47, 79, 79)',
	'darkslategrey': 'rgb( 47, 79, 79)',
	'darkturquoise': 'rgb( 0, 206, 209)',
	'darkviolet': 'rgb(148, 0, 211)',
	'deeppink': 'rgb(255, 20, 147)',
	'deepskyblue': 'rgb( 0, 191, 255)',
	'dimgray': 'rgb(105, 105, 105)',
	'dimgrey': 'rgb(105, 105, 105)',
	'dodgerblue': 'rgb( 30, 144, 255)',
	'firebrick': 'rgb(178, 34, 34)',
	'floralwhite': 'rgb(255, 250, 240)',
	'forestgreen': 'rgb( 34, 139, 34)',
	'fuchsia': 'rgb(255, 0, 255)',
	'gainsboro': 'rgb(220, 220, 220)',
	'ghostwhite': 'rgb(248, 248, 255)',
	'gold': 'rgb(255, 215, 0)',
	'goldenrod': 'rgb(218, 165, 32)',
	'gray': 'rgb(128, 128, 128)',
	'grey': 'rgb(128, 128, 128)',
	'green': 'rgb( 0, 128, 0)',
	'greenyellow': 'rgb(173, 255, 47)',
	'honeydew': 'rgb(240, 255, 240)',
	'hotpink': 'rgb(255, 105, 180)',
	'indianred': 'rgb(205, 92, 92)',
	'indigo': 'rgb( 75, 0, 130)',
	'ivory': 'rgb(255, 255, 240)',
	'khaki': 'rgb(240, 230, 140)',
	'lavender': 'rgb(230, 230, 250)',
	'lavenderblush': 'rgb(255, 240, 245)',
	'lawngreen': 'rgb(124, 252, 0)',
	'lemonchiffon': 'rgb(255, 250, 205)',
	'lightblue': 'rgb(173, 216, 230)',
	'lightcoral': 'rgb(240, 128, 128)',
	'lightcyan': 'rgb(224, 255, 255)',
	'lightgoldenrodyellow': 'rgb(250, 250, 210)',
	'lightgray': 'rgb(211, 211, 211)',
	'lightgreen': 'rgb(144, 238, 144)',
	'lightgrey': 'rgb(211, 211, 211)',
	'lightpink': 'rgb(255, 182, 193)',
	'lightsalmon': 'rgb(255, 160, 122)',
	'lightseagreen': 'rgb( 32, 178, 170)',
	'lightskyblue': 'rgb(135, 206, 250)',
	'lightslategray': 'rgb(119, 136, 153)',
	'lightslategrey': 'rgb(119, 136, 153)',
	'lightsteelblue': 'rgb(176, 196, 222)',
	'lightyellow': 'rgb(255, 255, 224)',
	'lime': 'rgb( 0, 255, 0)',
	'limegreen': 'rgb( 50, 205, 50)',
	'linen': 'rgb(250, 240, 230)',
	'magenta': 'rgb(255, 0, 255)',
	'maroon': 'rgb(128, 0, 0)',
	'mediumaquamarine': 'rgb(102, 205, 170)',
	'mediumblue': 'rgb( 0, 0, 205)',
	'mediumorchid': 'rgb(186, 85, 211)',
	'mediumpurple': 'rgb(147, 112, 219)',
	'mediumseagreen': 'rgb( 60, 179, 113)',
	'mediumslateblue': 'rgb(123, 104, 238)',
	'mediumspringgreen': 'rgb( 0, 250, 154)',
	'mediumturquoise': 'rgb( 72, 209, 204)',
	'mediumvioletred': 'rgb(199, 21, 133)',
	'midnightblue': 'rgb( 25, 25, 112)',
	'mintcream': 'rgb(245, 255, 250)',
	'mistyrose': 'rgb(255, 228, 225)',
	'moccasin': 'rgb(255, 228, 181)',
	'navajowhite': 'rgb(255, 222, 173)',
	'navy': 'rgb( 0, 0, 128)',
	'oldlace': 'rgb(253, 245, 230)',
	'olive': 'rgb(128, 128, 0)',
	'olivedrab': 'rgb(107, 142, 35)',
	'orange': 'rgb(255, 165, 0)',
	'orangered': 'rgb(255, 69, 0)',
	'orchid': 'rgb(218, 112, 214)',
	'palegoldenrod': 'rgb(238, 232, 170)',
	'palegreen': 'rgb(152, 251, 152)',
	'paleturquoise': 'rgb(175, 238, 238)',
	'palevioletred': 'rgb(219, 112, 147)',
	'papayawhip': 'rgb(255, 239, 213)',
	'peachpuff': 'rgb(255, 218, 185)',
	'peru': 'rgb(205, 133, 63)',
	'pink': 'rgb(255, 192, 203)',
	'plum': 'rgb(221, 160, 221)',
	'powderblue': 'rgb(176, 224, 230)',
	'purple': 'rgb(128, 0, 128)',
	'red': 'rgb(255, 0, 0)',
	'rosybrown': 'rgb(188, 143, 143)',
	'royalblue': 'rgb( 65, 105, 225)',
	'saddlebrown': 'rgb(139, 69, 19)',
	'salmon': 'rgb(250, 128, 114)',
	'sandybrown': 'rgb(244, 164, 96)',
	'seagreen': 'rgb( 46, 139, 87)',
	'seashell': 'rgb(255, 245, 238)',
	'sienna': 'rgb(160, 82, 45)',
	'silver': 'rgb(192, 192, 192)',
	'skyblue': 'rgb(135, 206, 235)',
	'slateblue': 'rgb(106, 90, 205)',
	'slategray': 'rgb(112, 128, 144)',
	'slategrey': 'rgb(112, 128, 144)',
	'snow': 'rgb(255, 250, 250)',
	'springgreen': 'rgb( 0, 255, 127)',
	'steelblue': 'rgb( 70, 130, 180)',
	'tan': 'rgb(210, 180, 140)',
	'teal': 'rgb( 0, 128, 128)',
	'thistle': 'rgb(216, 191, 216)',
	'tomato': 'rgb(255, 99, 71)',
	'turquoise': 'rgb( 64, 224, 208)',
	'violet': 'rgb(238, 130, 238)',
	'wheat': 'rgb(245, 222, 179)',
	'white': 'rgb(255, 255, 255)',
	'whitesmoke': 'rgb(245, 245, 245)',
	'yellow': 'rgb(255, 255, 0)',
	'yellowgreen': 'rgb(154, 205, 50)',
	}

def isSameSign(a,b):
	"""
	Returns a true value when a and b are both <= 0 or both >= 0.
	Zero therefore counts as having the same sign as any number.
	"""
	bothNonPositive = a <= 0 and b <= 0
	bothNonNegative = a >= 0 and b >= 0
	return bothNonPositive or bothNonNegative

# Pre-compiled patterns used when pulling numbers and unit suffixes out of strings.
# a plain decimal coordinate with an optional leading minus
coord = re.compile(r"\-?\d+\.?\d*")
# a number written with an exponent (e.g. 1.5e-3)
scinumber = re.compile(r"[\-\+]?(\d*\.?)?\d+[eE][\-\+]?\d+")
# a signed decimal number without exponent
number = re.compile(r"[\-\+]?(\d*\.?)?\d+")
# the exponent part on its own; group 1 is the signed exponent digits
sciExponent = re.compile(r"[eE]([\-\+]?\d+)")
# a recognised unit suffix at the very end of the string
unit = re.compile(r"(em|ex|px|pt|pc|cm|mm|in|\%){1,1}$")

class Unit(object):
	"""
	Numeric codes for the unit suffix of a length, plus two static helpers
	that translate between a suffix string and its code.
	"""
	INVALID = -1
	NONE = 0
	PCT = 1
	PX = 2
	PT = 3
	PC = 4
	EM = 5
	EX = 6
	CM = 7
	MM = 8
	IN = 9

	def get(str):
		"""
		Returns the code for the suffix string str.
		None and '' give Unit.NONE; an unknown suffix gives Unit.INVALID.
		"""
		# a missing suffix means the length is unitless
		if str == None or str == '':
			return Unit.NONE
		suffixToCode = (
			('%', Unit.PCT), ('px', Unit.PX), ('pt', Unit.PT),
			('pc', Unit.PC), ('em', Unit.EM), ('ex', Unit.EX),
			('cm', Unit.CM), ('mm', Unit.MM), ('in', Unit.IN),
			)
		for suffix, code in suffixToCode:
			if str == suffix:
				return code
		return Unit.INVALID

	def str(u):
		"""
		Returns the suffix string for the code u ('' for Unit.NONE).
		Any code without a suffix, Unit.INVALID included, gives 'INVALID'.
		"""
		codeToSuffix = (
			(Unit.NONE, ''), (Unit.PCT, '%'), (Unit.PX, 'px'),
			(Unit.PT, 'pt'), (Unit.PC, 'pc'), (Unit.EM, 'em'),
			(Unit.EX, 'ex'), (Unit.CM, 'cm'), (Unit.MM, 'mm'),
			(Unit.IN, 'in'),
			)
		for code, suffix in codeToSuffix:
			if u == code:
				return suffix
		return 'INVALID'

	# bound with staticmethod() calls rather than decorator syntax, as in the
	# rest of this file
	get = staticmethod(get)
	str = staticmethod(str)

class SVGLength(object):
	"""
	Parses a length string into a number and a unit code.

	After construction the instance always has two attributes:
	  value - the number, stored as an int when it is integral, else a float
	  units - one of the Unit constants

	A string that cannot be understood (no leading number, or a number that is
	followed by something other than a recognised unit suffix) gives
	value 0 and units Unit.INVALID.
	"""
	def __init__(self, str):
		try: # anything float() accepts directly is a unitless number
			self.value = float(str)
			if int(self.value) == self.value:
				self.value = int(self.value)
			self.units = Unit.NONE
		except ValueError:
			# the string has a unit suffix, or is invalid; start from "invalid"
			# so that both attributes exist whatever path is taken below
			self.value = 0
			self.units = Unit.INVALID

			# parse out number, exponent and unit
			unitBegin = 0
			scinum = scinumber.match(str)
			if scinum != None:
				# a scientific number always starts with a plain number,
				# so this match cannot fail
				numMatch = number.match(str)
				expMatch = sciExponent.search(str, numMatch.start(0))
				self.value = (float(numMatch.group(0)) *
					10 ** float(expMatch.group(1)))
				unitBegin = expMatch.end(1)
			else:
				# unit or invalid
				numMatch = number.match(str)
				if numMatch != None:
					self.value = float(numMatch.group(0))
					unitBegin = numMatch.end(0)

			# unitBegin stays 0 when no leading number was found
			if unitBegin != 0 :
				unitMatch = unit.search(str, unitBegin)
				if unitMatch != None :
					self.units = Unit.get(unitMatch.group(0))

			if self.units == Unit.INVALID:
				# no number, or a number followed by an unknown suffix
				# TODO: this needs to set the default for the given attribute (how?)
				self.value = 0
			elif int(self.value) == self.value:
				self.value = int(self.value)

# returns the length of a property
# TODO: eventually use the above class once it is complete
def getSVGLength(value):
	"""
	Returns value converted to a float when it is a plain number.
	Any other string (for example one with a unit suffix such as '10px') is
	returned unchanged, so callers must be ready for either a float or the
	original string.
	"""
	try:
		return float(value)
	except ValueError:
		# not a bare number; no unit handling is done here
		return value

def findElementById(node, id):
	"""
	Returns the first element, in document order starting at node itself,
	whose id attribute equals id, or None when there is no such element.
	Only element nodes (nodeType 1) are examined.
	"""
	# explicit stack instead of recursion; the top of the stack is always
	# the next node in document order
	pending = [node]
	while pending:
		current = pending.pop()
		if current is None or current.nodeType != 1:
			continue
		if current.getAttribute('id') == id:
			return current
		children = list(current.childNodes)
		children.reverse()
		pending.extend(children)
	return None

def findElementsWithId(node, elems=None):
	"""
	Returns a dictionary mapping each non-empty id found on node or on any
	element below it to the element carrying it.  When an id occurs more than
	once the element that comes last in document order is kept.

	Entries are added to elems when it is supplied, and that same dictionary
	is returned.
	"""
	if elems is None:
		elems = {}
	# walk the tree in document order using an explicit stack
	pending = [node]
	while pending:
		current = pending.pop()
		ident = current.getAttribute('id')
		if ident != '':
			elems[ident] = current
		# only child nodes of type Element (1) can carry an id
		elementChildren = [kid for kid in current.childNodes if kid.nodeType == 1]
		elementChildren.reverse()
		pending.extend(elementChildren)
	return elems

def _countReference(ids, id, node):
	"""
	Records in ids that node references the element with the given id.
	"""
	if id in ids:
		ids[id][0] += 1
		ids[id][1].append(node)
	else:
		ids[id] = [1,[node]]

def findReferencedElements(node, ids=None):
	"""
	Returns a dictionary that maps each referenced ID to a two-item list:
	the number of references found and the list of referencing elements.

	Looks at the xlink:href attribute and at the fill, stroke, filter,
	clip-path, mask, marker-start, marker-mid and marker-end properties,
	whether given as attributes or inside the style attribute, on node
	and on every element below it.

	Entries are added to ids when it is supplied, and that same dictionary
	is returned.
	"""
	if ids is None:
		ids = {}
	# TODO: input argument ids is clunky here (see below how it is called)
	href = node.getAttributeNS(NS['XLINK'],'href')

	# if xlink:href points into this document, count the id after the hash mark
	if href != '' and len(href) > 1 and href[0] == '#':
		_countReference(ids, href[1:], node)

	# treat the referencing attributes as if they were style declarations too,
	# so that one loop handles both sources
	referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',  'marker-start',
						'marker-end', 'marker-mid']
	styles = node.getAttribute('style').split(';')
	for attr in referencingProps:
		styles.append(':'.join([attr, node.getAttribute(attr)]))

	for style in styles:
		propval = style.split(':')
		if len(propval) != 2 :
			continue
		prop = propval[0].strip()
		val = propval[1].strip()
		if prop not in referencingProps or val == '' :
			continue

		id = None
		if len(val) >= 7 and val[0:5] == 'url(#' :
			id = val[5:val.find(')')]
		# if the url has a quote in it, we need to compensate
		elif len(val) >= 8 :
			# double-quote
			if val[0:6] == 'url("#' :
				id = val[6:val.find('")')]
			# single-quote
			elif val[0:6] == "url('#" :
				id = val[6:val.find("')")]
		if id is not None:
			_countReference(ids, id, node)

	# only child nodes of type Element (1) can hold references
	if node.hasChildNodes() :
		for child in node.childNodes:
			if child.nodeType == 1 :
				findReferencedElements(child, ids)
	return ids

# Module-wide running totals.  The functions below that declare them 'global'
# add to numIDsRemoved, numElemsRemoved and numAttrsRemoved as they work.
# NOTE(review): the remaining counters are not updated in this part of the
# file; presumably other functions maintain them.
numIDsRemoved = 0
numElemsRemoved = 0
numAttrsRemoved = 0
numRastersEmbedded = 0
numPathSegmentsReduced = 0
numCurvesStraightened = 0
numBytesSavedInPathData = 0
numBytesSavedInColors = 0
numPointsRemovedFromPolygon = 0

def removeUnusedDefs(doc, defElem, elemsToRemove=None):
	"""
	Collects the children of defElem that can be discarded: elements that have
	no id, or whose id is not referenced anywhere in doc, and that are not one
	of font, style, metadata, script, title or desc.  SVG <g> children are not
	collected themselves; their contents are examined recursively instead.

	Nothing is removed here.  The candidates are appended to elemsToRemove
	(a new list when none is given), which is also the return value.
	"""
	if elemsToRemove is None:
		elemsToRemove = []

	referencedIDs = findReferencedElements(doc.documentElement)

	keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
	for elem in defElem.childNodes:
		if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
			elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
			continue
		# only element nodes (nodeType 1) are candidates
		if elem.nodeType != 1 or elem.nodeName in keepTags:
			continue
		elemId = elem.getAttribute('id')
		if elemId == '' or elemId not in referencedIDs:
			elemsToRemove.append(elem)
	return elemsToRemove

def removeUnreferencedElements(doc):
	"""
	Removes linearGradient, radialGradient and pattern elements whose id is
	never referenced, then vacuums every <defs> of the children that
	removeUnusedDefs() reports as unused.

	Returns the total number of elements removed by both steps;
	numElemsRemoved grows by the same amount.
	"""
	global numElemsRemoved
	num = 0
	removeTags = ['linearGradient', 'radialGradient', 'pattern']

	identifiedElements = findElementsWithId(doc.documentElement)
	referencedIDs = findReferencedElements(doc.documentElement)

	for id in identifiedElements:
		if not id in referencedIDs:
			goner = findElementById(doc.documentElement, id)
			if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
				goner.parentNode.removeChild(goner)
				num += 1
				numElemsRemoved += 1

	# vacuum the defs; the count keeps accumulating so the return value
	# covers both steps
	defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
	for aDef in defs:
		elemsToRemove = removeUnusedDefs(doc, aDef)
		for elem in elemsToRemove:
			elem.parentNode.removeChild(elem)
			numElemsRemoved += 1
			num += 1
	return num

def removeUnreferencedIDs(referencedIDs, identifiedElements):
	"""
	Removes the id attribute from every element in identifiedElements
	(a dictionary of id -> element) whose id is not a key of referencedIDs.
	<font> elements keep their id.

	Returns the number of ID attributes removed; numIDsRemoved grows by the
	same amount.
	"""
	global numIDsRemoved
	keepTags = ['font']
	num = 0
	for id in identifiedElements:
		node = identifiedElements[id]
		if id not in referencedIDs and node.nodeName not in keepTags:
			node.removeAttribute('id')
			numIDsRemoved += 1
			num += 1
	return num

def removeNamespacedAttributes(node, namespaces):
	"""
	Strips from node, and from everything below it, each attribute whose
	namespace URI is one of namespaces.

	Returns the number of attributes removed; numAttrsRemoved grows by the
	same amount.
	"""
	global numAttrsRemoved
	# only element nodes (nodeType 1) are processed
	if node.nodeType != 1:
		return 0

	removed = 0
	attrs = node.attributes
	# gather the names first, then remove, so the attribute map is never
	# changed while it is being indexed
	candidates = [attrs.item(i) for i in range(attrs.length)]
	doomed = [a.nodeName for a in candidates
				if a != None and a.namespaceURI in namespaces]
	for name in doomed:
		removed += 1
		numAttrsRemoved += 1
		node.removeAttribute(name)

	for child in node.childNodes:
		removed += removeNamespacedAttributes(child, namespaces)
	return removed

def removeNamespacedElements(node, namespaces):
	"""
	Removes every child node of node whose namespace URI is one of
	namespaces, then does the same inside each remaining child.

	Returns the number of nodes removed; numElemsRemoved grows by the same
	amount.
	"""
	global numElemsRemoved
	# only element nodes (nodeType 1) are processed
	if node.nodeType != 1:
		return 0

	removed = 0
	# decide what goes before touching the child list
	doomed = [kid for kid in node.childNodes
				if kid != None and kid.namespaceURI in namespaces]
	for kid in doomed:
		removed += 1
		numElemsRemoved += 1
		node.removeChild(kid)

	# whatever is left may still contain unwanted descendants
	for kid in node.childNodes:
		removed += removeNamespacedElements(kid, namespaces)
	return removed

# this walks further and further down the tree, removing groups
# which do not have any attributes or a title/desc child and
# promoting their children up one level
def removeNestedGroups(node):
	"""
	Collapses each SVG <g> child of node that has no attributes and no
	<title> or <desc> child: the group's children take its place and the
	group is removed.  The same is then done inside every element child.

	Returns the number of groups removed; numElemsRemoved grows by the same
	amount.
	"""
	global numElemsRemoved
	removed = 0

	collapsible = []
	for candidate in node.childNodes:
		if candidate.nodeName != 'g' or candidate.namespaceURI != NS['SVG']:
			continue
		if len(candidate.attributes) != 0:
			continue
		# a title or desc directly inside the group describes the group,
		# so such a group has to stay
		described = False
		for inner in candidate.childNodes:
			if inner.nodeType == 1 and inner.namespaceURI == NS['SVG'] and \
					inner.nodeName in ['title','desc']:
				described = True
				break
		if not described:
			collapsible.append(candidate)

	for group in collapsible:
		parent = group.parentNode
		# move the children out one by one, keeping their order
		while group.childNodes.length > 0:
			parent.insertBefore(group.firstChild, group)
		parent.removeChild(group)
		numElemsRemoved += 1
		removed += 1

	# promoted children are now direct children of node and get visited here
	for child in node.childNodes:
		if child.nodeType == 1:
			removed += removeNestedGroups(child)
	return removed

def removeDuplicateGradientStops(doc):
	"""
	Normalises the offset attribute of every <stop> inside a linearGradient or
	radialGradient to a plain number (percentages are divided by 100, offsets
	with any other unit become 0) and removes a stop when the previous stop
	seen at the same offset has the same stop-color and stop-opacity
	attributes.

	Returns the number of stops removed; numElemsRemoved grows by the same
	amount.
	"""
	global numElemsRemoved
	num = 0

	for gradType in ['linearGradient', 'radialGradient']:
		for grad in doc.getElementsByTagNameNS(NS['SVG'], gradType):
			# offset -> [color, opacity] of the latest stop seen at that offset
			stops = {}
			stopsToRemove = []
			for stop in grad.getElementsByTagNameNS(NS['SVG'], 'stop'):
				# convert percentages into a floating point number
				offsetU = SVGLength(stop.getAttribute('offset'))
				if offsetU.units == Unit.PCT:
					offset = offsetU.value / 100.0
				elif offsetU.units == Unit.NONE:
					offset = offsetU.value
				else:
					offset = 0
				# set the stop offset value to the integer or floating point equivalent
				if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
				else: stop.setAttribute('offset', str(offset))

				color = stop.getAttribute('stop-color')
				opacity = stop.getAttribute('stop-opacity')
				if offset in stops :
					oldStop = stops[offset]
					if oldStop[0] == color and oldStop[1] == opacity:
						stopsToRemove.append(stop)
				stops[offset] = [color, opacity]

			# removal is deferred until the stops have all been examined
			for stop in stopsToRemove:
				stop.parentNode.removeChild(stop)
				num += 1
				numElemsRemoved += 1

	return num

def collapseSinglyReferencedGradients(doc):
	"""
	Merges a gradient that is referenced exactly once, and only by another
	gradient, into that referencing gradient and removes it from the document.

	The referencing gradient adopts the stops (when it has none of its own)
	and any of gradientUnits, spreadMethod, gradientTransform - plus the
	geometry attributes when both gradients are of the same kind - that it
	does not specify itself.  Its xlink:href is re-pointed at whatever the
	removed gradient referenced, or dropped when that gradient referenced
	nothing.

	Returns the number of gradients removed; numElemsRemoved grows by the
	same amount.
	"""
	global numElemsRemoved
	num = 0
	gradTags = ['linearGradient', 'radialGradient']

	# the reference map is a snapshot taken before anything is collapsed
	for rid,nodeCount in findReferencedElements(doc.documentElement).items():
		count = nodeCount[0]
		nodes = nodeCount[1]
		if count != 1:
			continue

		elem = findElementById(doc.documentElement,rid)
		if elem is None or elem.nodeType != 1 or elem.nodeName not in gradTags \
				or elem.namespaceURI != NS['SVG']:
			continue

		# found a gradient that is referenced by only 1 other element
		refElem = nodes[0]
		if refElem.nodeType != 1 or refElem.nodeName not in gradTags \
				or refElem.namespaceURI != NS['SVG']:
			continue

		# refElem may have been collapsed away earlier in this very pass (the
		# snapshot does not know); merging into a detached node would lose elem
		if refElem.parentNode is None:
			continue

		# elem is a gradient referenced by only one other gradient (refElem)

		# add the stops to the referencing gradient (this removes them from elem)
		if len(refElem.getElementsByTagNameNS(NS['SVG'], 'stop')) == 0:
			stopsToAdd = elem.getElementsByTagNameNS(NS['SVG'], 'stop')
			for stop in stopsToAdd:
				refElem.appendChild(stop)

		# attributes refElem adopts from elem when it leaves them unspecified;
		# geometry is only adopted between gradients of the same kind
		adoptable = ['gradientUnits','spreadMethod','gradientTransform']
		if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
			adoptable = adoptable + ['fx','fy','cx','cy','r']
		if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
			adoptable = adoptable + ['x1','y1','x2','y2']
		for attr in adoptable:
			if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
				refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

		# refElem must not keep pointing at elem.  If elem itself referenced
		# another gradient, refElem takes over that reference so the chain
		# stays intact; otherwise the xlink:href is simply dropped.
		hrefAttr = refElem.getAttributeNodeNS(NS['XLINK'], 'href')
		if hrefAttr is not None:
			targetHref = elem.getAttributeNS(NS['XLINK'], 'href')
			if targetHref != '':
				# reuse the existing qualified name so the prefix is kept
				refElem.setAttributeNS(NS['XLINK'], hrefAttr.nodeName, targetHref)
			else:
				refElem.removeAttributeNS(NS['XLINK'], 'href')

		# now delete elem
		elem.parentNode.removeChild(elem)
		numElemsRemoved += 1
		num += 1
	return num

def _gradientsMatch(grad, ograd):
	"""
	Returns True when both gradients have the same gradient attributes, the
	same xlink:href, and the same number of stops with matching attributes.
	"""
	for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
		if grad.getAttribute(attr) != ograd.getAttribute(attr):
			return False

	# compare xlink:href values too
	if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
		return False

	stops = grad.getElementsByTagNameNS(NS['SVG'], 'stop')
	ostops = ograd.getElementsByTagNameNS(NS['SVG'], 'stop')
	if stops.length != ostops.length:
		return False

	for i in range(stops.length):
		stop = stops.item(i)
		ostop = ostops.item(i)
		# 'style' is compared as well: a stop may keep its color there
		for attr in ['offset', 'stop-color', 'stop-opacity', 'style']:
			if stop.getAttribute(attr) != ostop.getAttribute(attr):
				return False
	return True

def removeDuplicateGradients(doc):
	"""
	Finds gradients that are exact copies of another gradient of the same
	kind, re-points every fill, stroke, style and xlink:href reference from
	each copy to the gradient kept as its master, and removes the copies.

	A copy that is still referenced is left alone when its master has no id,
	because the references could not be re-pointed.

	Returns the number of gradients removed; numElemsRemoved grows by the
	same amount.
	"""
	global numElemsRemoved
	num = 0

	# master gradient -> list of its duplicates
	gradientsToRemove = {}
	# duplicate gradient -> its master
	duplicateToMaster = {}

	for gradType in ['linearGradient', 'radialGradient']:
		grads = doc.getElementsByTagNameNS(NS['SVG'], gradType)
		for grad in grads:
			# TODO: should slice grads from 'grad' here to optimize
			for ograd in grads:
				# do not compare gradient to itself
				if grad == ograd: continue
				if not _gradientsMatch(grad, ograd): continue

				# ograd is a duplicate of grad, we schedule it to be removed UNLESS
				# ograd is ALREADY considered a 'master' element or is already
				# scheduled as the duplicate of some master
				if ograd in gradientsToRemove or ograd in duplicateToMaster:
					continue
				if grad not in gradientsToRemove:
					gradientsToRemove[grad] = []
				gradientsToRemove[grad].append( ograd )
				duplicateToMaster[ograd] = grad

	# get a collection of all elements that are referenced and their referencing elements
	referencedIDs = findReferencedElements(doc.documentElement)
	for masterGrad in gradientsToRemove:
		master_id = masterGrad.getAttribute('id')
		masterRef = 'url(#'+master_id+')'
		for dupGrad in gradientsToRemove[masterGrad]:
			# if the duplicate gradient no longer has a parent that means it was
			# already re-mapped to another master gradient
			if not dupGrad.parentNode: continue
			dup_id = dupGrad.getAttribute('id')

			# a duplicate nobody references has no entry in referencedIDs
			referrers = []
			if dup_id in referencedIDs:
				referrers = referencedIDs[dup_id][1]
			# references cannot be re-pointed at a master without an id
			if len(referrers) > 0 and master_id == '': continue

			dupRefs = ['url(#'+dup_id+')', 'url("#'+dup_id+'")', "url('#"+dup_id+"')"]
			# for each element that referenced the gradient we are going to remove
			for elem in referrers:
				# find out which attribute referenced the duplicate gradient
				for attr in ['fill', 'stroke']:
					if elem.getAttribute(attr) in dupRefs:
						elem.setAttribute(attr, masterRef)

				# the reference may also sit inside the style attribute
				style = elem.getAttribute('style')
				newStyle = style
				for dupRef in dupRefs:
					newStyle = newStyle.replace(dupRef, masterRef)
				if newStyle != style:
					elem.setAttribute('style', newStyle)

				hrefAttr = elem.getAttributeNodeNS(NS['XLINK'], 'href')
				if hrefAttr is not None and hrefAttr.value == '#'+dup_id:
					# reuse the existing qualified name so the prefix is kept
					elem.setAttributeNS(NS['XLINK'], hrefAttr.nodeName, '#'+master_id)

			# now that all referencing elements have been re-mapped to the master
			# it is safe to remove this gradient from the document
			dupGrad.parentNode.removeChild(dupGrad)
			numElemsRemoved += 1
			num += 1
	return num

def _styleNumber(text):
	"""
	Returns text converted to a float, or None when it is not a plain number
	(for example 'inherit').
	"""
	try:
		return float(text)
	except ValueError:
		return None

def repairStyle(node, options):
	"""
	Cleans up the style attribute of node and of everything below it.

	Declarations that have no effect are dropped (opacity:1, stroke details
	when stroke is none, font properties on basic shapes, ...), the malformed
	'url(#id) rgb(0, 0, 0)' paint value is repaired and, when
	options.style_to_xml is true, every property listed in svgAttributes is
	moved into an XML attribute of the same name.  The style attribute is
	rewritten from what remains, or removed when nothing remains.

	Returns the number of repairs counted.
	"""
	num = 0
	if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
		# get all style properties and stuff them into a dictionary
		styleMap = { }
		rawStyles = node.getAttribute('style').split(';')
		for style in rawStyles:
			# split at the first colon only: the value may contain colons itself
			propval = style.split(':', 1)
			if len(propval) == 2 :
				styleMap[propval[0].strip()] = propval[1].strip()

		# I've seen this enough to know that I need to correct it:
		# fill: url(#linearGradient4918) rgb(0, 0, 0);
		for prop in ['fill', 'stroke'] :
			if prop in styleMap :
				chunk = styleMap[prop].split(') ')
				if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
					styleMap[prop] = chunk[0] + ')'
					num += 1

		# Here is where we can weed out unnecessary styles like:
		#  opacity:1
		if 'opacity' in styleMap :
			# None (not a number) matches neither branch and is left alone
			opacity = _styleNumber(styleMap['opacity'])
			# opacity='1.0' is useless, remove it
			if opacity == 1.0 :
				del styleMap['opacity']
				num += 1

			# if opacity='0' then all fill and stroke properties are useless, remove them
			elif opacity == 0.0 :
				for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
					'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap', 'stroke-dasharray',
					'stroke-dashoffset'] :
					if uselessStyle in styleMap:
						del styleMap[uselessStyle]
						num += 1

		#  if stroke:none, then remove all stroke-related properties (stroke-width, etc)
		#  TODO: should also detect if the computed value of this element is stroke="none"
		if 'stroke' in styleMap and styleMap['stroke'] == 'none' :
			for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
					'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
				if strokestyle in styleMap :
					del styleMap[strokestyle]
					num += 1
			# TODO: This is actually a problem if a parent element has a specified stroke
			# we need to properly calculate computed values
			del styleMap['stroke']

		#  if fill:none, then remove all fill-related properties (fill-rule, etc)
		if 'fill' in styleMap and styleMap['fill'] == 'none' :
			for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
				if fillstyle in styleMap :
					del styleMap[fillstyle]
					num += 1

		#  stop-opacity: 1
		if 'stop-opacity' in styleMap :
			if _styleNumber(styleMap['stop-opacity']) == 1.0 :
				del styleMap['stop-opacity']
				num += 1

		#  fill-opacity: 1 or 0
		if 'fill-opacity' in styleMap :
			fillOpacity = _styleNumber(styleMap['fill-opacity'])
			#  TODO: This is actually a problem if the parent element does not have fill-opacity=1
			if fillOpacity == 1.0 :
				del styleMap['fill-opacity']
				num += 1
			elif fillOpacity == 0.0 :
				for uselessFillStyle in [ 'fill', 'fill-rule' ] :
					if uselessFillStyle in styleMap:
						del styleMap[uselessFillStyle]
						num += 1

		#  stroke-opacity: 1 or 0
		if 'stroke-opacity' in styleMap :
			strokeOpacity = _styleNumber(styleMap['stroke-opacity'])
			#  TODO: This is actually a problem if the parent element does not have stroke-opacity=1
			if strokeOpacity == 1.0 :
				del styleMap['stroke-opacity']
				num += 1
			elif strokeOpacity == 0.0 :
				for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
							'stroke-dasharray', 'stroke-dashoffset' ] :
					if uselessStrokeStyle in styleMap:
						del styleMap[uselessStrokeStyle]
						num += 1

		# stroke-width: 0
		if 'stroke-width' in styleMap :
			# getSVGLength gives back the string itself for anything that is
			# not a bare number, which never equals 0.0
			strokeWidth = getSVGLength(styleMap['stroke-width'])
			if strokeWidth == 0.0 :
				for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
							'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
					if uselessStrokeStyle in styleMap:
						del styleMap[uselessStrokeStyle]
						num += 1

		# remove font properties for non-text elements
		# I've actually observed this in real SVG content
		if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
			for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
								'font-style', 'font-variant', 'font-weight',
								'letter-spacing', 'line-height', 'kerning',
								'text-anchor', 'text-decoration', 'text-rendering',
								'unicode-bidi', 'word-spacing', 'writing-mode'] :
				if fontstyle in styleMap :
					del styleMap[fontstyle]
					num += 1

		# remove inkscape-specific styles
		# TODO: need to get a full list of these
		for inkscapeStyle in ['-inkscape-font-specification']:
			if inkscapeStyle in styleMap:
				del styleMap[inkscapeStyle]
				num += 1

		# visibility: visible
		if 'visibility' in styleMap :
			if styleMap['visibility'] == 'visible':
				del styleMap['visibility']
				num += 1

		# display: inline
		if 'display' in styleMap :
			if styleMap['display'] == 'inline':
				del styleMap['display']
				num += 1

		# overflow only has an effect on elements that establish a viewport and
		# on marker and pattern; anywhere else it can go whatever its value.
		# On those elements it is kept, since dropping it could change rendering.
		if 'overflow' in styleMap :
			if node.nodeName not in ['svg', 'symbol', 'image', 'foreignObject', 'marker', 'pattern']:
				del styleMap['overflow']
				num += 1

		# marker: none
		if 'marker' in styleMap :
			if styleMap['marker'] == 'none':
				del styleMap['marker']
				num += 1

		# now if any of the properties match known SVG attributes we prefer attributes
		# over style so emit them and remove them from the style map
		if options.style_to_xml:
			# iterate over a copy of the keys because entries are deleted on the way
			for propName in list(styleMap.keys()) :
				if propName in svgAttributes :
					node.setAttribute(propName, styleMap[propName])
					del styleMap[propName]

		# sew our remaining style properties back together into a style attribute
		fixedStyle = ''.join([prop + ':' + styleMap[prop] + ';' for prop in styleMap])

		if fixedStyle != '' :
			node.setAttribute('style', fixedStyle)
		else:
			node.removeAttribute('style')

	# recurse for our child elements
	for child in node.childNodes :
		num += repairStyle(child,options)

	return num

def removeDefaultAttributeValues(node, options):
	"""
		Recursively removes attributes whose value equals the SVG default.

		node is a DOM node (non-element nodes are ignored) and options is only
		handed on to the recursive calls.

		The percentage defaults of x2, cx, cy and r only exist on gradients, so
		they are only stripped from linearGradient / radialGradient elements.
		On shapes such as <line> or <circle> the default of these attributes
		is 0 and removing e.g. r="50%" would change the drawing.

		Returns the number of attributes removed from node and its descendants.
	"""
	num = 0
	if node.nodeType != 1: return 0

	# gradientUnits: objectBoundingBox
	if node.getAttribute('gradientUnits') == 'objectBoundingBox':
		node.removeAttribute('gradientUnits')
		num += 1

	# spreadMethod: pad
	if node.getAttribute('spreadMethod') == 'pad':
		node.removeAttribute('spreadMethod')
		num += 1

	# a unitless fraction (1 or 0.5) only equals the percentage default when
	# the gradient is not expressed in user space units
	boundingBoxUnits = (node.getAttribute('gradientUnits') != 'userSpaceOnUse')

	# x1: 0%
	if node.getAttribute('x1') != '':
		x1 = SVGLength(node.getAttribute('x1'))
		if x1.value == 0:
			node.removeAttribute('x1')
			num += 1

	# y1: 0%
	if node.getAttribute('y1') != '':
		y1 = SVGLength(node.getAttribute('y1'))
		if y1.value == 0:
			node.removeAttribute('y1')
			num += 1

	# x2: 100% (linearGradient only)
	if node.nodeName == 'linearGradient' and node.getAttribute('x2') != '':
		x2 = SVGLength(node.getAttribute('x2'))
		if (x2.value == 100 and x2.units == Unit.PCT) or \
			(boundingBoxUnits and x2.value == 1 and x2.units == Unit.NONE):
			node.removeAttribute('x2')
			num += 1

	# y2: 0%
	if node.getAttribute('y2') != '':
		y2 = SVGLength(node.getAttribute('y2'))
		if y2.value == 0:
			node.removeAttribute('y2')
			num += 1

	# fx: equal to cx (must be checked before cx itself may be removed)
	if node.getAttribute('fx') != '':
		if node.getAttribute('fx') == node.getAttribute('cx'):
			node.removeAttribute('fx')
			num += 1

	# fy: equal to cy (must be checked before cy itself may be removed)
	if node.getAttribute('fy') != '':
		if node.getAttribute('fy') == node.getAttribute('cy'):
			node.removeAttribute('fy')
			num += 1

	# cx, cy, r: 50% (radialGradient only)
	if node.nodeName == 'radialGradient':
		for attrName in ['cx', 'cy', 'r']:
			if node.getAttribute(attrName) != '':
				length = SVGLength(node.getAttribute(attrName))
				if (length.value == 50 and length.units == Unit.PCT) or \
					(boundingBoxUnits and length.value == 0.5 and length.units == Unit.NONE):
					node.removeAttribute(attrName)
					num += 1

	# recurse for our child elements
	for child in node.childNodes :
		num += removeDefaultAttributeValues(child,options)

	return num

# rgb(R,G,B) with integer channels
rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
# rgb(R%,G%,B%) with percentage channels
rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
def convertColor(value):
	"""
		Converts the input color string and returns a #RRGGBB (or #RGB if possible) string

		Color keywords found in the colors table are replaced first, then
		rgb(...) notations (integer or percentage) are turned into hex.
		Channels above 255 (or 100%) are clamped to 255 so that the result is
		always a valid six digit color.  Any other value, including the
		empty string, is returned unchanged.
	"""
	s = value

	if s in colors:
		s = colors[s]

	rgbpMatch = rgbp.match(s)
	if rgbpMatch is not None :
		r = min(int(float(rgbpMatch.group(1)) * 255.0 / 100.0), 255)
		g = min(int(float(rgbpMatch.group(2)) * 255.0 / 100.0), 255)
		b = min(int(float(rgbpMatch.group(3)) * 255.0 / 100.0), 255)
		s  = 'rgb(%d,%d,%d)' % (r,g,b)

	rgbMatch = rgb.match(s)
	if rgbMatch is not None :
		r = min(int(rgbMatch.group(1)), 255)
		g = min(int(rgbMatch.group(2)), 255)
		b = min(int(rgbMatch.group(3)), 255)
		s = '#%02X%02X%02X' % (r,g,b)

	# #RRGGBB collapses to #RGB when both digits of every channel are equal;
	# the length is tested first so that an empty string is never indexed
	if len(s)==7 and s[0] == '#' and s[1]==s[2] and s[3]==s[4] and s[5]==s[6]:
		s = s.upper()
		s = '#'+s[1]+s[3]+s[5]

	return s

def convertColors(element) :
	"""
		Walks element and all of its descendants and rewrites every color
		attribute through convertColor whenever the converted form is
		strictly shorter than the existing value.

		Returns the total number of bytes saved; non-element nodes yield 0.
	"""
	if element.nodeType != 1:
		return 0

	# choose the color-carrying attributes that apply to this kind of element
	tag = element.nodeName
	if tag in ['rect', 'circle', 'ellipse', 'polygon', \
				'line', 'polyline', 'path', 'g', 'a']:
		colorAttrs = ['fill', 'stroke']
	elif tag in ['stop']:
		colorAttrs = ['stop-color']
	elif tag in ['solidColor']:
		colorAttrs = ['solid-color']
	else:
		colorAttrs = []

	saved = 0
	for name in colorAttrs:
		before = element.getAttribute(name)
		if before == '':
			continue
		after = convertColor(before)
		if len(before) > len(after):
			element.setAttribute(name, after)
			# measure against what the DOM actually stored
			saved += (len(before) - len(element.getAttribute(name)))

	# descendants contribute their own savings
	for kid in element.childNodes :
		saved += convertColors(kid)

	return saved

# TODO: go over what this method does and see if there is a way to optimize it
# TODO: go over the performance of this method and see if I can save memory/speed by
#       reusing data structures, etc
def _collapseSameCommands(path):
	"""
		Returns a copy of path in which runs of consecutive identical commands
		(after the first command, which is left alone) are merged into one.

		Move commands are never merged: the extra coordinate pairs of a
		moveto are implicit linetos, so joining two moves would draw a line.
	"""
	prevCmd = ''
	prevData = []
	newPath = [path[0]]
	for (cmd,data) in path[1:]:
		# flush the previous command if it cannot absorb the current one
		if prevCmd != '' and (cmd != prevCmd or cmd == 'm'):
			newPath.append( (prevCmd, prevData) )
			prevCmd = ''
			prevData = []

		if cmd == prevCmd:
			prevData.extend(data)
		else:
			prevCmd = cmd
			# copy so that merging never modifies the caller's lists
			prevData = list(data)
	# flush last command and data
	if prevCmd != '':
		newPath.append( (prevCmd, prevData) )
	return newPath

def cleanPath(element) :
	"""
		Cleans the path string (d attribute) of the element

		The path is parsed, every command after the first is made relative,
		empty segments are dropped, straight curves become lines, runs of the
		same command are merged, shorthand commands (h, v, s, t) are used
		where possible and the result is written back to the d attribute.

		Updates the global counters numBytesSavedInPathData,
		numPathSegmentsReduced and numCurvesStraightened.  A path that is
		empty or does not start with a moveto is left untouched.
	"""
	global numBytesSavedInPathData
	global numPathSegmentsReduced
	global numCurvesStraightened

	# this gets the parser object from svg_regex.py
	oldPathStr = element.getAttribute('d')
	pathObj = svg_parser.parse(oldPathStr)

	# however, this parser object has some ugliness in it (lists of tuples of tuples of
	# numbers and booleans).  we just need a list of (cmd,[numbers]):
	path = []
	for (cmd,dataset) in pathObj:
		if cmd in ['M','m','L','l','T','t']:
			# one or more tuples, each containing two numbers
			nums = []
			for t in dataset:
				# convert to a Decimal (multiplying by 1 applies the context precision)
				nums.append(Decimal(str(t[0])) * Decimal(1))
				nums.append(Decimal(str(t[1])) * Decimal(1))

			# only create this segment if it is not empty
			if nums:
				path.append( (cmd, nums) )

		elif cmd in ['V','v','H','h']:
			# one or more numbers
			nums = []
			for n in dataset:
				nums.append(Decimal(str(n)))
			if nums:
				path.append( (cmd, nums) )

		elif cmd in ['C','c','S','s','Q','q']:
			# one or more tuples, each containing three (C) or two (S,Q) tuples
			# of two numbers each
			nums = []
			for t in dataset:
				for pair in t:
					nums.append(Decimal(str(pair[0])) * Decimal(1))
					nums.append(Decimal(str(pair[1])) * Decimal(1))
			path.append( (cmd, nums) )

		elif cmd in ['A','a']:
			# one or more tuples, each containing a tuple of two numbers, a number, a boolean,
			# another boolean, and a tuple of two numbers
			nums = []
			for t in dataset:
				nums.append( Decimal(str(t[0][0])) * Decimal(1) )
				nums.append( Decimal(str(t[0][1])) * Decimal(1) )
				nums.append( Decimal(str(t[1])) * Decimal(1))

				if t[2]: nums.append( Decimal(1) )
				else: nums.append( Decimal(0) )

				if t[3]: nums.append( Decimal(1) )
				else: nums.append( Decimal(0) )

				nums.append( Decimal(str(t[4][0])) * Decimal(1) )
				nums.append( Decimal(str(t[4][1])) * Decimal(1) )
			path.append( (cmd, nums) )

		elif cmd in ['Z','z']:
			path.append( (cmd, []) )

	# everything below relies on the path starting with a moveto
	if not path or path[0][0] not in ['M','m']:
		return

	# calculate the starting x,y coord for the second path command
	(firstCmd,firstData) = path[0]
	# the first pair of the initial moveto is always absolute and is the point
	# a closepath returns to
	(startx,starty) = (firstData[0],firstData[1])
	if firstCmd == 'M':
		# further pairs are absolute linetos: the last pair is the current point
		(x,y) = (firstData[-2],firstData[-1])
	else:
		# relative move: further pairs are relative linetos, accumulate them
		(x,y) = (firstData[0],firstData[1])
		n = 2
		while n < len(firstData):
			x += firstData[n]
			y += firstData[n+1]
			n += 2

	# convert absolute coordinates into relative ones (start with the second subcommand
	# and leave the first M as absolute)
	newPath = [path[0]]
	for (cmd,data) in path[1:]:
		i = 0
		newCmd = cmd
		newData = data
		# adjust abs to rel
		# only the A command has some values that we don't want to adjust (radii, rotation, flags)
		if cmd == 'A':
			newCmd = 'a'
			newData = []
			while i < len(data):
				newData.append(data[i])
				newData.append(data[i+1])
				newData.append(data[i+2])
				newData.append(data[i+3])
				newData.append(data[i+4])
				newData.append(data[i+5]-x)
				newData.append(data[i+6]-y)
				x = data[i+5]
				y = data[i+6]
				i += 7
		elif cmd == 'a':
			while i < len(data):
				x += data[i+5]
				y += data[i+6]
				i += 7
		elif cmd == 'H':
			newCmd = 'h'
			newData = []
			while i < len(data):
				newData.append(data[i]-x)
				x = data[i]
				i += 1
		elif cmd == 'h':
			while i < len(data):
				x += data[i]
				i += 1
		elif cmd == 'V':
			newCmd = 'v'
			newData = []
			while i < len(data):
				newData.append(data[i] - y)
				y = data[i]
				i += 1
		elif cmd == 'v':
			while i < len(data):
				y += data[i]
				i += 1
		elif cmd in ['M']:
			newCmd = cmd.lower()
			newData = []
			startx = data[0]
			starty = data[1]
			while i < len(data):
				newData.append( data[i] - x )
				newData.append( data[i+1] - y )
				x = data[i]
				y = data[i+1]
				i += 2
		elif cmd in ['L','T']:
			newCmd = cmd.lower()
			newData = []
			while i < len(data):
				newData.append( data[i] - x )
				newData.append( data[i+1] - y )
				x = data[i]
				y = data[i+1]
				i += 2
		elif cmd in ['m']:
			# the new subpath starts relative to the current point, which is
			# not necessarily the start of the previous subpath
			startx = x + data[0]
			starty = y + data[1]
			while i < len(data):
				x += data[i]
				y += data[i+1]
				i += 2
		elif cmd in ['l','t']:
			while i < len(data):
				x += data[i]
				y += data[i+1]
				i += 2
		elif cmd in ['S','Q']:
			newCmd = cmd.lower()
			newData = []
			while i < len(data):
				newData.append( data[i] - x )
				newData.append( data[i+1] - y )
				newData.append( data[i+2] - x )
				newData.append( data[i+3] - y )
				x = data[i+2]
				y = data[i+3]
				i += 4
		elif cmd in ['s','q']:
			while i < len(data):
				x += data[i+2]
				y += data[i+3]
				i += 4
		elif cmd == 'C':
			newCmd = 'c'
			newData = []
			while i < len(data):
				newData.append( data[i] - x )
				newData.append( data[i+1] - y )
				newData.append( data[i+2] - x )
				newData.append( data[i+3] - y )
				newData.append( data[i+4] - x )
				newData.append( data[i+5] - y )
				x = data[i+4]
				y = data[i+5]
				i += 6
		elif cmd == 'c':
			while i < len(data):
				x += data[i+4]
				y += data[i+5]
				i += 6
		elif cmd in ['z','Z']:
			x = startx
			y = starty
			newCmd = 'z'
		newPath.append( (newCmd, newData) )
	path = newPath

	# remove empty segments
	# for each command: (values per segment, index of the first value that has
	# to be zero for the segment to be empty).  Lines and curves are empty when
	# all of their values are zero (a curve that returns to its start with
	# non-zero control points is a visible loop); an arc is empty as soon as
	# its end point does not move.
	segmentShape = { 'm': (2,0), 'l': (2,0), 't': (2,0),
					'c': (6,0), 'q': (4,0), 'a': (7,5) }
	newPath = [path[0]]
	for (cmd,data) in path[1:]:
		if cmd in segmentShape:
			(size,firstChecked) = segmentShape[cmd]
			newData = []
			i = 0
			while i < len(data):
				segment = data[i:i+size]
				isEmpty = True
				for value in segment[firstChecked:]:
					if value != 0:
						isEmpty = False
				# the first pair of a moveto opens a new subpath and is kept
				# even if it does not move the current point
				if cmd == 'm' and i == 0:
					isEmpty = False
				if isEmpty:
					numPathSegmentsReduced += 1
				else:
					newData.extend(segment)
				i += size
			if newData:
				newPath.append( (cmd,newData) )
		elif cmd in ['h','v']:
			newData = [coord for coord in data if coord != 0]
			numPathSegmentsReduced += len(data) - len(newData)
			if newData:
				newPath.append( (cmd,newData) )
		else:
			newPath.append( (cmd,data) )
	path = newPath

	# convert straight curves into lines
	newPath = [path[0]]
	for (cmd,data) in path[1:]:
		i = 0
		newData = data
		if cmd == 'c':
			newData = []
			while i < len(data):
				# since all commands are now relative, we can think of previous point as (0,0)
				# and new point (dx,dy) is (data[i+4],data[i+5])
				# eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
				(p1x,p1y) = (data[i],data[i+1])
				(p2x,p2y) = (data[i+2],data[i+3])
				dx = data[i+4]
				dy = data[i+5]

				foundStraightCurve = False

				if dx == 0:
					# a curve that ends where it started has no line to lie on
					if dy != 0 and p1x == 0 and p2x == 0:
						foundStraightCurve = True
				else:
					m = dy/dx
					# both control points have to lie on the line
					if p1y == m*p1x and p2y == m*p2x:
						foundStraightCurve = True

				if foundStraightCurve:
					# flush any existing curve coords first
					if newData:
						newPath.append( (cmd,newData) )
						newData = []
					# now create a straight line segment
					newPath.append( ('l', [dx,dy]) )
					numCurvesStraightened += 1
				else:
					newData.append(data[i])
					newData.append(data[i+1])
					newData.append(data[i+2])
					newData.append(data[i+3])
					newData.append(data[i+4])
					newData.append(data[i+5])

				i += 6
		if newData or cmd == 'z' or cmd == 'Z':
			newPath.append( (cmd,newData) )
	path = newPath

	# collapse all consecutive commands of the same type into one command
	path = _collapseSameCommands(path)

	# convert to shorthand path segments where possible
	# NOTE(review): the first segment of a c/q command is compared against a
	# reflected control point of (0,0), which looks correct only when the
	# preceding command is not itself a curve of the same family - confirm
	newPath = [path[0]]
	for (cmd,data) in path[1:]:
		# convert line segments into h,v where possible
		if cmd == 'l':
			i = 0
			lineTuples = []
			while i < len(data):
				if data[i] == 0:
					# vertical
					if lineTuples:
						# flush the existing line command
						newPath.append( ('l', lineTuples) )
						lineTuples = []
					# append the v and then the remaining line coords
					newPath.append( ('v', [data[i+1]]) )
					numPathSegmentsReduced += 1
				elif data[i+1] == 0:
					if lineTuples:
						# flush the line command, then append the h and then the remaining line coords
						newPath.append( ('l', lineTuples) )
						lineTuples = []
					newPath.append( ('h', [data[i]]) )
					numPathSegmentsReduced += 1
				else:
					lineTuples.append(data[i])
					lineTuples.append(data[i+1])
				i += 2
			if lineTuples:
				newPath.append( ('l', lineTuples) )
		# convert Bezier curve segments into s where possible
		elif cmd == 'c':
			bez_ctl_pt = (0,0)
			i = 0
			curveTuples = []
			while i < len(data):
				# rotate by 180deg means negate both coordinates
				# if the previous control point is equal then we can substitute a
				# shorthand bezier command
				if bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]:
					if curveTuples:
						newPath.append( ('c', curveTuples) )
						curveTuples = []
					# append the s command
					newPath.append( ('s', [data[i+2], data[i+3], data[i+4], data[i+5]]) )
					numPathSegmentsReduced += 1
				else:
					curveTuples.extend(data[i:i+6])

				# set up control point for next curve segment
				bez_ctl_pt = (data[i+4]-data[i+2], data[i+5]-data[i+3])
				i += 6

			if curveTuples:
				newPath.append( ('c', curveTuples) )
		# convert quadratic curve segments into t where possible
		elif cmd == 'q':
			quad_ctl_pt = (0,0)
			i = 0
			curveTuples = []
			while i < len(data):
				if quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]:
					if curveTuples:
						newPath.append( ('q', curveTuples) )
						curveTuples = []
					# append the t command
					newPath.append( ('t', [data[i+2], data[i+3]]) )
					numPathSegmentsReduced += 1
				else:
					curveTuples.extend(data[i:i+4])

				quad_ctl_pt = (data[i+2]-data[i], data[i+3]-data[i+1])
				i += 4

			if curveTuples:
				newPath.append( ('q', curveTuples) )
		else:
			newPath.append( (cmd, data) )
	path = newPath

	# for each h or v, collapse unnecessary coordinates that run in the same direction
	# i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
	newPath = [path[0]]
	for (cmd,data) in path[1:]:
		if cmd in ['h','v'] and len(data) > 1:
			newData = []
			prevCoord = data[0]
			for coord in data[1:]:
				if isSameSign(prevCoord, coord):
					prevCoord += coord
					numPathSegmentsReduced += 1
				else:
					newData.append(prevCoord)
					prevCoord = coord
			newData.append(prevCoord)
			newPath.append( (cmd, newData) )
		else:
			newPath.append( (cmd, data) )
	path = newPath

	# it is possible that we have consecutive h, v, c, t commands now
	# so again collapse all consecutive commands of the same type into one command
	path = _collapseSameCommands(path)

	newPathStr = serializePath(path)
	numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
	element.setAttribute('d', newPathStr)

def parseListOfPoints(s):
	"""
		Parse string into a list of points.

		Returns a list of containing an even number of coordinate strings.
		An empty list is returned when the string is empty, holds an odd
		number of values, or holds a value that is not unitless.
	"""
	# surrounding whitespace is not a separator
	s = s.strip()
	if not s: return []

	# (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
	# coordinate-pair = coordinate comma-or-wsp coordinate
	# coordinate = sign? integer
	# the separator is either a comma with optional whitespace around it or a
	# run of whitespace; unlike an all-optional pattern it never matches the
	# empty string
	nums = re.split(r"\s*,\s*|\s+", s)
	i = 0
	points = []
	while i < len(nums):
		x = SVGLength(nums[i])
		# if we had an odd number of points, return empty
		if i == len(nums)-1: return []
		else: y = SVGLength(nums[i+1])

		# if the coordinates were not unitless, return empty
		if x.units != Unit.NONE or y.units != Unit.NONE: return []
		points.append( str(x.value) )
		points.append( str(y.value) )
		i += 2

	return points

def cleanPolygon(elem):
	"""
		Remove unnecessary closing point of polygon points attribute

		A final point equal to the first one is dropped (and counted in the
		global numPointsRemovedFromPolygon) and the points are written back
		in scoured form.  If the points attribute cannot be parsed it is
		left untouched instead of being emptied.
	"""
	global numPointsRemovedFromPolygon

	pts = parseListOfPoints(elem.getAttribute('points'))
	# an empty list means "nothing usable was parsed"
	if not pts:
		return

	N = len(pts)//2
	if N >= 2:
		(startx,starty) = (pts[0],pts[1])
		(endx,endy) = (pts[-2],pts[-1])
		if startx == endx and starty == endy:
			pts = pts[:-2]
			numPointsRemovedFromPolygon += 1
	elem.setAttribute('points', scourCoordinates(pts))

def cleanPolyline(elem):
	"""
		Scour the polyline points attribute

		If the points attribute cannot be parsed it is left untouched
		instead of being emptied.
	"""
	pts = parseListOfPoints(elem.getAttribute('points'))
	# an empty list means "nothing usable was parsed"
	if not pts:
		return
	elem.setAttribute('points', scourCoordinates(pts))

def serializePath(pathObj):
	"""
		Turns a list of (command, coordinates) pairs back into path data:
		every command letter is directly followed by its scoured coordinates.
	"""
	return "".join([cmd + scourCoordinates(data) for (cmd,data) in pathObj])

def scourCoordinates(data):
	"""
		Builds the shortest textual form of a list of coordinates:
			- each value is passed through scourLength
			- a comma separates two values only when the second one is not
			  negative (its minus sign already acts as a separator)

		Returns an empty string for None or an empty list.
	"""
	if data == None:
		return ""

	pieces = []
	lastIndex = len(data)-1
	for (index,coord) in enumerate(data):
		pieces.append(scourLength(coord))

		# look ahead: a negative successor needs no comma in front of it
		if index < lastIndex and Decimal(data[index+1]) >= 0:
			pieces.append(',')
	return "".join(pieces)

def scourLength(str):
	"""
		Returns the shortest textual form of a length: the numeric value is
		rounded to the precision of the decimal context, integerized when
		possible, stripped of trailing zeros, and followed by its unit.
	"""
	length = SVGLength(str)
	coord = length.value

	# reduce to the proper number of digits
	coord = Decimal(unicode(coord)) * Decimal(1)

	# integerize if we can
	if int(coord) == coord: coord = Decimal(unicode(int(coord)))

	# use Decimal.trim() to trim trailing zeros where the interpreter provides it
	try:
		coord = coord.trim()
	except AttributeError:
		# trim it ourselves
		s = unicode(coord)
		# only the mantissa may lose zeros: stripping the end of "1.5E-10"
		# would turn the exponent into -1
		exp = s.find('E')
		if exp == -1:
			mantissa, exponent = s, ''
		else:
			mantissa, exponent = s[:exp], s[exp:]
		if mantissa.find('.') != -1:
			mantissa = mantissa.rstrip('0').rstrip('.')
		coord = Decimal(mantissa + exponent)

		# Decimal.normalize() will uses scientific notation - if that
		# string is smaller, then use it
		normd = coord.normalize()
		if len(unicode(normd)) < len(unicode(coord)):
			coord = normd

	return unicode(coord)+Unit.str(length.units)

def embedRasters(element, options) :
	"""
		Converts raster references to inline images.
		NOTE: there are size limits to base64-encoding handling in browsers

		element is the element carrying the xlink:href; options.infilename
		is used to resolve hrefs that do not name an existing file.  Only
		hrefs ending in png, jpg or gif are handled.  Increments the global
		numRastersEmbedded for every image embedded.
	"""
	global numRastersEmbedded

	href = element.getAttributeNS(NS['XLINK'],'href')

	# if xlink:href is set, then grab the id
	if href != '' and len(href) > 1:
		# find if href value has filename ext
		ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]

		# look for 'png', 'jpg', and 'gif' extensions
		if ext == 'png' or ext == 'jpg' or ext == 'gif':

			# check if href resolves to an existing file
			if not os.path.isfile(href) and href[:7] != 'http://':
				# if this is not an absolute path, set path relative
				# to script file based on input arg
				infilename = '.'
				if options.infilename: infilename = options.infilename
				href = os.path.join(os.path.dirname(infilename), href)

			rasterdata = ''
			# test if file exists locally
			if os.path.isfile(href):
				# open raster file as raw binary and make sure it is closed again
				raster = open( href, "rb")
				try:
					rasterdata = raster.read()
				finally:
					raster.close()

			elif href[:7] == 'http://':
				webFile = urllib.urlopen( href )
				try:
					rasterdata = webFile.read()
				finally:
					webFile.close()

			# ... should we remove all images which don't resolve?
			if rasterdata != '' :
				# base64-encode raster
				b64eRaster = base64.b64encode( rasterdata )

				# set href attribute to base64-encoded equivalent
				if b64eRaster != '':
					# PNG and GIF both have MIME Type 'image/[ext]', but
					# JPEG has MIME Type 'image/jpeg'
					if ext == 'jpg':
						ext = 'jpeg'

					element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
					numRastersEmbedded += 1
					del b64eRaster

def properlySizeDoc(docElement):
	"""
		Replaces the width/height attributes of docElement by an equivalent
		viewBox="0 0 width height" when that is safe.

		Nothing is changed when width or height use a unit other than px,
		or when an existing valid viewBox has a non-zero origin or a size
		different from width/height.
	"""
	# get doc width and height
	w = SVGLength(docElement.getAttribute('width'))
	h = SVGLength(docElement.getAttribute('height'))

	# if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
	if ((w.units != Unit.NONE and w.units != Unit.PX) or
		(h.units != Unit.NONE and h.units != Unit.PX)):
		return

	# else we have a statically sized image and we should try to remedy that

	# parse viewBox attribute; surrounding whitespace is stripped so that it
	# does not produce empty fields that would make a valid viewBox look invalid
	vbSep = re.split(r"\s*,\s*|\s+", docElement.getAttribute('viewBox').strip(), 3)
	# if we have a valid viewBox we need to check it
	if len(vbSep) == 4:
		try:
			# if x or y are specified and non-zero then it is not ok to overwrite it
			vbX = float(vbSep[0])
			vbY = float(vbSep[1])
			if vbX != 0 or vbY != 0:
				return

			# if width or height are not equal to doc width/height then it is not ok to overwrite it
			vbWidth = float(vbSep[2])
			vbHeight = float(vbSep[3])
			if vbWidth != w.value or vbHeight != h.value:
				return
		# if the viewBox did not parse properly it is invalid and ok to overwrite it
		except ValueError:
			pass

	# at this point it's safe to set the viewBox and remove width/height
	docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
	docElement.removeAttribute('width')
	docElement.removeAttribute('height')

def remapNamespacePrefix(node, oldprefix, newprefix):
	"""
		Recursively replaces every element whose prefix is oldprefix by an
		equivalent element using newprefix (or no prefix at all when
		newprefix is the empty string).

		Attributes and children are carried over to the replacement
		element; non-element nodes are ignored.
	"""
	if node is None or node.nodeType != 1: return

	if node.prefix == oldprefix:
		localName = node.localName
		namespace = node.namespaceURI
		doc = node.ownerDocument
		parent = node.parentNode

		# create a replacement node
		newNode = None
		if newprefix != '':
			newNode = doc.createElementNS(namespace, newprefix+":"+localName)
		else:
			newNode = doc.createElement(localName)

		# add all the attributes; the qualified name is passed so that
		# prefixed attributes (e.g. xlink:href) keep their prefix
		attrList = node.attributes
		for i in range(attrList.length):
			attr = attrList.item(i)
			newNode.setAttributeNS( attr.namespaceURI, attr.nodeName, attr.nodeValue)

		# clone and add all the child nodes
		for child in node.childNodes:
			newNode.appendChild(child.cloneNode(True))

		# replace old node with new node; replaceChild hands back the old
		# node, so continue explicitly with the one that is now in the tree
		parent.replaceChild( newNode, node )
		node = newNode

	# now do all child nodes
	for child in node.childNodes :
		remapNamespacePrefix(child, oldprefix, newprefix)

def makeWellFormed(str):
	"""
		Returns str with the characters &, < and > replaced by their XML
		entities so that it can be written as text or attribute content.
	"""
	# encode & as &amp; ( must do this first so that &lt; does not become &amp;lt; )
	newstr = str.replace('&', '&amp;')

	# encode < as &lt; (each step works on the result of the previous one)
	newstr = newstr.replace('<', '&lt;')

	# encode > as &gt; (TODO: is this necessary?)
	newstr = newstr.replace('>', '&gt;')

	return newstr

# hand-rolled serialization function that has the following benefits:
# - pretty printing
# - somewhat judicious use of whitespace
# - ensure id attributes are first
def serializeXML(element, options, ind = 0):
	"""
		Returns element and its subtree as an XML string.

		options.indent_type selects the indentation unit ('tab', 'space',
		anything else means none) and ind is the nesting depth of element.
		The id and xml:id attributes are written before all others and
		every attribute value, ids included, is escaped.
	"""
	indent = ind
	I=''
	if options.indent_type == 'tab': I='\t'
	elif options.indent_type == 'space': I=' '

	outString = (I * ind) + '<' + element.nodeName

	# always serialize the id or xml:id attributes first
	for idName in ['id', 'xml:id']:
		idValue = element.getAttribute(idName)
		if idValue != '':
			outString += _serializeAttribute(idName, idValue)

	# now serialize the other attributes
	attrList = element.attributes
	for num in range(attrList.length) :
		attr = attrList.item(num)
		if attr.nodeName == 'id' or attr.nodeName == 'xml:id': continue
		outString += _serializeAttribute(attr.nodeName, attr.nodeValue)

	# if no children, self-close
	children = element.childNodes
	if children.length > 0:
		outString += '>'

		onNewLine = False
		for child in element.childNodes:
			# element node
			if child.nodeType == 1:
				outString += '\n' + serializeXML(child, options, indent + 1)
				onNewLine = True
			# text node
			elif child.nodeType == 3:
				# trim it only in the case of not being a child of an element
				# where whitespace might be important
				if element.nodeName in ["text", "tspan", "textPath", "tref", "title", "desc", "textArea"]:
					outString += makeWellFormed(child.nodeValue)
				else:
					outString += makeWellFormed(child.nodeValue.strip())
			# CDATA node
			elif child.nodeType == 4:
				outString += '<![CDATA[' + child.nodeValue + ']]>'
			# Comment node
			elif child.nodeType == 8:
				outString += '<!--' + child.nodeValue + '-->'
			# TODO: entities, processing instructions, what else?
			else: # ignore the rest
				pass

		if onNewLine: outString += (I * ind)
		outString += '</' + element.nodeName + '>'
		if indent > 0: outString += '\n'
	else:
		outString += '/>'
		if indent > 0: outString += '\n'

	return outString

def _serializeAttribute(name, value):
	"""
		Returns ' name="value"' with the value escaped.  Single quotes are
		used when the value contains a double quote; when it contains both
		kinds of quote the double quotes are written as &quot; instead.
	"""
	escaped = makeWellFormed(value)
	quot = '"'
	if value.find('"') != -1:
		if value.find("'") != -1:
			escaped = escaped.replace('"', '&quot;')
		else:
			quot = "'"
	return ' ' + name + '=' + quot + escaped + quot

# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
	"""
		Scours the SVG document given as a string and returns the result as
		a string.

		options is the option set produced by the module's option parser;
		when it is None the parser defaults are used.  options.digits sets
		the precision of the decimal context.  Several module-level
		statistics counters are updated along the way.
	"""
	if options is None:
		options = _options_parser.get_default_values()
	getcontext().prec = options.digits
	global numAttrsRemoved
	global numStylePropsFixed
	global numElemsRemoved
	global numBytesSavedInColors
	doc = xml.dom.minidom.parseString(in_string)

	# for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
	# on the first pass, so we do it multiple times
	# does it have to do with removal of children affecting the childlist?
	if options.keep_editor_data == False:
		while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
			pass
		while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
			pass

		# remove the xmlns: declarations now
		xmlnsDeclsToRemove = []
		attrList = doc.documentElement.attributes
		for num in range(attrList.length) :
			if attrList.item(num).nodeValue in unwanted_ns :
				xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

		for attr in xmlnsDeclsToRemove :
			doc.documentElement.removeAttribute(attr)
			numAttrsRemoved += 1

	# ensure namespace for SVG is declared
	if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
		doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
		# TODO: throw error or warning?

	# check for redundant SVG namespace declaration
	attrList = doc.documentElement.attributes
	xmlnsDeclsToRemove = []
	redundantPrefixes = []
	for i in range(attrList.length):
		attr = attrList.item(i)
		name = attr.nodeName
		val = attr.nodeValue
		if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
			redundantPrefixes.append(name[6:])
			xmlnsDeclsToRemove.append(name)

	for attrName in xmlnsDeclsToRemove:
		doc.documentElement.removeAttribute(attrName)

	for prefix in redundantPrefixes:
		remapNamespacePrefix(doc.documentElement, prefix, '')

	# repair style (remove unnecessary style properties and change them into XML attributes)
	numStylePropsFixed = repairStyle(doc.documentElement, options)

	# convert colors to #RRGGBB format
	if options.simple_colors:
		numBytesSavedInColors = convertColors(doc.documentElement)

	# remove empty defs, metadata, g
	# an element counts as empty when none of its children is an element,
	# text, CDATA or comment node
	elemsToRemove = []
	for tag in ['defs', 'metadata', 'g'] :
		for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) :
			removeElem = not elem.hasChildNodes()
			if removeElem == False :
				for child in elem.childNodes :
					if child.nodeType in [1, 3, 4, 8] :
						break
				else:
					removeElem = True
			if removeElem :
				elem.parentNode.removeChild(elem)
				numElemsRemoved += 1

	# remove unreferenced gradients/patterns outside of defs
	while removeUnreferencedElements(doc) > 0:
		pass

	if options.strip_ids:
		bContinueLooping = True
		while bContinueLooping:
			identifiedElements = findElementsWithId(doc.documentElement)
			referencedIDs = findReferencedElements(doc.documentElement)
			bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

	if options.group_collapse:
		while removeNestedGroups(doc.documentElement) > 0:
			pass

	while removeDuplicateGradientStops(doc) > 0:
		pass

	# remove gradients that are only referenced by one other gradient
	while collapseSinglyReferencedGradients(doc) > 0:
		pass

	# remove duplicate gradients
	while removeDuplicateGradients(doc) > 0:
		pass

	# clean path data
	for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') :
		if elem.getAttribute('d') == '':
			elem.parentNode.removeChild(elem)
		else:
			cleanPath(elem)

	# remove unnecessary closing point of polygons and scour points
	for polygon in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'polygon') :
		cleanPolygon(polygon)

	# scour points of polyline; a polyline is not closed automatically, so a
	# final point equal to the first one has to be kept
	for polyline in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'polyline') :
		cleanPolyline(polyline)

	# scour lengths (including coordinates)
	for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
		for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], type):
			for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry', 'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset']:
				if elem.getAttribute(attr) != '':
					elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

	# remove default values of attributes
	numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

	# convert rasters references to base64-encoded strings
	if options.embed_rasters:
		for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') :
			embedRasters(elem, options)

	# properly size the SVG document (ideally width/height should be 100% with a viewBox)
	properlySizeDoc(doc.documentElement)

	# output the document as a pretty string with a single space for indent
	# NOTE: removed pretty printing because of this problem:
	# http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
	# rolled our own serialize function here to save on space, put id first, customize indentation, etc
#	out_string = doc.documentElement.toprettyxml(' ')
	out_string = serializeXML(doc.documentElement, options)

	# now strip out empty lines
	lines = []
	# Get rid of empty lines
	for line in out_string.splitlines(True):
		if line.strip():
			lines.append(line)

	# return the string stripped of empty lines
	if options.strip_xml_prolog == False:
		xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
	else:
		xmlprolog = ""

	return xmlprolog + "".join(lines)

# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
	"""
		Scour the SVG file at `filename` and return the result as a minidom document.

		The file contents are read, passed to scourString() together with
		`options`, and the returned string is encoded as UTF-8 and parsed
		with xml.dom.minidom.
	"""
	# close the handle explicitly instead of leaving it to garbage collection
	in_file = open(filename)
	try:
		in_string = in_file.read()
	finally:
		in_file.close()
	out_string = scourString(in_string, options)
	return xml.dom.minidom.parseString(out_string.encode('utf-8'))

# GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
	"""
		Help formatter that puts a banner (application name and version on
		one line, the copyright statement on the next) in front of the
		regular usage text.
	"""
	def format_usage(self, usage):
		# banner built from the module-level APP, VER and COPYRIGHT values
		banner = "%s %s\n%s\n" % (APP, VER, COPYRIGHT)
		# the stock formatter still produces the usage text itself
		stock_usage = optparse.IndentedHelpFormatter.format_usage(self, usage)
		return banner + stock_usage

# GZ: would prefer this to be in a function or class scope, but tests etc need
#     access to the defaults anyway
# Module-level parser shared by parse_args(); the option defaults live here.
_options_parser = optparse.OptionParser(
	usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
	# adjacent literals are concatenated, so each fragment carries exactly
	# one separating space (at its start)
	description=("If the input/output files are specified with a svgz"
	" extension, then compressed SVG is assumed. If the input file is not"
	" specified, stdin is used. If the output file is not specified,"
	" stdout is used."),
	formatter=HeaderedFormatter(max_help_position=30),
	version=VER)

# Boolean switches. The --disable-* flags store False into options that
# default to True; the remaining flags store True into options that default
# to False.
_options_parser.add_option("--disable-simplify-colors",
	action="store_false", dest="simple_colors", default=True,
	help="won't convert all colors to #RRGGBB format")
_options_parser.add_option("--disable-style-to-xml",
	action="store_false", dest="style_to_xml", default=True,
	help="won't convert styles into XML attributes")
_options_parser.add_option("--disable-group-collapsing",
	action="store_false", dest="group_collapse", default=True,
	help="won't collapse <g> elements")
_options_parser.add_option("--enable-id-stripping",
	action="store_true", dest="strip_ids", default=False,
	help="remove all un-referenced ID attributes")
_options_parser.add_option("--disable-embed-rasters",
	action="store_false", dest="embed_rasters", default=True,
	help="won't embed rasters as base64-encoded data")
_options_parser.add_option("--keep-editor-data",
	action="store_true", dest="keep_editor_data", default=False,
	help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option("--strip-xml-prolog",
	action="store_true", dest="strip_xml_prolog", default=False,
	help="won't output the <?xml ?> prolog")

# GZ: this is confusing, most people will be thinking in terms of
#     decimal places, which is not what decimal precision is doing
# the option type is given by name, like type="string" for --indent below
_options_parser.add_option("-p", "--set-precision",
	action="store", type="int", dest="digits", default=5,
	help="set number of significant digits (default: %default)")
# -i / -o have their help suppressed; the usage string above already mentions them
_options_parser.add_option("-i",
	action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("-o",
	action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
# the accepted values for --indent are checked in parse_args()
_options_parser.add_option("--indent",
	action="store", type="string", dest="indent_type", default="space",
	help="indentation of the output: none, space, tab (default: %default)")

def maybe_gziped_file(filename, mode="r"):
	"""
		Open `filename` with `mode`, going through gzip when the name ends
		in .svgz or .gz (the extension is compared case-insensitively).

		Returns a gzip.GzipFile for those extensions, otherwise the object
		returned by the open() builtin.
	"""
	extension = os.path.splitext(filename)[1].lower()
	if extension in (".svgz", ".gz"):
		return gzip.GzipFile(filename, mode)
	# open() is the documented way to create file objects; the file()
	# builtin used previously exists only on Python 2
	return open(filename, mode)

def parse_args(args=None):
	"""
		Parse command-line arguments with the module-level option parser.

		`args` is handed to the parser's parse_args() unchanged. Leftover
		positional arguments, a negative digit count, or an unknown
		--indent value are reported through the parser's error() method.

		Returns a tuple (options, [infile, outfile]); the streams fall back
		to sys.stdin / sys.stdout when no file name was given.
	"""
	options, leftover = _options_parser.parse_args(args)

	# validation: each failure goes through the parser's error()
	if leftover:
		_options_parser.error("Additional arguments not handled: %r, see --help" % leftover)
	if options.digits < 0:
		_options_parser.error("Can't have negative significant digits, see --help")
	if options.indent_type not in ["tab", "space", "none"]:
		_options_parser.error("Invalid value for --indent, see --help")

	# input: standard input unless -i names a file
	# GZ: could sniff for gzip compression here
	infile = sys.stdin
	if options.infilename:
		# GZ: could catch a raised IOError here and report
		infile = maybe_gziped_file(options.infilename)

	# output: standard output unless -o names a file
	outfile = sys.stdout
	if options.outfilename:
		outfile = maybe_gziped_file(options.outfilename, "w")

	return options, [infile, outfile]

def getReport():
	"""
		Return the statistics summary as one string: one line per
		module-level counter, lines separated by newlines, each starting
		with a single space and with no trailing newline.
	"""
	stats = (
		(' Number of elements removed: ', numElemsRemoved),
		(' Number of attributes removed: ', numAttrsRemoved),
		(' Number of unreferenced id attributes removed: ', numIDsRemoved),
		(' Number of style properties fixed: ', numStylePropsFixed),
		(' Number of raster images embedded inline: ', numRastersEmbedded),
		(' Number of path segments reduced/removed: ', numPathSegmentsReduced),
		(' Number of bytes saved in path data: ', numBytesSavedInPathData),
		(' Number of bytes saved in colors: ', numBytesSavedInColors),
		(' Number of points removed from polygons: ', numPointsRemovedFromPolygon),
	)
	return '\n'.join([label + str(count) for label, count in stats])

# Script entry point: scour the input stream into the output stream and
# print timing, statistics and size information on stderr.
if __name__ == '__main__':
	# choose the timer used for the "Time taken" line
	if sys.platform == "win32":
		from time import clock as get_tick
	else:
		# GZ: is this different from time.time() in any way?
		def get_tick():
			# first field of os.times() (user CPU time per the os docs)
			return os.times()[0]

	start = get_tick()

	options, (input, output) = parse_args()

	print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

	# do the work
	in_string = input.read()
	out_string = scourString(in_string, options).encode("UTF-8")
	output.write(out_string)

	# Close input and output files
	input.close()
	output.close()

	end = get_tick()

	# GZ: unless silenced by -q or something?
	# GZ: not using globals would be good too
	print >>sys.stderr, ' File:', input.name, \
		'\n Time taken:', str(end-start) + 's\n', \
		getReport()

	oldsize = len(in_string)
	newsize = len(out_string)
	# Both sizes are ints, so a plain "/" truncates on Python 2 and the
	# ratio collapsed to 0 whenever the output was smaller than the input;
	# float() forces true division. The guard avoids dividing by an empty input.
	if oldsize:
		sizediff = (float(newsize) / oldsize) * 100
	else:
		sizediff = 0.0
	print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
		'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'