Remove all inkscape and sodipodi attributes and elements
This commit is contained in:
parent
aec2019e07
commit
879300373f
1 changed files with 75 additions and 25 deletions
100
scour.py
100
scour.py
|
|
@ -35,8 +35,11 @@ APP = 'scour'
|
||||||
VER = '0.02'
|
VER = '0.02'
|
||||||
COPYRIGHT = 'Copyright Jeff Schiller, 2009'
|
COPYRIGHT = 'Copyright Jeff Schiller, 2009'
|
||||||
|
|
||||||
SVGNS = 'http://www.w3.org/2000/svg'
|
NS = { 'SVG': 'http://www.w3.org/2000/svg',
|
||||||
XLINKNS = 'http://www.w3.org/1999/xlink'
|
'XLINK': 'http://www.w3.org/1999/xlink',
|
||||||
|
'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
|
||||||
|
'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape'
|
||||||
|
}
|
||||||
|
|
||||||
def printHeader():
|
def printHeader():
|
||||||
print APP , VER
|
print APP , VER
|
||||||
|
|
@ -57,32 +60,32 @@ input = sys.stdin
|
||||||
output = sys.stdout
|
output = sys.stdout
|
||||||
|
|
||||||
# Command-line handling: choose the input and output streams.
#   -i FILE          read from FILE instead of stdin
#   -o FILE          write to FILE instead of stdout
#   -i IN -o OUT     both, in either order
# Any other non-empty argument list is a syntax error.

# if -i or -o is supplied, switch the stream to the file
if len(args) == 2:
    if args[0] == '-i':
        input = open(args[1], 'r')
    elif args[0] == '-o':
        output = open(args[1], 'w')
    else:
        printSyntaxAndQuit()

# if both -i and -o are supplied, switch streams to the files
elif len(args) == 4:
    if args[0] == '-i' and args[2] == '-o':
        input = open(args[1], 'r')
        output = open(args[3], 'w')
    # reversed order; the second flag must be compared against '-i'
    # (with the dash), otherwise this branch can never match
    elif args[0] == '-o' and args[2] == '-i':
        output = open(args[1], 'w')
        input = open(args[3], 'r')
    else:
        printSyntaxAndQuit()

# else invalid syntax
elif len(args) != 0:
    printSyntaxAndQuit()
|
||||||
|
|
||||||
# the report (header now, statistics later) is only produced when the
# result is not being written to stdout
bOutputReport = (output != sys.stdout)
if bOutputReport:
    printHeader()
|
||||||
|
|
||||||
|
|
@ -92,13 +95,13 @@ doc = xml.dom.minidom.parse(input)
|
||||||
# returns all elements with id attributes
def findElementsWithId(node, elems=None):
    """Collect every element at or below node that has a non-empty id.

    node  -- DOM element to start the search from
    elems -- optional dict to add the results to; a new dict is created
             when it is omitted

    Returns the dict, mapping each id string to its element node.
    """
    # a {} default is created once and shared by every call that omits
    # elems, so ids from one invocation would leak into the next
    if elems is None:
        elems = {}

    elemId = node.getAttribute('id')
    if elemId != '':
        elems[elemId] = node

    for child in node.childNodes:
        # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
        # we are only really interested in nodes of type Element (1)
        if child.nodeType == 1:
            findElementsWithId(child, elems)
    return elems
|
||||||
|
|
||||||
|
|
@ -107,13 +110,13 @@ def findElementsWithId(node,elems={}):
|
||||||
def findReferencedElements(node,ids={}):
|
def findReferencedElements(node,ids={}):
|
||||||
# TODO: error here (ids is not cleared upon next invocation), the
|
# TODO: error here (ids is not cleared upon next invocation), the
|
||||||
# input argument ids is clunky here (see below how it is called)
|
# input argument ids is clunky here (see below how it is called)
|
||||||
href = node.getAttributeNS(XLINKNS,'href')
|
href = node.getAttributeNS(NS['XLINK'],'href')
|
||||||
|
|
||||||
# if xlink:href is set, then grab the id
|
# if xlink:href is set, then grab the id
|
||||||
if( href != '' and len(href) > 1 and href[0] == '#'):
|
if href != '' and len(href) > 1 and href[0] == '#':
|
||||||
# we remove the hash mark from the beginning of the id
|
# we remove the hash mark from the beginning of the id
|
||||||
id = href[1:]
|
id = href[1:]
|
||||||
if( ids.has_key(id) ):
|
if ids.has_key(id) :
|
||||||
ids[id] += 1
|
ids[id] += 1
|
||||||
else:
|
else:
|
||||||
ids[id] = 1
|
ids[id] = 1
|
||||||
|
|
@ -128,24 +131,25 @@ def findReferencedElements(node,ids={}):
|
||||||
|
|
||||||
for style in styles:
|
for style in styles:
|
||||||
propval = string.split(style,':')
|
propval = string.split(style,':')
|
||||||
if(len(propval) == 2):
|
if len(propval) == 2 :
|
||||||
prop = propval[0].strip()
|
prop = propval[0].strip()
|
||||||
val = propval[1].strip()
|
val = propval[1].strip()
|
||||||
if( prop in referencingProps and val != '' and val[0:5] == 'url(#' ):
|
if prop in referencingProps and val != '' and val[0:5] == 'url(#' :
|
||||||
id = val[5:val.find(')')]
|
id = val[5:val.find(')')]
|
||||||
if( ids.has_key(id) ):
|
if ids.has_key(id) :
|
||||||
ids[id] += 1
|
ids[id] += 1
|
||||||
else:
|
else:
|
||||||
ids[id] = 1
|
ids[id] = 1
|
||||||
|
|
||||||
if( node.hasChildNodes() ):
|
if node.hasChildNodes() :
|
||||||
for child in node.childNodes:
|
for child in node.childNodes:
|
||||||
if( child.nodeType == 1 ):
|
if child.nodeType == 1 :
|
||||||
findReferencedElements(child, ids)
|
findReferencedElements(child, ids)
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
numIDsRemoved = 0
|
numIDsRemoved = 0
|
||||||
numElemsRemoved = 0
|
numElemsRemoved = 0
|
||||||
|
numAttrsRemoved = 0
|
||||||
|
|
||||||
# removes the unreferenced ID attributes
|
# removes the unreferenced ID attributes
|
||||||
# returns the number of ID attributes removed
|
# returns the number of ID attributes removed
|
||||||
|
|
@ -154,7 +158,7 @@ def removeUnreferencedIDs(referencedIDs, identifiedElements):
|
||||||
num = 0;
|
num = 0;
|
||||||
for id in identifiedElements.keys():
|
for id in identifiedElements.keys():
|
||||||
node = identifiedElements[id]
|
node = identifiedElements[id]
|
||||||
if( referencedIDs.has_key(id) == False ):
|
if referencedIDs.has_key(id) == False :
|
||||||
node.removeAttribute('id')
|
node.removeAttribute('id')
|
||||||
# now remove the element from our list of elements with ids
|
# now remove the element from our list of elements with ids
|
||||||
# not necessary if we're calculating the array again every time
|
# not necessary if we're calculating the array again every time
|
||||||
|
|
@ -166,15 +170,60 @@ def removeUnreferencedIDs(referencedIDs, identifiedElements):
|
||||||
def vacuumDefs(doc):
    """Remove every element without an id that sits directly in a <defs>.

    doc -- the DOM document to clean up

    Adds the removals to the global numElemsRemoved counter and returns
    the number of elements removed by this call.
    """
    global numElemsRemoved
    num = 0
    defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
    for aDef in defs:
        # walk a snapshot: removeChild() shrinks the live childNodes list,
        # which would make the loop skip the sibling after each removal
        for elem in list(aDef.childNodes):
            if elem.nodeType == 1 and elem.getAttribute('id') == '':
                aDef.removeChild(elem)
                numElemsRemoved += 1
                num += 1
    return num
|
||||||
|
|
||||||
|
# removes all attributes that belong to one of the given namespaces
# returns the number of attributes removed
def removeNamespacedAttributes(node, namespaces):
    """Strip namespaced attributes from node and all of its descendants.

    node       -- DOM node to start from (nodes that are not elements
                  are left untouched)
    namespaces -- collection of namespace URIs whose attributes are removed

    Adds the removals to the global numAttrsRemoved counter and returns
    the number of attributes removed from node and everything below it.
    """
    global numAttrsRemoved
    num = 0
    if node.nodeType == 1:
        # take a snapshot of the attributes first: removing one while
        # stepping through the map by index moves the remaining ones down,
        # so some of them would never be visited
        attrList = node.attributes
        attrs = [attrList.item(i) for i in range(attrList.length)]
        for attr in attrs:
            if attr is not None and attr.namespaceURI in namespaces:
                node.removeAttribute(attr.nodeName)
                numAttrsRemoved += 1
                num += 1

        # now recurse for children, counting what gets removed further down
        for child in node.childNodes:
            num += removeNamespacedAttributes(child, namespaces)
    return num
|
||||||
|
|
||||||
|
def removeNamespacedElements(node, namespaces):
    """Remove child nodes in the given namespaces from node and below.

    node       -- DOM node to start from (nodes that are not elements
                  are left untouched)
    namespaces -- collection of namespace URIs whose nodes are removed

    Adds the removals to the global numElemsRemoved counter and returns
    the number of nodes removed from node and everything below it.
    """
    global numElemsRemoved
    num = 0
    if node.nodeType == 1:
        # remove all namespace'd child nodes from this element; walk a
        # snapshot because removeChild() shrinks the live childNodes list,
        # which would make the loop skip the sibling after each removal
        for child in list(node.childNodes):
            if child.namespaceURI in namespaces:
                node.removeChild(child)
                numElemsRemoved += 1
                num += 1

        # now recurse for children, counting what gets removed further down
        for child in node.childNodes:
            num += removeNamespacedElements(child, namespaces)
    return num
|
||||||
|
|
||||||
|
# for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
|
||||||
|
# on the first pass, so we do it multiple times
|
||||||
|
while removeNamespacedElements( doc.documentElement, [ NS['SODIPODI'], NS['INKSCAPE'] ] ) > 0 :
|
||||||
|
pass
|
||||||
|
|
||||||
|
while removeNamespacedAttributes( doc.documentElement, [ NS['SODIPODI'], NS['INKSCAPE'] ] ) > 0 :
|
||||||
|
pass
|
||||||
|
|
||||||
bContinueLooping = True
|
bContinueLooping = True
|
||||||
while bContinueLooping:
|
while bContinueLooping:
|
||||||
identifiedElements = findElementsWithId(doc.documentElement, {})
|
identifiedElements = findElementsWithId(doc.documentElement, {})
|
||||||
|
|
@ -191,4 +240,5 @@ output.close()
|
||||||
# output some statistics if we are not using stdout
|
# output some statistics if we are not using stdout
|
||||||
if( bOutputReport):
|
if( bOutputReport):
|
||||||
print "Number of unreferenced id attributes removed:", numIDsRemoved
|
print "Number of unreferenced id attributes removed:", numIDsRemoved
|
||||||
print "Number of unreferenced elements removed:", numElemsRemoved
|
print "Number of elements removed:", numElemsRemoved
|
||||||
|
print "Number of attributes removed:", numAttrsRemoved
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue