Fix bug when DOCTYPE is present. Big performance improvement to makeWellFormed() function
This commit is contained in:
parent
c835423e8f
commit
c00bc8b70c
5 changed files with 37 additions and 14 deletions
23
scour.py
23
scour.py
|
|
@ -2021,15 +2021,17 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
|
|||
remapNamespacePrefix(child, oldprefix, newprefix)
|
||||
|
||||
def makeWellFormed(str):
|
||||
newstr = ''
|
||||
xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'}
|
||||
for c in str:
|
||||
if c in xml_ents:
|
||||
newstr += xml_ents[c]
|
||||
else:
|
||||
newstr += c
|
||||
|
||||
# starr = []
|
||||
# for c in str:
|
||||
# if c in xml_ents:
|
||||
# starr.append(xml_ents[c])
|
||||
# else:
|
||||
# starr.append(c)
|
||||
|
||||
return newstr
|
||||
# this list comprehension is short-form for the above for-loop:
|
||||
return ''.join([xml_ents[c] if c in xml_ents else c for c in str])
|
||||
|
||||
# hand-rolled serialization function that has the following benefits:
|
||||
# - pretty printing
|
||||
|
|
@ -2295,12 +2297,11 @@ def scourString(in_string, options=None):
|
|||
else:
|
||||
total_output = ""
|
||||
|
||||
# Find all comments before and after the root node and print them
|
||||
for child in doc.childNodes:
|
||||
if child.nodeType == 8:
|
||||
total_output += ('<!--' + child.nodeValue + '-->' + os.linesep)
|
||||
else:
|
||||
if child.nodeType == 1:
|
||||
total_output += "".join(lines)
|
||||
else: # doctypes, entities, comments
|
||||
total_output += child.toxml() + os.linesep
|
||||
|
||||
return total_output
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue