Allow elements to be found via Document.getElementById() in the minidom document returned by scourXmlFile() (#68)

2016-08-25 21:13:09 +02:00 · 2016-08-25 21:13:09 +02:00 · 386d5d8656
commit 386d5d8656
parent 8d6301950b
3 changed files with 35 additions and 3 deletions
--- a/scour/scour.py
+++ b/scour/scour.py
@@ -3126,7 +3126,20 @@ def scourXmlFile(filename, options=None):
   with open(filename, "rb") as f:
      in_string = f.read()
   out_string = scourString(in_string, options)
-   return xml.dom.minidom.parseString(out_string.encode('utf-8'))
+
+   doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
+
+   # minidom does not seem to pick up attribute types from the DTD, so
+   # manually declare every attribute named "id" to be of type ID
+   # (otherwise lookups such as doc.getElementById() won't work)
+   all_nodes = doc.getElementsByTagName("*")
+   for node in all_nodes:
+      try:
+         node.setIdAttribute('id')
+      except xml.dom.NotFoundErr:
+         pass  # element has no "id" attribute, nothing to declare
+
+   return doc