# General XML utilities

from xml.dom import minidom
from xml import xpath
import codecs


def getFileWriter(fileHandle):
    """Gets a 'writer' for a file object that encodes as UTF-8"""
    return codecs.lookup("UTF-8").streamwriter(fileHandle)


def writexml(doc, fileHandle):
    """Writes an XML document to a file handle

    The handle is wrapped with getFileWriter() so the output is encoded
    as UTF-8, and the same encoding name is passed to doc.writexml().
    """
    doc.writexml(getFileWriter(fileHandle), encoding="UTF-8")


def getNodesFromXPath(document, path):
    """Selects nodes specified by 'path' from 'document', where path
    is a string or a compiled xpath object.

    A string path is compiled with xpath.Compile() first; the result of
    select() on a fresh context for 'document' is returned.
    """
    if isinstance(path, basestring):
        path = xpath.Compile(path)
    return path.select(xpath.CreateContext(document))


# Compiled once at import time and reused by getRoot().
_rootxpath = xpath.Compile('/')


def getRoot(doc):
    """Returns the root node of a document (the first match of '/')"""
    return getNodesFromXPath(doc, _rootxpath)[0]


# Classes to help us with modifications

class RemoveNode:
    """Action that removes the node it is applied to."""

    def act(self, node):
        """Detach 'node' from its owner element or parent node."""
        if isinstance(node, minidom.Attr):
            # Attributes are not children; they are removed by name
            # from the element that owns them.
            node.ownerElement.removeAttribute(node.name)
        else:
            node.parentNode.removeChild(node)


class GeneralReplaceContents:
    """Replace the contents of a node, with user providable function
    for calculating replacement text
    """

    def __init__(self, replacefunc):
        # replacefunc is called with the serialised original contents
        # and must return the replacement text (or XML markup).
        self.replacefunc = replacefunc

    def act(self, node):
        """Replace the children of 'node' with the parsed result of
        self.replacefunc(<original contents as XML text>).
        """
        origText = u''.join(c.toxml() for c in node.childNodes)
        # Usually replacefunc will just return text,
        # but we allow it to return xml as well.  The result is wrapped
        # in a dummy element so that plain text (or several sibling
        # nodes) still forms a well-formed document for the parser.
        wrapped = u'<dummy>' + self.replacefunc(origText) + u'</dummy>'
        # Hand the parser UTF-8 bytes rather than a unicode string, so
        # that non-ASCII replacement text does not depend on an
        # implicit default-encoding conversion.
        newNodes = minidom.parseString(wrapped.encode('utf-8'))
        # newNodes is a DOM instance, and it has a dummy
        # element wrapping the nodes we actually want.
        # NOTE: this assignment swaps the child list only; it does not
        # update parentNode/ownerDocument on the adopted nodes.
        node.childNodes = newNodes.documentElement.childNodes


class ReplaceContents(GeneralReplaceContents):
    """Replace the contents of a node with a fixed unicode string."""

    def __init__(self, replacementtext):
        assert isinstance(replacementtext, unicode)

        def _replacefunc(text):
            # The original contents are ignored.
            return replacementtext

        self.replacefunc = _replacefunc


def do_replacements(doc, replacements):
    """Apply a set of actions to the nodes of 'doc'.

    'replacements' maps an XPath expression (string) to an action
    object; action.act(node) is called for every node the expression
    selects.
    """
    ctx = xpath.CreateContext(doc)
    for path, action in replacements.items():
        xp = xpath.Compile(path)
        for n in xp.select(ctx):
            action.act(n)