# Utility functions for manipulating ThML

from xml.dom import minidom

from swordutils.xml import utils


def isScripCom(node):
    """Return True if `node` is named ``scripCom``."""
    return node.nodeName == u'scripCom'


def findParentDiv(node):
    """Return the nearest ancestor element whose name starts with ``div``.

    Walks up the ``parentNode`` chain starting at the parent of `node`.

    Raises:
        Exception: if the top of the tree is reached without meeting such
            an element.
    """
    pnode = node.parentNode
    while pnode is not None:
        if pnode.nodeType == minidom.Document.ELEMENT_NODE \
                and pnode.nodeName.startswith(u'div'):
            return pnode
        pnode = pnode.parentNode
    raise Exception("Cannot find parent div for node %r" % node)


def moveToParent(node, destParent):
    """Hoist `node` up the tree until it is a direct child of `destParent`.

    At each step the node is detached from its current parent and
    re-inserted in the grandparent immediately before that parent.
    `destParent` is expected to be an ancestor of `node`; nothing is done
    if it already is the direct parent.  Returns None.
    """
    while node.parentNode is not destParent:
        pnode = node.parentNode
        pnode.removeChild(node)
        pnode.parentNode.insertBefore(node, pnode)


def _containsScripCom(node):
    """Return True if any descendant of `node` is a scripCom node."""
    return any(isScripCom(child) or _containsScripCom(child)
               for child in node.childNodes)


def _findNextScripComNode(node, return_parent):
    """Find the first of `node` and its following siblings that is, or
    contains, a scripCom node.

    The result is always expressed relative to the level of `node`: the
    matching sibling itself, or that sibling's parent when `return_parent`
    is true.  Returns None if `node` is None or nothing matches.
    """
    # Iterating over the siblings (rather than recursing once per sibling)
    # keeps `return_parent` in force for the whole level and avoids deep
    # recursion on long runs of siblings.
    while node is not None:
        # Search deeper, but return node that is on the
        # same level as our original node
        if isScripCom(node) or _containsScripCom(node):
            if return_parent:
                return node.parentNode
            return node
        node = node.nextSibling
    return None


def _expandScripComNode(scNode):
    """Move the siblings that follow `scNode` into it as children.

    Siblings are taken in document order, stopping before the first one
    that is or contains another scripCom node (or at the end of the
    sibling list when there is none).
    """
    nextSCN = _findNextScripComNode(scNode.nextSibling, False)
    # Collect first, then move: moving a node changes its nextSibling.
    collection = []
    n = scNode.nextSibling
    while n is not None and n is not nextSCN:
        collection.append(n)
        n = n.nextSibling
    for n in collection:
        n.parentNode.removeChild(n)
        scNode.appendChild(n)


def expandScripComNodes(node):
    """Expands all empty scripCom nodes so that they contain the nodes
    that they refer to, using neighboring nodes and the structure of
    the XML as a guide, starting at the supplied node.

    The tree is modified in place.  Raises Exception (from findParentDiv)
    if a scripCom node has no enclosing `divX' element.
    """
    if isScripCom(node):
        # Often placed as markers instead of enclosing
        # the nodes to which they apply.
        # NOTE(review): nodeValue is None for DOM element nodes, so this
        # test appears to hold for every scripCom element, including ones
        # that already have children -- confirm whether a check on the
        # child nodes was intended.
        if node.nodeValue is None or node.nodeValue == "":
            # Try to find scope over which the element
            # should actually be placed.
            # Rules:
            # - move the scripCom element 'up' the tree until it is
            #   a direct child of a `divX' node, placing it before
            #   any of its parent nodes along the way
            # - make all its sibling nodes that are below it
            #   into child nodes, up to the point where there
            #   is another scripCom element
            div = findParentDiv(node)
            moveToParent(node, div)
            _expandScripComNode(node)
    # Iterate over a snapshot: the recursive calls move nodes out of (and
    # into) the live childNodes list, which would otherwise make the loop
    # skip children.
    for n in list(node.childNodes):
        expandScripComNodes(n)