[sword-svn] r123 - in trunk/python/swordutils: . osis

lukeplant at www.crosswire.org lukeplant at www.crosswire.org
Thu Nov 29 16:29:44 MST 2007


Author: lukeplant
Date: 2007-11-29 16:29:43 -0700 (Thu, 29 Nov 2007)
New Revision: 123

Added:
   trunk/python/swordutils/osis/
   trunk/python/swordutils/osis/genbookOsis2Commentary.py
Log:
Added 'genbookOsis2Commentary.py' script for converting 
OSIS files to format currently expected by osis2mod


Added: trunk/python/swordutils/osis/genbookOsis2Commentary.py
===================================================================
--- trunk/python/swordutils/osis/genbookOsis2Commentary.py	                        (rev 0)
+++ trunk/python/swordutils/osis/genbookOsis2Commentary.py	2007-11-29 23:29:43 UTC (rev 123)
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+
+# Currently, the Sword importer, osis2mod, expects the OSIS file to be
+# structured like a Bible - i.e. nested book, chapter and verse elements.
+# A commentary may instead be structured as <div osisID="..."> sections;
+# for importing, this script converts those to the format osis2mod expects.
+
+import sys
+import codecs
+from xml.dom import minidom as dom
+from swordutils.xml.utils import getFileWriter
+
+def usage():
+    print "Usage: genbookOsis2Commentary.py <osisFile>"
+    print "Output is OSIS converted for use by osis2mod."
+
+def isRoot(n):
+    return (n.nodeType == dom.Document.ELEMENT_NODE
+            and n.nodeName == u'osis')
+
+def isVerseDiv(n):
+    return n.nodeName == u'div' and n.attributes.has_key(u'osisID')
+
+class VerseRef(object):
+    def __init__(self, ref):
+        parts = ref.split('.')
+        if len(parts) > 0:
+            self.book = parts[0]
+        else:
+            self.book = None
+
+        if len(parts) > 1:
+            self.chapter = parts[1]
+        else:
+            self.chapter = None
+
+        if len(parts) > 2:
+            self.verse = parts[2]
+        else:
+            self.verse = None
+
+def add_book_node(doc, node, verseRef):
+    # Insert <div type="book"> node around verse
+    div = doc.createElement(u'div')
+    div.attributes[u'type'] = u'book'
+    node.parentNode.insertBefore(div, node)
+    
+    #head = doc.createElement(u'head')
+    #head.appendChild(doc.createTextNode(verseRef.book.title()))
+    #div.appendChild(head)
+    div.appendChild(node)
+    return div
+
+def add_chapter_node(doc, node, verseRef):
+    # Insert <div type="chapter"> node around verse
+    div = doc.createElement(u'div')
+    div.attributes[u'type'] = u'chapter'
+    node.parentNode.insertBefore(div, node)
+    
+    #head = doc.createElement(u'head')
+    #head.appendChild(doc.createTextNode('Chapter %s' % verseRef.chapter))
+    #div.appendChild(head)
+    div.appendChild(node)
+    return div
+
+def versify_tree(doc, node, curRef=VerseRef(''), curBookNode=None, curChapterNode=None):
+    # Traverse tree, modifying divisions to include <div type="book">,
+    # <div type="chapter"> and <verse>
+
+    if isVerseDiv(node):        
+        node.tagName = u'verse'
+        node.attributes.removeNamedItem(u'type')
+        vr = VerseRef(node.attributes[u'osisID'].value)
+        assert vr.book is not None
+        if vr.book != curRef.book:
+            curBookNode = add_book_node(doc, node, vr)
+            curChapterNode = add_chapter_node(doc, node, vr)
+        else:
+            if vr.chapter != curRef.chapter:
+                curChapterNode = add_chapter_node(doc, node, vr)
+            else:
+                # move the verse into the current chapter
+                # We know that curChapterNode != None here,
+                # because VerseRef('') never matches VerseRef(anythingelse)
+                curChapterNode.appendChild(node)
+
+        curRef = vr                
+
+    elif node.childNodes.length > 0:
+        for n in list(node.childNodes):
+            curRef, curBookNode, curChapterNode = versify_tree(doc, n, curRef=curRef, curBookNode=curBookNode, curChapterNode=curChapterNode)
+
+    return (curRef, curBookNode, curChapterNode)
+
+
+def remove_non_commentary(node):
+    """Recursivley removes any body text which is not part of commentary on a verse,
+    returns True if the current node contains any commentary"""
+    # The preamble before actual commentary is currently included by
+    # osis2mod.  This is fairly annoying, and this function will
+    # remove such text so that it is not included in the end product
+
+    assert node is not None
+
+    inCommentary = \
+        (node.nodeName == u'div' and node.attributes.get('type') is not None and node.attributes['type'].value in ['book','chapter']) or \
+        (node.nodeName == u'verse')
+
+    inHeader = \
+        (node.nodeName in [u'osis', u'osisText']) or\
+        isRoot(node)
+    
+    if not inHeader and not inCommentary:
+        # remove text contents of this item, or entire node
+        # if it has no child elements
+        for n in list(node.childNodes):
+            if n.nodeType == n.TEXT_NODE:
+                node.removeChild(n)
+        if node.childNodes.length == 0:
+            # Nothing more to do now, there is definitely
+            # no commentary here.
+            return False
+
+    if inCommentary:
+        # keep node and everything that is below node
+        return True
+
+    childrenHaveCommentary = False
+    for n in list(node.childNodes):
+        hadCommentary = remove_non_commentary(n)
+        if hadCommentary:
+            childrenHaveCommentary = True
+        else:
+            node.removeChild(n)
+    return childrenHaveCommentary
+            
+
+def main(filename):
+    d = dom.parse(filename)
+    rootNode = filter(isRoot, d.childNodes)[0]
+    versify_tree(d, rootNode)
+    remove_non_commentary(rootNode)
+    d.writexml(getFileWriter(sys.stdout), encoding="UTF-8")
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        usage()
+        sys.exit(1)
+    main(sys.argv[1])


Property changes on: trunk/python/swordutils/osis/genbookOsis2Commentary.py
___________________________________________________________________
Name: svn:executable
   + *
Name: svn:eol-style
   + native




More information about the sword-cvs mailing list