[sword-svn] r123 - in trunk/python/swordutils: . osis
lukeplant at www.crosswire.org
lukeplant at www.crosswire.org
Thu Nov 29 16:29:44 MST 2007
Author: lukeplant
Date: 2007-11-29 16:29:43 -0700 (Thu, 29 Nov 2007)
New Revision: 123
Added:
trunk/python/swordutils/osis/
trunk/python/swordutils/osis/genbookOsis2Commentary.py
Log:
Added 'genbookOsis2Commentary.py' script for converting
OSIS files to format currently expected by osis2mod
Added: trunk/python/swordutils/osis/genbookOsis2Commentary.py
===================================================================
--- trunk/python/swordutils/osis/genbookOsis2Commentary.py (rev 0)
+++ trunk/python/swordutils/osis/genbookOsis2Commentary.py 2007-11-29 23:29:43 UTC (rev 123)
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+
+# Currently, the Sword importer, osis2mod, expects the OSIS file to be
+# structured like a Bible, i.e. book > chapter > verse.
+# In general, a commentary might instead be structured using
+# <div osisID="...">. For importing, we convert to the format osis2mod expects.
+
+import sys
+import codecs
+from xml.dom import minidom as dom
+from swordutils.xml.utils import getFileWriter
+
+def usage():
+ print "Usage: genbookOsis2Commentary.py <osisFile>"
+ print "Output is OSIS converted for use by osis2mod."
+
+def isRoot(n):
+ return (n.nodeType == dom.Document.ELEMENT_NODE
+ and n.nodeName == u'osis')
+
+def isVerseDiv(n):
+ return n.nodeName == u'div' and n.attributes.has_key(u'osisID')
+
+class VerseRef(object):
+ def __init__(self, ref):
+ parts = ref.split('.')
+ if len(parts) > 0:
+ self.book = parts[0]
+ else:
+ self.book = None
+
+ if len(parts) > 1:
+ self.chapter = parts[1]
+ else:
+ self.chapter = None
+
+ if len(parts) > 2:
+ self.verse = parts[2]
+ else:
+ self.verse = None
+
+def add_book_node(doc, node, verseRef):
+ # Insert <div type="book"> node around verse
+ div = doc.createElement(u'div')
+ div.attributes[u'type'] = u'book'
+ node.parentNode.insertBefore(div, node)
+
+ #head = doc.createElement(u'head')
+ #head.appendChild(doc.createTextNode(verseRef.book.title()))
+ #div.appendChild(head)
+ div.appendChild(node)
+ return div
+
+def add_chapter_node(doc, node, verseRef):
+ # Insert <div type="chapter"> node around verse
+ div = doc.createElement(u'div')
+ div.attributes[u'type'] = u'chapter'
+ node.parentNode.insertBefore(div, node)
+
+ #head = doc.createElement(u'head')
+ #head.appendChild(doc.createTextNode('Chapter %s' % verseRef.chapter))
+ #div.appendChild(head)
+ div.appendChild(node)
+ return div
+
+def versify_tree(doc, node, curRef=VerseRef(''), curBookNode=None, curChapterNode=None):
+ # Traverse tree, modifying divisions to include <div type="book">,
+ # <div type="chapter"> and <verse>
+
+ if isVerseDiv(node):
+ node.tagName = u'verse'
+ node.attributes.removeNamedItem(u'type')
+ vr = VerseRef(node.attributes[u'osisID'].value)
+ assert vr.book is not None
+ if vr.book != curRef.book:
+ curBookNode = add_book_node(doc, node, vr)
+ curChapterNode = add_chapter_node(doc, node, vr)
+ else:
+ if vr.chapter != curRef.chapter:
+ curChapterNode = add_chapter_node(doc, node, vr)
+ else:
+ # move the verse into the current chapter
+ # We know that curChapterNode != None here,
+ # because VerseRef('') never matches VerseRef(anythingelse)
+ curChapterNode.appendChild(node)
+
+ curRef = vr
+
+ elif node.childNodes.length > 0:
+ for n in list(node.childNodes):
+ curRef, curBookNode, curChapterNode = versify_tree(doc, n, curRef=curRef, curBookNode=curBookNode, curChapterNode=curChapterNode)
+
+ return (curRef, curBookNode, curChapterNode)
+
+
+def remove_non_commentary(node):
+ """Recursivley removes any body text which is not part of commentary on a verse,
+ returns True if the current node contains any commentary"""
+ # The preamble before actual commentary is currently included by
+ # osis2mod. This is fairly annoying, and this function will
+ # remove such text so that it is not included in the end product
+
+ assert node is not None
+
+ inCommentary = \
+ (node.nodeName == u'div' and node.attributes.get('type') is not None and node.attributes['type'].value in ['book','chapter']) or \
+ (node.nodeName == u'verse')
+
+ inHeader = \
+ (node.nodeName in [u'osis', u'osisText']) or\
+ isRoot(node)
+
+ if not inHeader and not inCommentary:
+ # remove text contents of this item, or entire node
+ # if it has no child elements
+ for n in list(node.childNodes):
+ if n.nodeType == n.TEXT_NODE:
+ node.removeChild(n)
+ if node.childNodes.length == 0:
+ # Nothing more to do now, there is definitely
+ # no commentary here.
+ return False
+
+ if inCommentary:
+ # keep node and everything that is below node
+ return True
+
+ childrenHaveCommentary = False
+ for n in list(node.childNodes):
+ hadCommentary = remove_non_commentary(n)
+ if hadCommentary:
+ childrenHaveCommentary = True
+ else:
+ node.removeChild(n)
+ return childrenHaveCommentary
+
+
+def main(filename):
+ d = dom.parse(filename)
+ rootNode = filter(isRoot, d.childNodes)[0]
+ versify_tree(d, rootNode)
+ remove_non_commentary(rootNode)
+ d.writexml(getFileWriter(sys.stdout), encoding="UTF-8")
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ usage()
+ sys.exit(1)
+ main(sys.argv[1])
Property changes on: trunk/python/swordutils/osis/genbookOsis2Commentary.py
___________________________________________________________________
Name: svn:executable
+ *
Name: svn:eol-style
+ native
More information about the sword-cvs
mailing list