[sword-svn] r402 - trunk/modules/python

Mon Aug 27 02:56:12 MST 2012

Author: chrislit
Date: 2012-08-27 02:56:12 -0700 (Mon, 27 Aug 2012)
New Revision: 402

Modified:
   trunk/modules/python/usfm2osis.py
Log:
added docstrings


Modified: trunk/modules/python/usfm2osis.py
===================================================================

--- trunk/modules/python/usfm2osis.py	2012-08-27 08:20:20 UTC (rev 401)
+++ trunk/modules/python/usfm2osis.py	2012-08-27 09:56:12 UTC (rev 402)
@@ -28,10 +28,10 @@
 
 
 ### Guidelines & objectives:
-# Target Python 2.7+ (but support 3.2 if possible)
-# Use no non-default libraries (this may change in the future)
-# Don't use SWORD bindings (this will probably change to allow *optional* use of bindings, if installed)
-# Achieve full coverage of USFM according to UBS spec:
+# Target CPython 2.7+ (but support CPython 3 and other interpreters if possible)
+# Require no non-default libraries
+# Don't require SWORD bindings
+# Handle all USFM characters from the USFM reference:
 #      <http://paratext.ubs-translations.org/about/usfm>
 # Employ best-practice conformant OSIS
 # Employ modularity (functions rather than a big long script)
@@ -44,14 +44,13 @@
 # 0.7 test suite incorporating all USFM examples from UBS ICAP and other complex cases
 # 0.8 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
 # 1.0 feature complete for release & production use
-# 1.x xreffix.pl-functionality (osisParse(ref)), requiring SWORD bindings
+# 1.x xreffix.pl-functionality (osisParse(ref)), requiring SWORD bindings; use toc3 for localization
 # 1.x SWORD-mode output?
 # 1.x IMP output?
 # 1.x SWORD module output?, requiring SWORD bindings
 
 ### TODO for 0.6:
 # expand sub-verses with ! in osisIDs
-# document functions (docstrings)
 # unittest
 # make fully OO
 # PyDev project?
@@ -348,13 +347,22 @@
     verbosePrint(('Processing: ' + sFile))
 
     def cvtPreprocess(osis, relaxedConformance):
+        """Perform preprocessing on a USFM document, returning the processed text as a string.
+        Removes excess spaces & CRs and escapes XML entities.
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # lines should never start with non-tags
         osis = re.sub('\n\s*([^\\\s])', r' \1', osis)  # TODO: test this
         # convert CR to LF
         osis = osis.replace('\r', '\n')
         # lines should never end with whitespace (other than \n)
         osis = re.sub('\s+\n', '\n', osis)
-        # XML-encode as necessary
+        # replace with XML entities, as necessary
         osis = osis.replace('&', '&amp;')
         osis = osis.replace('<', '&lt;')
         osis = osis.replace('>', '&gt;')
@@ -365,6 +373,15 @@
 
 
     def cvtRelaxedConformanceRemaps(osis, relaxedConformance):
+        """Perform preprocessing on a USFM document, returning the processed text as a string.
+        Remaps certain deprecated USFM tags to recommended alternatives.
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         if not relaxedConformance:
             return osis
 
@@ -399,10 +416,17 @@
         return osis
 
 
-    def cvtIdentification(osis, relaxedConformance, filename):
-        ### Identification
-        ### supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
+    def cvtIdentification(osis, relaxedConformance):
+        """Converts USFM **Identification** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
         osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n]*?)\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL)
 
@@ -436,9 +460,16 @@
 
 
     def cvtIntroductions(osis, relaxedConformance):
-        ### Introductions
-        ### supported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie
+        """Converts USFM **Introduction** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \imt#_text...
         osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction">' + m.group(2) + '</title>', osis)
 
@@ -510,9 +541,16 @@
 
 
     def cvtTitles(osis, relaxedConformance):
-        ### Titles, Headings, and Labels
-        ### supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp
+        """Converts USFM **Title, Heading, and Label** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp 
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \ms#_text...
         osis = re.sub(r'\\ms1?\s+(.+)', lambda m: '\uFDD5<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
         osis = re.sub('(\uFDD5[^\uFDD5\uFDD0]+)', r'\1'+'</div>\uFDD5\n', osis, flags=re.DOTALL)
@@ -565,16 +603,29 @@
 
 
     def cvtChaptersAndVerses(osis, relaxedConformance):
-        ### Chapters and Verses
-        ### supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
+        """Converts USFM **Chapter and Verse** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \c_#
         osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL)
 
         # \cp_#
         # \ca_#\ca*
-        def replaceChapterNumber(matchObj):
-            ctext = matchObj.group(1)
+        def replaceChapterNumber(matchObject):
+            """Regex helper function to replace chapter numbers from \c_# with values that appeared in \cp_# and \ca_#\ca*, returning the chapter text as a string.
+
+            Keyword arguments:
+            matchObject -- a regex match object in which the first element is the chapter text
+
+            """
+            ctext = matchObject.group(1)
             cp = re.search(r'\\cp\s+(.+?)(?=(\\|\s))', ctext)
             if cp:
                 ctext = re.sub(r'\\cp\s+(.+?)\\cp*', '', ctext, flags=re.DOTALL)
@@ -599,8 +650,14 @@
 
         # \vp_#\vp*
         # \va_#\va*
-        def replaceVerseNumber(matchObj):
-            vtext = matchObj.group(1)
+        def replaceVerseNumber(matchObject):
+            """Regex helper function to replace verse numbers from \v_# with values that appeared in \vp_#\vp* and \va_#\va*, returning the verse text as a string.
+
+            Keyword arguments:
+            matchObject -- a regex match object in which the first element is the verse text
+
+            """
+            vtext = matchObject.group(1)
             vp = re.search(r'\\vp\s+(.+?)\\vp*', vtext)
             if vp:
                 vtext = re.sub(r'\\vp\s+(.+?)\\vp*', '', vtext, flags=re.DOTALL)
@@ -618,9 +675,16 @@
 
 
     def cvtParagraphs(osis, relaxedConformance):
-        ### Paragraphs
-        ### supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
+        """Converts USFM **Paragraph** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \p(_text...)
         osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p>\n' + m.group(1) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
@@ -663,9 +727,16 @@
 
 
     def cvtPoetry(osis, relaxedConformance):
-        ### Poetry
-        ### supported: \q#, \qr, \qc, \qs...\qs*, \qa, \qac...\qac*, \qm#, \b
+        """Converts USFM **Poetry** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \q#, \qr, \qc, \qs...\qs*, \qa, \qac...\qac*, \qm#, \b
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \qs_(Selah)\qs*
         osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
 
@@ -695,9 +766,16 @@
 
 
     def cvtTables(osis, relaxedConformance):
-        ### Tables
-        ### supported: \tr, \th#, \thr#, \tc#, \tcr#
+        """Converts USFM **Table** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \tr, \th#, \thr#, \tc#, \tcr#
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \tr_
         osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
 
@@ -712,6 +790,13 @@
 
 
     def processNote(note):
+        """Convert note-internal USFM tags to OSIS, returning the note as a string.
+
+        Keyword arguments:
+        note -- The note as a string.
+
+        """
+
         note = note.replace('\n', ' ')
 
         # \fdc_refs...\fdc*
@@ -750,9 +835,16 @@
 
 
     def cvtFootnotes(osis, relaxedConformance):
-        ### Footnotes
-        ### supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm*
+        """Converts USFM **Footnote** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm*
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \f_+_...\f*
         osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
 
@@ -768,6 +860,13 @@
 
 
     def processXref(note):
+        """Convert cross-reference note-internal USFM tags to OSIS, returning the cross-reference note as a string.
+
+        Keyword arguments:
+        note -- The cross-reference note as a string.
+
+        """
+
         note = note.replace('\n', ' ')
 
         # \xot_refs...\xot*
@@ -806,9 +905,16 @@
 
 
     def cvtCrossReferences(osis, relaxedConformance):
-        ### Cross References
-        ### supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
+        """Converts USFM **Cross Reference** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \x_+_...\x*
         osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
 
@@ -819,9 +925,16 @@
 
     ### Special Text and Character Styles
     def cvtSpecialText(osis, relaxedConformance):
-        ### Special Text
-        ### supported: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj*
+        """Converts USFM **Special Text** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj*
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \add_...\add*
         osis = re.sub(r'\\add\s+(.+?)\\add\*', r'<transChange type="added">\1</transChange>', osis, flags=re.DOTALL)
 
@@ -875,9 +988,16 @@
 
 
     def cvtCharacterStyling(osis, relaxedConformance):
-        ### Character Styling
-        ### supported: \em...\em*, \bd...\bd*, \it...\it*, \bdit...\bdit*, \no...\no*, \sc...\sc*
+        """Converts USFM **Character Styling** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \em...\em*, \bd...\bd*, \it...\it*, \bdit...\bdit*, \no...\no*, \sc...\sc*
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \em_...\em*
         osis = re.sub(r'\\em\s+(.+?)\\em\*', r'<hi type="emphasis">\1</hi>', osis, flags=re.DOTALL)
 
@@ -900,9 +1020,16 @@
 
 
     def cvtSpacingAndBreaks(osis, relaxedConformance):
-        ### Spacing and Breaks
-        ### supported: ~, //, \pb
+        """Converts USFM **Spacing and Breaks** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: ~, //, \pb
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # ~
         osis = osis.replace('~', '\u00A0')
 
@@ -916,11 +1043,24 @@
 
 
     def cvtSpecialFeatures(osis, relaxedConformance):
-        ### Special Features
-        ### supported: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
+        """Converts USFM **Special Feature** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \fig DESC|FILE|SIZE|LOC|COPY|CAP|REF\fig*
         def makeFigure(matchObject):
+            """Regex helper function to convert USFM \fig to OSIS <figure/>, returning the OSIS element as a string.
+
+            Keyword arguments:
+            matchObject -- a regex match object containing the elements of a USFM \fig tag
+
+            """
             fig_desc,fig_file,fig_size,fig_loc,fig_copy,fig_cap,fig_ref = matchObject.groups()
             figure = '<figure'
             if fig_file:
@@ -968,11 +1108,24 @@
 
 
     def cvtPeripherals(osis, relaxedConformance):
-        ### Peripherals
-        ### supported: \periph
+        """Converts USFM **Peripheral** tags to OSIS, returning the processed text as a string.
 
+        Supported tag: \periph
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \periph
         def tagPeriph(matchObject):
+            """Regex helper function to tag peripherals, returning a <div>-encapsulated string.
+
+            Keyword arguments:
+            matchObject -- a regex match object containing the peripheral type and contents
+
+            """
             periphType,contents = matchObject
             periph = '<div type="'
             if periphType in peripherals:
@@ -990,9 +1143,16 @@
 
 
     def cvtStudyBibleContent(osis, relaxedConformance):
-        ### Study Bible Content
-        ### supported: \ef...\ef*, \ex...\ex*, \esb...\esbe, \cat
+        """Converts USFM **Study Bible Content** tags to OSIS, returning the processed text as a string.
 
+        Supported tags: \ef...\ef*, \ex...\ex*, \esb...\esbe, \cat
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # \ef...\ef*
         osis = re.sub(r'\\ef\s+([^\s\\]+?)\s*(.+?)\s*\\ef\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="study">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
         osis = re.sub(r'(<note\b[^>]*?>.*?</note>)', lambda m: processNote(m.group(1)), osis, flags=re.DOTALL)
@@ -1011,8 +1171,16 @@
 
 
     def cvtPrivateUseExtensions(osis, relaxedConformance):
-        ### \z namespace
-        ### supported: \z<Extension>
+        """Converts USFM **\z namespace** tags to OSIS, returning the processed text as a string.
+
+        Supported tags: \z<Extension>
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         ### We can't really know what these mean, but will preserve them as <milestone/> elements.
 
         # publishing assistant markers
@@ -1033,10 +1201,23 @@
 
 
     def processOsisIDs(osis):
+        """Perform postprocessing on an OSIS document, returning the processed text as a string.
+        Recurses through chapter & verses, substituting actual book IDs & chapter numbers for placeholders.
+
+        Keyword arguments:
+        osis -- The document as a string.
+
+        """
         # TODO: add support for subverses, including in ranges/series, e.g. Matt.1.1!b-Matt.2.5,Matt.2.7!a
         # TODO: make sure that descending ranges generate invalid markup (osisID="")
         # expand verse ranges, series
         def expandRange(vRange):
+            """Expands a verse range into its constituent verses as a string.
+
+            Keyword arguments:
+            vRange -- A string of the lower & upper bounds of the range, with a hyphen in between.
+            
+            """
             vRange = re.findall(r'\d+', vRange)
             osisID = list()
             for n in range(int(vRange[0]), int(vRange[1])+1):
@@ -1045,6 +1226,13 @@
         osis = re.sub(r'\$BOOK\$\.\$CHAP\$\.(\d+-\d+)"', lambda m: expandRange(m.group(1))+'"', osis)
 
         def expandSeries(vSeries):
+            """Expands a verse series (list) into its constituent verses as a string.
+
+            Keyword arguments:
+            vSeries -- A comma-separated list of verses.
+            
+            """
+
             vSeries = re.findall(r'\d+', vSeries)
             osisID = list()
             for n in vSeries:
@@ -1074,6 +1262,15 @@
 
 
     def osisReorderAndCleanup(osis):
+        """Perform postprocessing on an OSIS document, returning the processed text as a string.
+        Reorders elements, strips non-characters, and cleans up excess spaces & newlines
+
+        Keyword arguments:
+        osis -- The document as a string.
+        relaxedConformance -- Boolean value indicating whether to process non-standard & deprecated USFM tags.
+
+        """
+
         # assorted re-orderings
         osis = re.sub('(\uFDD3<chapter eID=.+?\n)(<verse eID=.+?>\uFDD2)\n?', r'\2'+'\n'+r'\1', osis)
         osis = re.sub('([\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9]</div>)([^\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9]*<chapter eID.+?>)', r'\2\1', osis)
@@ -1084,7 +1281,7 @@
         # delete attributes from end tags (since they are invalid)
         osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
         osis = osis.replace('<lb type="x-p"/>', '<lb/>')
-        # delete Unicode tags
+        # delete Unicode non-characters
         for c in '\uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF':
             osis = osis.replace(c, '')
 
@@ -1118,7 +1315,7 @@
     # call individual conversion processors in series
     osis = cvtPreprocess(osis, relaxedConformance)
     osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
-    osis = cvtIdentification(osis, relaxedConformance, sFile)
+    osis = cvtIdentification(osis, relaxedConformance)
     osis = cvtIntroductions(osis, relaxedConformance)
     osis = cvtTitles(osis, relaxedConformance)
     osis = cvtChaptersAndVerses(osis, relaxedConformance)
@@ -1150,6 +1347,13 @@
     return osis
 
 def readIdentifiersFromOsis(filename):
+    """Reads the USFM file and stores information about which Bible book it represents and localized abbreviations in global variables.
+
+    Keyword arguments:
+    filename -- a USFM filename
+
+    """
+
     global encoding
     global loc2osisBk, osis2locBk, filename2osis
 
@@ -1184,10 +1388,12 @@
             loc2osisBk[locBook]=osisBook
 
 def verbosePrint(text):
+    """Wrapper for print() that only prints if verbose is True."""
     if verbose:
         print(text)
 
 def printUsage():
+    """Prints usage statement."""
     print(('usfm2osis.py -- USFM ' + usfmVersion + ' to OSIS ' + osisVersion + ' converter version ' + scriptVersion))
     print(('                Revision: ' + rev + ' (' + date + ')'))
     print('')
@@ -1212,8 +1418,8 @@
 
 
 class Worker(multiprocessing.Process):
+    """Worker object for multiprocessing."""
     def __init__(self, work_queue, result_queue):
-
         # base class initialization
         multiprocessing.Process.__init__(self)
 
@@ -1224,9 +1430,7 @@
 
     def run(self):
         while not self.kill_received:
-
             # get a task
-            #job = self.work_queue.get_nowait()
             try:
                 job = self.work_queue.get_nowait()
             except Queue.Empty:
@@ -1234,6 +1438,7 @@
 
             # the actual processing
             osis = convertToOsis(job)
+            # TODO: move XML validation here?
 
             # store the result
             self.result_queue.put((job,osis))