[sword-svn] r400 - trunk/modules/python

Mon Aug 27 00:48:06 MST 2012

Author: chrislit
Date: 2012-08-27 00:48:06 -0700 (Mon, 27 Aug 2012)
New Revision: 400

Modified:
   trunk/modules/python/usfm2osis.py
Log:
cleaned up excess spaces
completed Python3 compatibility implementation (still works with CPython 2 and PyPy, but not with Jython, because the script relies on Python 2.6+ features such as multiprocessing)


Modified: trunk/modules/python/usfm2osis.py
===================================================================

--- trunk/modules/python/usfm2osis.py	2012-08-27 06:47:02 UTC (rev 399)
+++ trunk/modules/python/usfm2osis.py	2012-08-27 07:48:06 UTC (rev 400)
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from __future__ import print_function, unicode_literals
+#from __future__ import print_function, unicode_literals
 
 date = '$Date$'
 rev = '$Rev$'
@@ -20,7 +20,7 @@
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 # General Public License for more details.
 #
 # The full text of the GNU General Public License is available at:
@@ -40,7 +40,7 @@
 
 ### Roadmap:
 # 0.5 initial commit, including full coverage of core USFM tags
-# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatability; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project? 
+# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatability; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project?
 # 0.7 test suite incorporating all USFM examples from UBS ICAP and other complex cases
 # 0.8 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
 # 1.0 feature complete for release & production use
@@ -51,11 +51,11 @@
 
 ### TODO for 0.6:
 # expand sub-verses with ! in osisIDs
-# Python3 compatability
 # document functions (docstrings)
 # unittest
 # make fully OO
-# PyDev project? 
+# PyDev project?
+# check Python2/3 compatibility
 
 ### Key to non-characters:
 # Used   : \uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1
@@ -88,7 +88,11 @@
 
 import sys, codecs, re
 from encodings.aliases import aliases
-import multiprocessing, Queue
+import multiprocessing
+if sys.version_info[0] < 3:
+    import Queue
+else:
+    import queue as Queue
 import random
 
 date = date.replace('$', '').strip()[6:16]
@@ -170,26 +174,26 @@
     # 1) Book representing parts of protocanonical books follow the primary book
     # 2) Variants follow primary forms
     # 3) Books that appear in only one tradition or Bible appear following their traditional/attested antecedent
-    
+
     # There's no fool-proof way to order books without knowing the tradition ahead of time,
     # but this ordering should get it right often for many common real Bibles.
-    
+
     # Front Matter
-    'FRONT', 'INTRODUCTION', 
+    'FRONT', 'INTRODUCTION',
 
     # OT
     'Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'JoshA', 'Judg', 'JudgB', 'Ruth',
     '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'PrMan', 'Jub', '1En',
     'Ezra', 'Neh', 'Tob', 'TobS', 'Jdt', 'Esth', 'EsthGr', 'AddEsth', '1Meq', '2Meq', '3Meq',
     'Job', 'Ps', 'AddPs', '5ApocSyrPss', 'PsMet', 'Odes', 'Prov', 'Reproof', 'Eccl', 'Song',
-    'Wis', 'Sir', 'WSir', 'PrSol', 'PssSol', 
-    'Isa',  'Jer', 'Lam', 'PrJer', 'Bar', 'EpJer', '2Bar', 'EpBar', '4Bar', 
-    'Ezek', 'Dan', 'DanGr', 'DanTh', 'PrAzar', 'Sus', 'SusTh', 'Bel', 'BelTh', 
+    'Wis', 'Sir', 'WSir', 'PrSol', 'PssSol',
+    'Isa', 'Jer', 'Lam', 'PrJer', 'Bar', 'EpJer', '2Bar', 'EpBar', '4Bar',
+    'Ezek', 'Dan', 'DanGr', 'DanTh', 'PrAzar', 'Sus', 'SusTh', 'Bel', 'BelTh',
     'Hos', 'Joel', 'Amos', 'Obad', 'Jonah', 'Mic', 'Nah', 'Hab', 'Zeph', 'Hag', 'Zech', 'Mal',
 
     # Intertestamentals
     '1Esd', '2Esd', '4Ezra', '5Ezra', '6Ezra',
-    '1Macc', '2Macc', '3Macc', '4Macc', 
+    '1Macc', '2Macc', '3Macc', '4Macc',
 
     # NT
     'Matt', 'Mark', 'Luke', 'John', 'Acts', 'Rom', '1Cor', '2Cor',
@@ -203,7 +207,7 @@
 
     # Private-Use Extensions
     'XXA', 'XXB', 'XXC', 'XXD', 'XXE', 'XXF', 'XXG',
-    
+
     # Back Matter
     'BACK', 'CONCORDANCE', 'GLOSSARY',
     'INDEX', 'GAZETTEER', 'X-OTHER'
@@ -211,12 +215,12 @@
 
 usfmNumericOrder = [
     # Front Matter
-    'FRONT', 'INTRODUCTION', 
+    'FRONT', 'INTRODUCTION',
 
     # OT 01-39
     'Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth',
-    '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'Ezra', 'Neh', 
-    'Esth', 'Job', 'Ps', 'Prov', 'Eccl', 'Song', 'Isa',  'Jer',
+    '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'Ezra', 'Neh',
+    'Esth', 'Job', 'Ps', 'Prov', 'Eccl', 'Song', 'Isa', 'Jer',
     'Lam', 'Ezek', 'Dan', 'Hos', 'Joel', 'Amos', 'Obad', 'Jonah',
     'Mic', 'Nah', 'Hab', 'Zeph', 'Hag', 'Zech', 'Mal',
 
@@ -250,10 +254,10 @@
 
     # Books not currently adopted into USFM, recommended for removal by BFBS
     'JoshA', 'JudgB', 'TobS', 'DanTh', 'SusTh', 'BelTh',
-    
+
     # Private-Use Extensions
     'XXA', 'XXB', 'XXC', 'XXD', 'XXE', 'XXF', 'XXG',
-    
+
     # Back Matter
     'BACK', 'CONCORDANCE', 'GLOSSARY',
     'INDEX', 'GAZETTEER', 'X-OTHER'
@@ -396,7 +400,7 @@
         """
 
         # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
-        osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL)
+        osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n]*?)\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL)
 
         # \ide_<ENCODING>
         osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above
@@ -406,7 +410,7 @@
 
         # \rem_text...
         osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
-        
+
         # \restore_text...
         if relaxedConformance:
             osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis)
@@ -460,7 +464,7 @@
         # \imq_text...
         # \ipr_text...
         pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
-        osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)]  + '" subType="x-introduction">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \iq#_text...
         osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
@@ -473,24 +477,24 @@
         osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace('<lb type="x-p"/>', '</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
 
         # \ili#_text...
-        osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1" subType="x-introduction">\uFDE0\1\uFDE0</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1" subType="x-introduction">\uFDE0\2\uFDE0</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', '<item type="x-indent-1" subType="x-introduction">\uFDE0'+r'\1'+'\uFDE0</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', '<item type="x-indent-\1" subType="x-introduction">\uFDE0'+r'\2'+'\uFDE0</item>', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4]+</item>)', r'\uFDD3<list>\1</list>\uFDD3', osis, flags=re.DOTALL)
+        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4]+</item>)', '\uFDD3<list>'+r'\1'+'</list>\uFDD3', osis, flags=re.DOTALL)
 
         # \iot_text...
         # \io#_text...(references range)
-        osis = re.sub(r'\\io\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1" subType="x-introduction">\uFDE1\1\uFDE1</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1" subType="x-introduction">\uFDE1\2\uFDE1</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="head">\uFDE1\1\uFDE1</item type="head">', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\io\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', '<item type="x-indent-1" subType="x-introduction">\uFDE1'+r'\1'+'\uFDE1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', '<item type="x-indent-\1" subType="x-introduction">\uFDE1'+r'\2'+'\uFDE1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', '<item type="head">\uFDE1'+r'\1'+'\uFDE1</item type="head">', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0]+</item>)', r'\uFDD3<div type="outline"><list>\1</list></div>\uFDD3', osis, flags=re.DOTALL)
+        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0]+</item>)', '\uFDD3<div type="outline"><list>'+r'\1'+'</list></div>\uFDD3', osis, flags=re.DOTALL)
         osis = re.sub('item type="head"', 'head', osis)
 
         # \ior_text...\ior*
         osis = re.sub(r'\\ior\b\s+(.+?)\\ior\*', r'<reference>\1</reference>', osis, flags=re.DOTALL)
-        
-        # \iex    # TODO: look for example; I have no idea what this would look like in context
+
+        # \iex  # TODO: look for example; I have no idea what this would look like in context
         osis = re.sub(r'\\iex\b\s*(.+?)'+'?=(\s*(\\c|</div type="book">\uFDD0))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL)
 
         # \iqt_text...\iqt*
@@ -538,14 +542,14 @@
         osis = re.sub('(\uFDDE<div type="x-subSubSubSubSection">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE]+)', r'\1'+'</div>\uFDDE\n', osis, flags=re.DOTALL)
 
         # \sr_text...
-        osis = re.sub(r'\\sr\s+(.+)', r'\uFDD4<title type="scope"><reference>\1</reference></title>', osis)
+        osis = re.sub(r'\\sr\s+(.+)', '\uFDD4<title type="scope"><reference>'+r'\1</reference></title>', osis)
         # \r_text...
-        osis = re.sub(r'\\r\s+(.+)', r'\uFDD4<title type="parallel"><reference type="parallel">\1</reference></title>', osis)
+        osis = re.sub(r'\\r\s+(.+)', '\uFDD4<title type="parallel"><reference type="parallel">'+r'\1</reference></title>', osis)
         # \rq_text...\rq*
         osis = re.sub(r'\\rq\s+(.+?)\\rq\*', r'<reference type="source">\1</reference>', osis, flags=re.DOTALL)
 
         # \d_text...
-        osis = re.sub(r'\\d\s+(.+)', r'\uFDD4<title canonical="true" type="psalm">\1</title>', osis)
+        osis = re.sub(r'\\d\s+(.+)', '\uFDD4<title canonical="true" type="psalm">'+r'\1</title>', osis)
 
         # \sp_text...
         osis = re.sub(r'\\sp\s+(.+)', r'<speaker>\1</speaker>', osis)
@@ -564,7 +568,7 @@
         supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
         """
         # \c_#
-        osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) +  '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL)
 
         # \cp_#
         # \ca_#\ca*
@@ -590,7 +594,7 @@
         osis = re.sub(r'\\cd\b\s+(.+)', '\uFDD4<title type="x-description">'+r'\1</title>', osis)
 
         # \v_#
-        osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) +  '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) + '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
 
         # \vp_#\vp*
         # \va_#\va*
@@ -626,7 +630,7 @@
         # \pmo(_text...)
         # \pm(_text...)
         # \pmc(_text...)
-        # \pmr_text...          # deprecated: map to same as \pr
+        # \pmr_text...  # deprecated: map to same as \pr
         # \pi#(_Sample text...)
         # \mi(_text...)
         # \nb
@@ -638,7 +642,7 @@
         paragraphregex = 'pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb'
         if relaxedConformance:
             paragraphregex += '|phi|ps|psi|p1|p2|p3|p4|p5'
-        osis = re.sub(r'\\('+paragraphregex+r')\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\('+paragraphregex+r')\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \cls_text...
         osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<closer>' + m.group(1) + '\uFDD3</closer>\n', osis, flags=re.DOTALL)
@@ -650,7 +654,7 @@
         osis = re.sub(r'\\li\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
         osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0\uFDE1]+</item>)', r'\uFDD3<list>\1</list>\uFDD3', osis, flags=re.DOTALL)
+        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0\uFDE1]+</item>)', '\uFDD3<list>'+r'\1'+'</list>\uFDD3', osis, flags=re.DOTALL)
 
         # \b
         osis = re.sub(r'\\b\b\s?', '<lb type="x-p"/>', osis)
@@ -789,7 +793,7 @@
 
         # \xt_  # This isn't guaranteed to be *the* reference, but it's a good guess.
         note = re.sub(r'\\xt\b\s(.+?)(?=(\\x|'+'\uFDDF))', '\uFDDF'+r'<reference>\1</reference>', note)
-        
+
         if relaxedConformance:
             # TODO: move this to a concorance/index-specific section?
             # \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
@@ -862,7 +866,7 @@
         osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
 
         # \sls_...\sls*
-        osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL)  # find a better mapping than <foreign>?
+        osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL)  # TODO: find a better mapping than <foreign>?
 
         if relaxedConformance:
             # \addpn...\addpn*
@@ -873,7 +877,6 @@
             osis = re.sub(r'\\k3\s+(.+?)\\k3\*', r'<seg type="keyword" n="3">\1</seg>', osis, flags=re.DOTALL)
             osis = re.sub(r'\\k4\s+(.+?)\\k4\*', r'<seg type="keyword" n="4">\1</seg>', osis, flags=re.DOTALL)
             osis = re.sub(r'\\k5\s+(.+?)\\k5\*', r'<seg type="keyword" n="5">\1</seg>', osis, flags=re.DOTALL)
-            
 
         return osis
 
@@ -930,7 +933,7 @@
         def makeFigure(matchObject):
             fig_desc,fig_file,fig_size,fig_loc,fig_copy,fig_cap,fig_ref = matchObject.groups()
             figure = '<figure'
-            if  fig_file:
+            if fig_file:
                 figure += ' src="' + fig_file + '"'
             if fig_size:
                 figure += ' size="' + fig_size + '"'
@@ -990,7 +993,7 @@
                 periph += 'introduction" subType="x-' + introPeripherals[periphType]
             else:
                 periph += 'x-unknown'
-            periph += '">\n' +  contents + '</div>\n'
+            periph += '">\n' + contents + '</div>\n'
             return periph
 
         osis = re.sub(r'\\periph\s+([^'+'\n'+r']+)\s*'+'\n'+r'(.+?)(?=(</div type="book">|\\periph\s+))', tagPeriph, osis, flags=re.DOTALL)
@@ -1012,7 +1015,7 @@
         osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
 
         # \esb...\esbex  # TODO: this likely needs to go much earlier in the process
-        osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', r'\uFDD5<div type="x-sidebar">\1</div>\uFDD5'+'\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', '\uFDD5<div type="x-sidebar">'+r'\1'+'</div>\uFDD5\n', osis, flags=re.DOTALL)
 
         # \cat_<TAG>\cat*
         osis = re.sub(r'\\cat\b\s+(.+?)\\cat\*', r'<index index="category" level1="\1"/>', osis)
@@ -1036,7 +1039,7 @@
 
         # \z{X}...\z{X}*
         osis = re.sub(r'\z([^\s]+)\s(.+?)(\z\1\*)', r'<seg type="x-\1">\2</seg>', osis, flags=re.DOTALL)
-        
+
         # \z{X}
         osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis)
 
@@ -1063,7 +1066,6 @@
             return ' '.join(osisID)
         osis = re.sub(r'\$BOOK\$\.\$CHAP\$\.(\d+(,\d+)+)"', lambda m: expandSeries(m.group(1))+'"', osis)
 
-
         # fill in book & chapter values
         bookChunks = osis.split('\uFDD0')
         osis = ''
@@ -1110,7 +1112,6 @@
         osis = re.sub(' ?\n\n+', '\n', osis)
         return osis
 
-
     ### Processing starts here
     if encoding:
         osis = codecs.open(sFile, 'r', encoding).read().strip() + '\n'
@@ -1128,7 +1129,6 @@
                     print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8.'))
                     encoding = 'utf-8'
 
-
     # call individual conversion processors in series
     osis = cvtPreprocess(osis, relaxedConformance)
     osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
@@ -1154,7 +1154,7 @@
 
     # change type on special books
     for sb in specialBooks:
-        osis = osis.replace('<div type="book" osisID="' + sb  + '">', '<div type="' + sb.lower() + '">')
+        osis = osis.replace('<div type="book" osisID="' + sb + '">', '<div type="' + sb.lower() + '">')
 
     if DEBUG:
         localUnhandledTags = set(re.findall(r'(\\[^\s\*]+?\b\*?)', osis))
@@ -1366,7 +1366,6 @@
             k,v=result_queue.get()
             osisSegment[k]=v
 
-        
         verbosePrint('Assembling OSIS document...')
         osisDoc = '<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+osisVersion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="und" osisIDWork="' + osisWork + '">\n<header>\n<work osisWork="' + osisWork + '"/>\n</header>\n'
 
@@ -1389,7 +1388,7 @@
             except ImportError:
                 verbosePrint('For schema validation, install lxml')
             except etree.XMLSyntaxError as eVal:
-                print('XML Validation error: ' + eVal)
+                print('XML Validation error: ' + str(eVal))
 
         osisFile = codecs.open(osisFileName, 'w', 'utf-8')
         osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')