[sword-svn] r399 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Sun Aug 26 23:47:02 MST 2012


Author: chrislit
Date: 2012-08-26 23:47:02 -0700 (Sun, 26 Aug 2012)
New Revision: 399

Modified:
   trunk/modules/python/usfm2osis.py
Log:
converted non-ANSI to \uXXXX escapes for Python3 compatibility


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-26 22:40:55 UTC (rev 398)
+++ trunk/modules/python/usfm2osis.py	2012-08-27 06:47:02 UTC (rev 399)
@@ -49,7 +49,7 @@
 # 1.x IMP output?
 # 1.x SWORD module output?, requiring SWORD bindings
 
-### TODO for next milestone:
+### TODO for 0.6:
 # expand sub-verses with ! in osisIDs
 # Python3 compatibility
 # document functions (docstrings)
@@ -58,33 +58,33 @@
 # PyDev project? 
 
 ### Key to non-characters:
-# Used   : ﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡
-# Unused : ﷧﷨﷩﷪﷫﷬﷭﷮﷯
-# ﷐ book
-# ﷑ chapter
-# ﷒ verse
-# ﷓ paragraph
-# ﷔ title
-# ﷕ ms1
-# ﷖ ms2
-# ﷗ ms3
-# ﷘ ms4
-# ﷙ ms5
-# ﷚ s1
-# ﷛ s2
-# ﷜ s3
-# ﷝ s4
-# ﷞ s5
-# ﷟ notes
-# ﷠ intro-list
-# ﷡ intro-outline
-# ﷢ is1
-# ﷣ is2
-# ﷤ is3
-# ﷥ is4
-# ﷦ is5
+# Used   : \uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1
+# Unused : \uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF
+# \uFDD0 book
+# \uFDD1 chapter
+# \uFDD2 verse
+# \uFDD3 paragraph
+# \uFDD4 title
+# \uFDD5 ms1
+# \uFDD6 ms2
+# \uFDD7 ms3
+# \uFDD8 ms4
+# \uFDD9 ms5
+# \uFDDA s1
+# \uFDDB s2
+# \uFDDC s3
+# \uFDDD s4
+# \uFDDE s5
+# \uFDDF notes
+# \uFDE0 intro-list
+# \uFDE1 intro-outline
+# \uFDE2 is1
+# \uFDE3 is2
+# \uFDE4 is3
+# \uFDE5 is4
+# \uFDE6 is5
 
-# ﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞ sections
+# \uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE sections
 
 import sys, codecs, re
 from encodings.aliases import aliases
@@ -396,7 +396,7 @@
         """
 
         # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
-        osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: u'﷐<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + u'</div type="book">﷐\n' , osis, flags=re.DOTALL)
+        osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL)
 
         # \ide_<ENCODING>
         osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above
@@ -439,19 +439,19 @@
         osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction-end">' + m.group(2) + '</title>', osis)
 
         # \is#_text...
-        osis = re.sub(r'\\is1?\s+(.+)', lambda m: u'﷚<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\is2\s+(.+)', lambda m: u'﷛<div type="subSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷛[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\is3\s+(.+)', lambda m: u'﷜<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷜[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\is4\s+(.+)', lambda m: u'﷝<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷝[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\is5\s+(.+)', lambda m: u'﷞<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷞[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\is1?\s+(.+)', lambda m: '\uFDDA<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDA[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA]+)', r'\1'+'</div>\uFDDA\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\is2\s+(.+)', lambda m: '\uFDDB<div type="subSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDB[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB]+)', r'\1'+'</div>\uFDDB\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\is3\s+(.+)', lambda m: '\uFDDC<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDC[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC]+)', r'\1'+'</div>\uFDDC\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\is4\s+(.+)', lambda m: '\uFDDD<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDD[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD]+)', r'\1'+'</div>\uFDDD\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\is5\s+(.+)', lambda m: '\uFDDE<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDE[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE]+)', r'\1'+'</div>\uFDDE\n', osis, flags=re.DOTALL)
 
         # \ip_text...
-        osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p subType="x-introduction">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p subType="x-introduction">\n' + m.group(1) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \ipi_text...
         # \im_text...
@@ -460,38 +460,38 @@
         # \imq_text...
         # \ipr_text...
         pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
-        osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)]  + '" subType="x-introduction">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)]  + '" subType="x-introduction">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \iq#_text...
-        osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
 
         # \ib
         osis = re.sub(r'\\ib\b\s?', '<lb type="x-p"/>', osis)
         osis = osis.replace('\n</l>', '</l>\n')
-        osis = re.sub(u'(<l [^﷐﷑﷓﷔]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
+        osis = re.sub('(<l [^\uFDD0\uFDD1\uFDD3\uFDD4]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
         osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace('<lb type="x-p"/>', '</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
 
         # \ili#_text...
-        osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-1" subType="x-introduction">﷠\1﷠</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-\1" subType="x-introduction">﷠\2﷠</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1" subType="x-introduction">\uFDE0\1\uFDE0</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1" subType="x-introduction">\uFDE0\2\uFDE0</item>', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', ur'﷓<list>\1</list>﷓', osis, flags=re.DOTALL)
+        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4]+</item>)', r'\uFDD3<list>\1</list>\uFDD3', osis, flags=re.DOTALL)
 
         # \iot_text...
         # \io#_text...(references range)
-        osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-1" subType="x-introduction">﷡\1﷡</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-\1" subType="x-introduction">﷡\2﷡</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="head">﷡\1﷡</item type="head">', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\io\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1" subType="x-introduction">\uFDE1\1\uFDE1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1" subType="x-introduction">\uFDE1\2\uFDE1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="head">\uFDE1\1\uFDE1</item type="head">', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub(u'(<item [^﷐﷑﷓﷔﷠]+</item>)', ur'﷓<div type="outline"><list>\1</list></div>﷓', osis, flags=re.DOTALL)
+        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0]+</item>)', r'\uFDD3<div type="outline"><list>\1</list></div>\uFDD3', osis, flags=re.DOTALL)
         osis = re.sub('item type="head"', 'head', osis)
 
         # \ior_text...\ior*
         osis = re.sub(r'\\ior\b\s+(.+?)\\ior\*', r'<reference>\1</reference>', osis, flags=re.DOTALL)
         
         # \iex    # TODO: look for example; I have no idea what this would look like in context
-        osis = re.sub(r'\\iex\b\s*(.+?)'+u'?=(\s*(\\c|</div type="book">﷐))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iex\b\s*(.+?)'+'?=(\s*(\\c|</div type="book">\uFDD0))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL)
 
         # \iqt_text...\iqt*
         osis = re.sub(r'\\iqt\s+(.+?)\\iqt\*', r'<q subType="x-introduction">\1</q>', osis, flags=re.DOTALL)
@@ -508,44 +508,44 @@
         supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp
         """
         # \ms#_text...
-        osis = re.sub(r'\\ms1?\s+(.+)', lambda m: u'﷕<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷕[^﷕﷐]+)', r'\1'+u'</div>﷕\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\ms2\s+(.+)', lambda m: u'﷖<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷖[^﷕﷐﷖]+)', r'\1'+u'</div>﷖\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\ms3\s+(.+)', lambda m: u'﷗<div type="majorSection" n="3"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷗[^﷕﷐﷖﷗]+)', r'\1'+u'</div>﷗\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\ms4\s+(.+)', lambda m: u'﷘<div type="majorSection" n="4"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷘[^﷕﷐﷖﷗﷘]+)', r'\1'+u'</div>﷘\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\ms5\s+(.+)', lambda m: u'﷙<div type="majorSection" n="5"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷙[^﷕﷐﷖﷗﷘﷙]+)', r'\1'+u'</div>﷙\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ms1?\s+(.+)', lambda m: '\uFDD5<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDD5[^\uFDD5\uFDD0]+)', r'\1'+'</div>\uFDD5\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ms2\s+(.+)', lambda m: '\uFDD6<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDD6[^\uFDD5\uFDD0\uFDD6]+)', r'\1'+'</div>\uFDD6\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ms3\s+(.+)', lambda m: '\uFDD7<div type="majorSection" n="3"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDD7[^\uFDD5\uFDD0\uFDD6\uFDD7]+)', r'\1'+'</div>\uFDD7\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ms4\s+(.+)', lambda m: '\uFDD8<div type="majorSection" n="4"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDD8[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8]+)', r'\1'+'</div>\uFDD8\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ms5\s+(.+)', lambda m: '\uFDD9<div type="majorSection" n="5"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDD9[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9]+)', r'\1'+'</div>\uFDD9\n', osis, flags=re.DOTALL)
 
         # \mr_text...
-        osis = re.sub(r'\\mr\s+(.+)', u'﷔<title type="scope"><reference>'+r'\1</reference></title>', osis)
+        osis = re.sub(r'\\mr\s+(.+)', '\uFDD4<title type="scope"><reference>'+r'\1</reference></title>', osis)
 
         # \s#_text...
-        osis = re.sub(r'\\s1?\s+(.+)', lambda m: u'﷚<div type="section"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷚<div type="section">[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\s1?\s+(.+)', lambda m: '\uFDDA<div type="section"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDA<div type="section">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA]+)', r'\1'+'</div>\uFDDA\n', osis, flags=re.DOTALL)
         if relaxedConformance:
             osis = re.sub(r'\\ss\s+', r'\\s2 ', osis)
             osis = re.sub(r'\\sss\s+', r'\\s3 ', osis)
-        osis = re.sub(r'\\s2\s+(.+)', lambda m: u'﷛<div type="subSection"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷛<div type="subSection">[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\s3\s+(.+)', lambda m: u'﷜<div type="x-subSubSection"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷜<div type="x-subSubSection">[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\s4\s+(.+)', lambda m: u'﷝<div type="x-subSubSubSection"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷝<div type="x-subSubSubSection">[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\s5\s+(.+)', lambda m: u'﷞<div type="x-subSubSubSubSection"><title>' + m.group(1) + '</title>', osis)
-        osis = re.sub(u'(﷞<div type="x-subSubSubSubSection">[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\s2\s+(.+)', lambda m: '\uFDDB<div type="subSection"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDB<div type="subSection">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB]+)', r'\1'+'</div>\uFDDB\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\s3\s+(.+)', lambda m: '\uFDDC<div type="x-subSubSection"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDC<div type="x-subSubSection">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC]+)', r'\1'+'</div>\uFDDC\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\s4\s+(.+)', lambda m: '\uFDDD<div type="x-subSubSubSection"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDD<div type="x-subSubSubSection">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD]+)', r'\1'+'</div>\uFDDD\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\s5\s+(.+)', lambda m: '\uFDDE<div type="x-subSubSubSubSection"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub('(\uFDDE<div type="x-subSubSubSubSection">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE]+)', r'\1'+'</div>\uFDDE\n', osis, flags=re.DOTALL)
 
         # \sr_text...
-        osis = re.sub(r'\\sr\s+(.+)', ur'﷔<title type="scope"><reference>\1</reference></title>', osis)
+        osis = re.sub(r'\\sr\s+(.+)', r'\uFDD4<title type="scope"><reference>\1</reference></title>', osis)
         # \r_text...
-        osis = re.sub(r'\\r\s+(.+)', ur'﷔<title type="parallel"><reference type="parallel">\1</reference></title>', osis)
+        osis = re.sub(r'\\r\s+(.+)', r'\uFDD4<title type="parallel"><reference type="parallel">\1</reference></title>', osis)
         # \rq_text...\rq*
-        osis = re.sub(r'\\rq\s+(.+?)\\rq\*', ur'<reference type="source">\1</reference>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\rq\s+(.+?)\\rq\*', r'<reference type="source">\1</reference>', osis, flags=re.DOTALL)
 
         # \d_text...
-        osis = re.sub(r'\\d\s+(.+)', ur'﷔<title canonical="true" type="psalm">\1</title>', osis)
+        osis = re.sub(r'\\d\s+(.+)', r'\uFDD4<title canonical="true" type="psalm">\1</title>', osis)
 
         # \sp_text...
         osis = re.sub(r'\\sp\s+(.+)', r'<speaker>\1</speaker>', osis)
@@ -564,7 +564,7 @@
         supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
         """
         # \c_#
-        osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'﷑<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) +  u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>﷓\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) +  '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL)
 
         # \cp_#
         # \ca_#\ca*
@@ -584,13 +584,13 @@
         osis = re.sub(r'(<chapter [^<]+sID[^<]+/>.+?<chapter eID[^>]+/>)', replaceChapterNumber, osis, flags=re.DOTALL)
 
         # \cl_
-        osis = re.sub(r'\\cl\s+(.+)', u'﷔<title>'+r'\1</title>', osis)
+        osis = re.sub(r'\\cl\s+(.+)', '\uFDD4<title>'+r'\1</title>', osis)
 
         # \cd_#   <--This # seems to be an error
-        osis = re.sub(r'\\cd\b\s+(.+)', u'﷔<title type="x-description">'+r'\1</title>', osis)
+        osis = re.sub(r'\\cd\b\s+(.+)', '\uFDD4<title type="x-description">'+r'\1</title>', osis)
 
         # \v_#
-        osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: u'﷒<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) +  '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + u'"/>﷒\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) +  '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
 
         # \vp_#\vp*
         # \va_#\va*
@@ -618,7 +618,7 @@
         supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
         """
         # \p(_text...)
-        osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p>\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p>\n' + m.group(1) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \pc(_text...)
         # \pr(_text...)
@@ -638,19 +638,19 @@
         paragraphregex = 'pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb'
         if relaxedConformance:
             paragraphregex += '|phi|ps|psi|p1|p2|p3|p4|p5'
-        osis = re.sub(r'\\('+paragraphregex+r')\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\('+paragraphregex+r')\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \cls_text...
-        osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<closer>' + m.group(1) + u'﷓</closer>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<closer>' + m.group(1) + '\uFDD3</closer>\n', osis, flags=re.DOTALL)
 
         # \ph#(_text...)
         # \li#(_text...)
         osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
         osis = re.sub(r'\\ph(\d)\b\s*', r'\\li\1 ', osis)
-        osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\li\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub(u'(<item [^﷐﷑﷓﷔﷠﷡]+</item>)', ur'﷓<list>\1</list>﷓', osis, flags=re.DOTALL)
+        osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0\uFDE1]+</item>)', r'\uFDD3<list>\1</list>\uFDD3', osis, flags=re.DOTALL)
 
         # \b
         osis = re.sub(r'\\b\b\s?', '<lb type="x-p"/>', osis)
@@ -667,23 +667,23 @@
         osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
 
         # \q#(_text...)
-        osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b|<list\b|</?div\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b|<list\b|</?div\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\q\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b|<list\b|</?div\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b|<list\b|</?div\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
 
         # \qr_text...
         # \qc_text...
         # \qm#(_text...)
         qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
-        osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b|<list\b|</?div\b))', lambda m: '<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b|<list\b|</?div\b))', lambda m: '<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
 
         osis = osis.replace('\n</l>', '</l>\n')
-        osis = re.sub(u'(<l [^﷐﷑﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
+        osis = re.sub('(<l [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
 
         # \b
         osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace('<lb type="x-p"/>', '</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
 
         # \qa_text...
-        osis = re.sub(r'\\qa\s+(.+)', u'﷔<title type="acrostic">'+r'\1</title>', osis)
+        osis = re.sub(r'\\qa\s+(.+)', '\uFDD4<title type="acrostic">'+r'\1</title>', osis)
 
         # \qac_text...\qac*
         osis = re.sub(r'\\qac\s+(.+?)\\qac\*', r'<hi type="acrostic">\1</hi>', osis, flags=re.DOTALL)
@@ -697,7 +697,7 @@
         supported: \tr, \th#, \thr#, \tc#, \tcr#
         """
         # \tr_
-        osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
 
         # \th#_text...
         # \thr#_text...
@@ -716,34 +716,34 @@
         note = re.sub(r'\\fdc\b\s(.+?)\\fdc\b\*', r'<seg editions="dc">\1</seg>', note)
 
         # \fq_
-        note = re.sub(r'\\fq\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
+        note = re.sub(r'\\fq\b\s(.+?)(?=(\\f|'+'\uFDDF))', '\uFDDF'+r'<catchWord>\1</catchWord>', note)
 
         # \fqa_
-        note = re.sub(r'\\fqa\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<rdg type="alternate">\1</rdg>', note)
+        note = re.sub(r'\\fqa\b\s(.+?)(?=(\\f|'+'\uFDDF))', '\uFDDF'+r'<rdg type="alternate">\1</rdg>', note)
 
         # \ft_
         note = re.sub(r'\\ft\s', '', note)
 
         # \fr_##SEP##
-        note = re.sub(r'\\fr\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<reference type="annotateRef">\1</reference>', note)
+        note = re.sub(r'\\fr\b\s(.+?)(?=(\\f|'+'\uFDDF))', '\uFDDF'+r'<reference type="annotateRef">\1</reference>', note)
 
         # \fk_
-        note = re.sub(r'\\fk\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
+        note = re.sub(r'\\fk\b\s(.+?)(?=(\\f|'+'\uFDDF))', '\uFDDF'+r'<catchWord>\1</catchWord>', note)
 
         # \fl_
-        note = re.sub(r'\\fl\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<label>\1</label>', note)
+        note = re.sub(r'\\fl\b\s(.+?)(?=(\\f|'+'\uFDDF))', '\uFDDF'+r'<label>\1</label>', note)
 
         # \fp_
         note = re.sub(r'\\fp\b\s(.+?)(?=(\\fp|$))', r'<p>\1</p>', note)
         note = re.sub(r'(<note\b[^>]*?>)(.*?)<p>', r'\1<p>\2</p><p>', note)
 
         # \fv_
-        note = re.sub(r'\\fv\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<hi type="super">\1</hi>', note)
+        note = re.sub(r'\\fv\b\s(.+?)(?=(\\f|'+'\uFDDF))', '\uFDDF'+r'<hi type="super">\1</hi>', note)
 
         # \fq*,\fqa*,\ft*,\fr*,\fk*,\fl*,\fp*,\fv*
         note = re.sub(r'\\f(q|qa|t|r|k|l|p|v)\*', '', note)
 
-        note = note.replace(u'﷟', '')
+        note = note.replace('\uFDDF', '')
         return note
 
 
@@ -753,10 +753,10 @@
         supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm*
         """
         # \f_+_...\f*
-        osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
 
         # \fe_+_...\fe*
-        osis = re.sub(r'\\fe\s+([^\s\\]+?)\s*(.+?)\s*\\fe\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="end">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\fe\s+([^\s\\]+?)\s*(.+?)\s*\\fe\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="end">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
 
         osis = re.sub(r'(<note\b[^>]*?>.*?</note>)', lambda m: processNote(m.group(1)), osis, flags=re.DOTALL)
 
@@ -770,37 +770,37 @@
         note = note.replace('\n', ' ')
 
         # \xot_refs...\xot*
-        note = re.sub(r'\\xot\b\s(.+?)\\xot\b\*', u'﷟'+r'<seg editions="ot">\1</seg>', note)
+        note = re.sub(r'\\xot\b\s(.+?)\\xot\b\*', '\uFDDF'+r'<seg editions="ot">\1</seg>', note)
 
         # \xnt_refs...\xnt*
-        note = re.sub(r'\\xnt\b\s(.+?)\\xnt\b\*', u'﷟'+r'<seg editions="nt">\1</seg>', note)
+        note = re.sub(r'\\xnt\b\s(.+?)\\xnt\b\*', '\uFDDF'+r'<seg editions="nt">\1</seg>', note)
 
         # \xdc_refs...\xdc*
-        note = re.sub(r'\\xdc\b\s(.+?)\\xdc\b\*', u'﷟'+r'<seg editions="dc">\1</seg>', note)
+        note = re.sub(r'\\xdc\b\s(.+?)\\xdc\b\*', '\uFDDF'+r'<seg editions="dc">\1</seg>', note)
 
         # \xq_
-        note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
+        note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+'\uFDDF))', '\uFDDF'+r'<catchWord>\1</catchWord>', note)
 
         # \xo_##SEP##
-        note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference type="annotateRef">\1</reference>', note)
+        note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+'\uFDDF))', '\uFDDF'+r'<reference type="annotateRef">\1</reference>', note)
 
         # \xk_
-        note = re.sub(r'\\xk\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
+        note = re.sub(r'\\xk\b\s(.+?)(?=(\\x|'+'\uFDDF))', '\uFDDF'+r'<catchWord>\1</catchWord>', note)
 
         # \xt_  # This isn't guaranteed to be *the* reference, but it's a good guess.
-        note = re.sub(r'\\xt\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference>\1</reference>', note)
+        note = re.sub(r'\\xt\b\s(.+?)(?=(\\x|'+'\uFDDF))', '\uFDDF'+r'<reference>\1</reference>', note)
         
         if relaxedConformance:
             # TODO: move this to a concordance/index-specific section?
             # \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
-            note = re.sub(r'\\xtSee\b\s(.+?)\\xtSee\b\*', u'﷟'+r'<reference osisRef="\1">See: \1</reference>', note)
+            note = re.sub(r'\\xtSee\b\s(.+?)\\xtSee\b\*', '\uFDDF'+r'<reference osisRef="\1">See: \1</reference>', note)
             # \xtSeeAlso...\xtSeeAlso*: Concordance and Names Index markup for an additional entry target reference.
-            note = re.sub(r'\\xtSeeAlso\b\s(.+?)\\xtSeeAlso\b\*', u'﷟'+r'<reference osisRef="\1">See also: \1</reference>', note)
+            note = re.sub(r'\\xtSeeAlso\b\s(.+?)\\xtSeeAlso\b\*', '\uFDDF'+r'<reference osisRef="\1">See also: \1</reference>', note)
 
         # \xq*,\xt*,\xo*,\xk*
         note = re.sub(r'\\x(q|t|o|k)\*', '', note)
 
-        note = note.replace(u'﷟', '')
+        note = note.replace('\uFDDF', '')
         return note
 
 
@@ -810,7 +810,7 @@
         supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
         """
         # \x_+_...\x*
-        osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
 
         osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
 
@@ -856,7 +856,7 @@
         osis = re.sub(r'\\k\s+(.+?)\\k\*', r'<seg type="keyword">\1</seg>', osis, flags=re.DOTALL)
 
         # \lit
-        osis = re.sub(r'\\lit\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="x-liturgical">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\lit\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="x-liturgical">\n' + m.group(1) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
 
         # \dc_...\dc*  # TODO: Find an example---should this really be transChange?
         osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
@@ -1004,15 +1004,15 @@
         supported: \ef...\ef*, \ex...\ex*, \esb...\esbe, \cat
         """
         # \ef...\ef*
-        osis = re.sub(r'\\ef\s+([^\s\\]+?)\s*(.+?)\s*\\ef\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="study">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ef\s+([^\s\\]+?)\s*(.+?)\s*\\ef\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="study">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL)
         osis = re.sub(r'(<note\b[^>]*?>.*?</note>)', lambda m: processNote(m.group(1)), osis, flags=re.DOTALL)
 
         # \ex...\ex*
-        osis = re.sub(r'\\ex\s+([^\s]+?)\s+(.+?)\s*\\ex\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference" subType="x-study"><reference>' + m.group(2) + u'</reference>﷟</note>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ex\s+([^\s]+?)\s+(.+?)\s*\\ex\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference" subType="x-study"><reference>' + m.group(2) + '</reference>\uFDDF</note>', osis, flags=re.DOTALL)
         osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
 
         # \esb...\esbe  # TODO: this likely needs to go much earlier in the process
-        osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', ur'﷕<div type="x-sidebar">\1</div>﷕'+'\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', r'\uFDD5<div type="x-sidebar">\1</div>\uFDD5'+'\n', osis, flags=re.DOTALL)
 
         # \cat_<TAG>\cat*
         osis = re.sub(r'\\cat\b\s+(.+?)\\cat\*', r'<index index="category" level1="\1"/>', osis)
@@ -1065,14 +1065,14 @@
 
 
         # fill in book & chapter values
-        bookChunks = osis.split(u'﷐')
+        bookChunks = osis.split('\uFDD0')
         osis = ''
         for bc in bookChunks:
             bookValue = re.search(r'<div type="book" osisID="([^"]+?)"', bc)
             if bookValue:
                 bookValue = bookValue.group(1)
                 bc = bc.replace('$BOOK$', bookValue)
-                chapChunks = bc.split(u'﷑')
+                chapChunks = bc.split('\uFDD1')
                 newbc = ''
                 for cc in chapChunks:
                     chapValue = re.search(r'<chapter osisID="[^\."]+\.([^"]+)', cc)
@@ -1087,17 +1087,17 @@
 
     def osisReorderAndCleanup(osis):
         # assorted re-orderings
-        osis = re.sub(u'(﷓<chapter eID=.+?\n)(<verse eID=.+?>﷒)\n?', r'\2'+'\n'+r'\1', osis)
-        osis = re.sub(u'([﷕﷖﷗﷘﷙]</div>)([^﷕﷖﷗﷘﷙]*<chapter eID.+?>)', r'\2\1', osis)
-        osis = re.sub(u'(﷓</p>\n?﷓<p>)\n?(<verse eID=.+?>﷒)\n?', r'\2'+'\n'+r'\1'+'\n', osis)
-        osis = re.sub(u'\n(<verse eID=.+?>﷒)', r'\1'+'\n', osis)
-        osis = re.sub(u'\n*(<l.+?>)(<verse eID=.+?>[﷒\n]*<verse osisID=.+?>)', r'\2\1', osis)
+        osis = re.sub('(\uFDD3<chapter eID=.+?\n)(<verse eID=.+?>\uFDD2)\n?', r'\2'+'\n'+r'\1', osis)
+        osis = re.sub('([\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9]</div>)([^\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9]*<chapter eID.+?>)', r'\2\1', osis)
+        osis = re.sub('(\uFDD3</p>\n?\uFDD3<p>)\n?(<verse eID=.+?>\uFDD2)\n?', r'\2'+'\n'+r'\1'+'\n', osis)
+        osis = re.sub('\n(<verse eID=.+?>\uFDD2)', r'\1'+'\n', osis)
+        osis = re.sub('\n*(<l.+?>)(<verse eID=.+?>[\uFDD2\n]*<verse osisID=.+?>)', r'\2\1', osis)
 
         # delete attributes from end tags (since they are invalid)
         osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
         osis = osis.replace('<lb type="x-p"/>', '<lb/>')
         # delete Unicode tags
-        for c in u'﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯':
+        for c in '\uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF':
             osis = osis.replace(c, '')
 
         for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse', 'head', 'title', 'item', 'list']:




More information about the sword-cvs mailing list