[sword-svn] r372 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Fri Aug 10 03:46:42 MST 2012


Author: chrislit
Date: 2012-08-10 03:46:41 -0700 (Fri, 10 Aug 2012)
New Revision: 372

Modified:
   trunk/modules/python/usfm2osis.py
Log:
re-implemented \i- introduction tags, independent of non-intro versions
cleaned up extraneous spaces
removed ###TESTED### markers


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-10 05:35:45 UTC (rev 371)
+++ trunk/modules/python/usfm2osis.py	2012-08-10 10:46:41 UTC (rev 372)
@@ -54,7 +54,7 @@
 # ﷐ book
 # ﷑ chapter
 # ﷒ verse
-# ﷓ paragraph 
+# ﷓ paragraph
 # ﷔ title
 # ﷕ ms1
 # ﷖ ms2
@@ -95,7 +95,7 @@
     # DC - Eastern Orthodox
     '3MA':'3Macc', '4MA':'4Macc', '1ES':'1Esd', '2ES':'2Esd', 'MAN':'PrMan', 'PS2':'Ps151',
     # Rahlfs' LXX
-    'ODA':'Odes', 'PSS':'PssSol', 
+    'ODA':'Odes', 'PSS':'PssSol',
     # Esdrae
     'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra',
     # Inconsistency with Esther
@@ -134,8 +134,8 @@
     '1CL':'1Clem', '2CL':'2Clem', 'SHE':'Herm', 'LBA':'Barn', 'DID':'Did',
     ###
     # Proposed replacements <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
-    'ODE':'Odes', 
-    
+    'ODE':'Odes',
+
     # Additional biblical books
     'ADE':'AddEsth'
     }
@@ -148,7 +148,7 @@
     # DC - Catholic
     'TOB', 'JDT', 'ESG', 'ADE', 'WIS', 'SIR', 'PSS', 'BAR', 'LJE', 'DAG', 'S3Y', 'SUS', 'BEL', '1MA', '2MA',
     # DC - Eastern Orthodox
-    '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA', 
+    '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA',
     # NT
     'MAT', 'MRK', 'LUK', 'JHN', 'ACT', 'ROM', '1CO', '2CO', 'GAL', 'EPH', 'PHP', 'COL', '1TH', '2TH', '1TI', '2TI',
     'TIT', 'PHM', 'HEB', 'JAS', '1PE', '2PE', '1JN', '2JN', '3JN', 'JUD', 'REV',
@@ -157,7 +157,7 @@
     # Esdrae
     'EZA', '5EZ', '6EZ',
     # Inconsistency with Esther
-    
+
     # Syriac
     'PS3', '2BA', 'LBA',
     # Ethiopic
@@ -274,31 +274,31 @@
         supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
         """
         global loc2osisBk, osis2locBk
-        # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)  ###TESTED###
+        # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
         osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'﷐<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + u'</div type="book">﷐\n' , osis, flags=re.DOTALL)
         # keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
         osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
         if osisBook:
             osisBook = bookDict[osisBook.group(1)]
 
-        # \ide_<ENCODING>  ###TESTED###
+        # \ide_<ENCODING>
         osis = re.sub(r'\\ide\b.*\n', r'', osis) # delete, since this was handled above
 
         # \sts_<STATUS CODE>
-        osis = re.sub(r'\\sts\b\s+(.+)\s*\n', r'<milestone type="x-sts" n="\1"/>\n', osis)
+        osis = re.sub(r'\\sts\b\s+(.+)\s*\n', r'<milestone type="x-usfm-sts" n="\1"/>\n', osis)
 
-        # \rem_text...  ###TESTED###
+        # \rem_text...
         osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
 
-        # \h#_text...  ###TESTED###
+        # \h#_text...
         osis = re.sub(r'\\h\b\s+(.+)\s*\n', r'<title type="runningHead">\1</title>\n', osis)
-        # TODO: \h1-5
+        osis = re.sub(r'\\h(\d)\b\s+(.+)\s*\n', r'<title type="runningHead" n="\1">\2</title>\n', osis)
 
         # \toc1_text...
-        osis = re.sub(r'\\toc1\b\s+(.+)\s*\n', r'<milestone type="x-toc1" n="\1"/>\n', osis)
+        osis = re.sub(r'\\toc1\b\s+(.+)\s*\n', r'<milestone type="x-usfm-toc1" n="\1"/>\n', osis)
 
         # \toc2_text...
-        osis = re.sub(r'\\toc2\b\s+(.+)\s*\n', r'<milestone type="x-toc2" n="\1"/>\n', osis)
+        osis = re.sub(r'\\toc2\b\s+(.+)\s*\n', r'<milestone type="x-utfm-toc2" n="\1"/>\n', osis)
 
         # \toc3_text...
         locBook = re.search(r'\\toc3\b\s+(.+)\s*\n', osis)
@@ -307,7 +307,7 @@
             if osisBook:
                 osis2locBk[osisBook]=locBook
                 loc2osisBk[locBook]=osisBook
-        osis = re.sub(r'\\toc3\b\s+(.+)\s*\n', lambda m: r'<milestone type="x-toc3" n="\1"/>\n', osis)
+        osis = re.sub(r'\\toc3\b\s+(.+)\s*\n', lambda m: r'<milestone type="x-usfm-toc3" n="\1"/>\n', osis)
 
         return osis
 
@@ -315,33 +315,75 @@
     def cvtIntroductions(osis, relaxedConformance):
         """
         Introductions
-        supported:
-        unsupported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie
+        supported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie
         """
-        # \imt#
-        # \is#
-        # \ip
-        # \ipi
-        # \im
-        # \imi
-        # \ipq
-        # \imq
-        # \ipr
-        # \iq#
+        # \imt#_text...
+        osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-introduction">' + m.group(2) + r'</title>', osis)
+
+        # \is#_text...
+        osis = re.sub(r'\\is1?\s+(.+)', lambda m: u'﷚<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, re.DOTALL)
+        osis = re.sub(r'\\is2\s+(.+)', lambda m: u'﷛<div type="subsection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub(u'(﷛[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, re.DOTALL)
+        osis = re.sub(r'\\is3\s+(.+)', lambda m: u'﷜<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub(u'(﷜[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, re.DOTALL)
+        osis = re.sub(r'\\is4\s+(.+)', lambda m: u'﷝<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub(u'(﷝[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, re.DOTALL)
+        osis = re.sub(r'\\is5\s+(.+)', lambda m: u'﷞<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+        osis = re.sub(u'(﷞[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, re.DOTALL)
+
+        # \ip_text...
+        osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p subType="x-introduction">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+
+        # \ipi_text...
+        # \im_text...
+        # \imi_text...
+        # \ipq_text...
+        # \imq_text...
+        # \ipr_text...
+        pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
+        osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)]  + '" subType="x-introduction">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+
+        # \iq#_text...
+        osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
+
         # \ib
-        # \ili#
-        # \iot
-        # \io#
-        # \ior...\ior*
-        # \iex
-        # \iqt...\iqt*
-        # \imte#
+        osis = re.sub(r'\\ib\b\s?', r'<lb type="x-p"/>', osis)
+        osis = osis.replace('\n</l>', '</l>\n')
+        osis = re.sub(u'(<l [^﷐﷑﷓﷔]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
+        osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="x-p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
+
+        # \ili#_text...
+        osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+        osis = osis.replace('\n</item>', '</item>\n')
+        osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+
+        # \iot_text...
+        # \io#_text...(references range)
+        osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="head">\1</item type="head">', osis, flags=re.DOTALL)
+        osis = osis.replace('\n</item>', '</item>\n')
+        osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<div type="outline"><list>\1</list></div>', osis, flags=re.DOTALL)
+        osis = re.sub(r'item type="head"', r'head', osis)
+
+        # \ior_text...\ior*
+        osis = re.sub(r'\\ior\b\s+(.+?)\\ior\*', r'<reference>\1</reference>', osis, flags=re.DOTALL)
+        
+        # \iex    # TODO: look for example; I have no idea what this would look like in context
+        osis = re.sub(r'\\iex\b\s*(.+?)'+u'?=(\s*(\\c|</div type="book">﷐))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL)
+
+        # \iqt_text...\iqt*
+        osis = re.sub(r'\\iqt\s+(.+?)\\iqt\*', r'<q subType="x-introduction">\1</q>', osis, flags=re.DOTALL)
+
+        # \imte#_text...
+        osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-introduction-end">' + m.group(2) + r'</title>', osis)
+
         # \ie
-        # encapsulate introduction elements in a <div>
-        #osis = re.sub(r'(\\i(mt|mt\d+|s|d\d+|p|pi|m|mi|pq|mq|pr|q|q\d+|b|li|ot|o|o\d+|or|or*|ex|qt|qt*|mte|e)\b.+?)(?=\n\\(c|s|m|p|d))', u'<div type="introduction">'+r'\1'+u'</div>\n', osis, flags=re.DOTALL)
-        # map all introduction elements to their non-introduction equivalents
-        #for e in [r'mt', r'mt\d+', r's', r'd\d+', r'p', r'pi', r'm', r'mi', r'pq', r'mq', r'pr', r'q', r'q\d+', r'b', r'li', r'ot', r'o', r'o\d+', r'or', r'or*', r'ex', r'qt', r'qt*', r'mte', r'e']:
-        #    osis = re.sub(r'\\i('+e+r')\b', r'\\\1', osis)
+        osis = re.sub(r'\\ie\b\s*', r'<milestone type="x-usfm-ie"/>', osis)
+
         return osis
 
 
@@ -350,7 +392,7 @@
         Titles, Headings, and Labels
         supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp
         """
-        # \ms#_text...  ###TESTED###
+        # \ms#_text...
         osis = re.sub(r'\\ms1?\s+(.+)', lambda m: u'﷕<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
         osis = re.sub(u'(﷕[^﷕﷐]+)', r'\1'+u'</div>﷕\n', osis, re.DOTALL)
         osis = re.sub(r'\\ms2\s+(.+)', lambda m: u'﷖<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis)
@@ -365,7 +407,7 @@
         # \mr_text...
         osis = re.sub(r'\\mr\s+(.+)', u'﷔<title type="scope"><reference>'+r'\1</reference></title>', osis)
 
-        # \s#_text...  ###TESTED###
+        # \s#_text...
         osis = re.sub(r'\\s1?\s+(.+)', lambda m: u'﷚<div type="section"><title>' + m.group(1) + '</title>', osis)
         osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, re.DOTALL)
         if relaxedConformance:
@@ -387,13 +429,13 @@
         # \rq_text...\rq*
         osis = re.sub(r'\\rq\s+(.+?)\\rq\*', u'<reference type="source">'+r'\1</reference>', osis, flags=re.DOTALL)
 
-        # \d_text...  ###TESTED###
+        # \d_text...
         osis = re.sub(r'\\d\s+(.+)', u'﷔<title canonical="true" type="psalm">'+r'\1</title>', osis)
 
-        # \sp_text...  ###TESTED###
+        # \sp_text...
         osis = re.sub(r'\\sp\s+(.+)', r'<speaker>\1</speaker>', osis)
 
-        # \mt#_text...  ###TESTED###
+        # \mt#_text...
         osis = re.sub(r'\\mt(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main">' + m.group(2) + r'</title>', osis)
         # \mte#_text...
         osis = re.sub(r'\\mte(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-end">' + m.group(2) + r'</title>', osis)
@@ -406,7 +448,7 @@
         Chapters and Verses
         supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
         """
-        # \c_#  ###TESTED###
+        # \c_#
         osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'﷑<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) +  u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>﷓\n', osis, flags=re.DOTALL)
 
         # \cp_#
@@ -432,7 +474,7 @@
         # \cd_#   <--This # seems to be an error
         osis = re.sub(r'\\cd\b\s+(.+)', u'﷔<title type="x-description">'+r'\1</title>', osis)
 
-        # \v_#  ###TESTED###
+        # \v_#
         osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: u'﷒<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + r'" sID="$BOOK$.$CHAP$.' + m.group(1) + r'"/>' + m.group(2) +  r'<verse eID="$BOOK$.$CHAP$.' + m.group(1) + u'"/>﷒\n', osis, flags=re.DOTALL)
 
         # \vp_#\vp*
@@ -460,36 +502,36 @@
         Paragraphs
         supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
         """
-        # \p(_text...)  ###TESTED###
-        osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p>\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        # \p(_text...)
+        osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p>\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
 
         # \pc(_text...)
         # \pr(_text...)
-        # \m(_text...)  ###TESTED###
+        # \m(_text...)
         # \pmo(_text...)
         # \pm(_text...)
         # \pmc(_text...)
         # \pmr_text...          # deprecated: map to same as \pr
         # \pi#(_Sample text...)
         # \mi(_text...)
-        # \nb  ###TESTED###
+        # \nb
         pType = {'pc':'x-center', 'pr':'x-right', 'm':'x-noindent', 'pmo':'x-embedded-opening', 'pm':'x-embedded', 'pmc':'x-embedded-closing', 'pmr':'x-right', 'pi':'x-indented-1', 'pi1':'x-indented-1', 'pi2':'x-indented-2', 'pi3':'x-indented-3', 'pi4':'x-indented-4', 'pi5':'x-indented-5', 'mi':'x-noindent-indented', 'nb':'x-nobreak'}
-        osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
 
         # \cls_text...
-        osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<closer>' + m.group(1) + u'﷓</closer>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<closer>' + m.group(1) + u'﷓</closer>\n', osis, flags=re.DOTALL)
 
         # \ph#(_text...)
-        # \li#(_text...)  ###TESTED###
+        # \li#(_text...)
         osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
-        osis = re.sub(r'\\ph(\d+)\b\s*', r'\\li\1 ', osis)
+        osis = re.sub(r'\\ph(\d)\b\s*', r'\\li\1 ', osis)
         osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
         osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
 
-        # \b  ###TESTED###
-        osis = re.sub(r'\\b\b\s?', r'<lb type="p"/>', osis)
+        # \b
+        osis = re.sub(r'\\b\b\s?', r'<lb type="x-p"/>', osis)
 
         return osis
 
@@ -502,21 +544,21 @@
         # \qs_(Selah)\qs*
         osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
 
-        # \q#(_text...)  ###TESTED###
+        # \q#(_text...)
         osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
 
         # \qr_text...
         # \qc_text...
         # \qm#(_text...)
         qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
-        osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
 
         osis = osis.replace('\n</l>', '</l>\n')
         osis = re.sub(u'(<l [^﷐﷑﷓﷔]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
 
-        # \b  ###TESTED###
-        osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
+        # \b
+        osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="x-p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
 
         # \qa_text...
         osis = re.sub(r'\\qa\s+(.+)', u'﷔<title type="acrostic">'+r'\1</title>', osis)
@@ -551,13 +593,13 @@
         # \fdc_refs...\fdc*
         note = re.sub(r'\\fdc\b\s(.+?)\\fdc\b\*', r'<seg editions="dc">\1</seg>', note)
 
-        # \fq_  ###TESTED###
+        # \fq_
         note = re.sub(r'\\fq\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
 
-        # \fqa_  ###TESTED###
+        # \fqa_
         note = re.sub(r'\\fqa\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<rdg type="alternate">\1</rdg>', note)
 
-        # \ft_  ###TESTED###
+        # \ft_
         note = re.sub(r'\\ft\s', r'', note)
 
         # \fr_##SEP##
@@ -595,7 +637,7 @@
         Footnotes
         supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm*
         """
-        # \f_+_...\f*  ###TESTED###
+        # \f_+_...\f*
         osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
 
         # \fe_+_...\fe*
@@ -624,7 +666,7 @@
         # \xq_
         note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
 
-        # \xt_  ###TESTED###
+        # \xt_
         note = re.sub(r'\\xt\s', r'', note)
 
         # \xo_##SEP##
@@ -648,7 +690,7 @@
         Cross References
         supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
         """
-        # \x_+_...\x*  ###TESTED###
+        # \x_+_...\x*
         osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference"><reference>' + m.group(2) + u'</reference>﷟</note>', osis, flags=re.DOTALL)
 
         osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
@@ -664,10 +706,10 @@
         Special Text
         supported: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj*
         """
-        # \add_...\add*  ###TESTED###
+        # \add_...\add*
         osis = re.sub(r'\\add\s+(.+?)\\add\*', r'<transChange type="added">\1</transChange>', osis, flags=re.DOTALL)
 
-        # \wj_...\wj*  ###TESTED###
+        # \wj_...\wj*
         osis = re.sub(r'\\wj\s+(.+?)\\wj\*', r'<q who="Jesus" marker="">\1</q>', osis, flags=re.DOTALL)
 
         # \nd_...\nd*
@@ -676,7 +718,7 @@
         # \pn_...\pn*
         osis = re.sub(r'\\pn\s+(.+?)\\pn\*', r'<name>\1</name>', osis, flags=re.DOTALL)
 
-        # \qt_...\qt*
+        # \qt_...\qt* # TODO:should this be <q>?
         osis = re.sub(r'\\qt\s+(.+?)\\qt\*', r'<seg type="otPassage">\1</seg>', osis, flags=re.DOTALL)
 
         # \sig_...\sig*
@@ -688,16 +730,16 @@
         # \tl_...\tl*
         osis = re.sub(r'\\tl\s+(.+?)\\tl\*', r'<foreign>\1</foreign>', osis, flags=re.DOTALL)
 
-        # \bk_...\bk*  ###TESTED###
+        # \bk_...\bk*
         osis = re.sub(r'\\bk\s+(.+?)\\bk\*', r'<name type="x-workTitle">\1</name>', osis, flags=re.DOTALL)
 
-        # \k_...\k*  ###TESTED###
+        # \k_...\k*
         osis = re.sub(r'\\k\s+(.+?)\\k\*', r'<seg type="keyword">\1</seg>', osis, flags=re.DOTALL)
 
         # \lit
-        osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="x-liturgical">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\lit\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="x-liturgical">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
 
-        # \dc_...\dc*  #### TODO: Find an example---should this really be transChange?
+        # \dc_...\dc*  # TODO: Find an example---should this really be transChange?
         osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
 
         # \sls_...\sls*
@@ -717,7 +759,7 @@
         # \bd_...\bd*
         osis = re.sub(r'\\bd\s+(.+?)\\bd\*', r'<hi type="bold">\1</hi>', osis, flags=re.DOTALL)
 
-        # \it_...\it*  ###TESTED###
+        # \it_...\it*
         osis = re.sub(r'\\it\s+(.+?)\\it\*', r'<hi type="italic">\1</hi>', osis, flags=re.DOTALL)
 
         # \bdit_...\bdit*
@@ -781,7 +823,7 @@
             return figure
         osis = re.sub(r'\\fig\b\s+([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\\]*)\s*\\fig\*', makeFigure, osis)
 
-        # \ndx_...\ndx* #TODO tag with x-glossary instead of <index/>? Is <index/> containerable?
+        # \ndx_...\ndx* # TODO tag with x-glossary instead of <index/>? Is <index/> containerable?
         osis = re.sub(r'\\ndx\s+(.+?)(\s*)\\ndx\*', r'\1<index index="Index" level1="\1"/>\2', osis, flags=re.DOTALL)
 
         # \pro_...\pro*
@@ -834,7 +876,7 @@
         osis = re.sub(r'\\ex\s+([^\s]+?)\s+(.+?)\s*\\ex\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference" subType="x-study"><reference>' + m.group(2) + u'</reference>﷟</note>', osis, flags=re.DOTALL)
         osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
 
-        # \esb...\esbex  ### TODO: this likely needs to go much earlier in the process
+        # \esb...\esbex  # TODO: this likely needs to go much earlier in the process
         osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', '﷕<div type="x-sidebar">\1</div>﷕\n', osis, flags=re.DOTALL)
 
         # \cat_<TAG>\cat*
@@ -850,7 +892,7 @@
         We can't really know what these mean, but will preserve them as <milestone/> elements.
         """
         # \z
-        osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-z-\1"/>', osis)
+        osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis)
 
         return osis
 
@@ -905,7 +947,7 @@
 
         # delete attributes from end tags (since they are invalid)
         osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
-        osis = osis.replace(r'<lb type="p"/>', r'<lb/>')
+        osis = osis.replace(r'<lb type="x-p"/>', r'<lb/>')
         # delete Unicode tags
         for c in u'﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟':
             osis = osis.replace(c, '')




More information about the sword-cvs mailing list