[sword-svn] r363 - trunk/modules/python
chrislit at crosswire.org
chrislit at crosswire.org
Sat Aug 4 16:37:28 MST 2012
Author: chrislit
Date: 2012-08-04 16:37:28 -0700 (Sat, 04 Aug 2012)
New Revision: 363
Modified:
trunk/modules/python/usfm2osis.py
Log:
Converted the lowercase "p" tag to an uppercase "P" for consistency.
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2012-08-04 11:24:44 UTC (rev 362)
+++ trunk/modules/python/usfm2osis.py 2012-08-04 23:37:28 UTC (rev 363)
@@ -318,7 +318,7 @@
supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
"""
# \c_# ###TESTED###
- osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>\n', osis, flags=re.DOTALL)
# \cp_#
# \ca_#\ca*
@@ -372,7 +372,7 @@
supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
"""
# \p(_text...) ###TESTED###
- osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p>\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p>\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL)
# \pc(_text...)
# \pr(_text...)
@@ -385,19 +385,19 @@
# \mi(_text...)
# \nb ###TESTED###
pType = {'pc':'x-center', 'pr':'x-right', 'm':'x-noindent', 'pmo':'x-embedded-opening', 'pm':'x-embedded', 'pmc':'x-embedded-closing', 'pmr':'x-right', 'pi':'x-indented-1', 'pi1':'x-indented-1', 'pi2':'x-indented-2', 'pi3':'x-indented-3', 'pi4':'x-indented-4', 'pi5':'x-indented-5', 'mi':'x-noindent-indented', 'nb':'x-nobreak'}
- osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL)
# \cls_text...
- osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<closer>' + m.group(1) + u'</closer>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<closer>' + m.group(1) + u'</closer>\n', osis, flags=re.DOTALL)
# \ph#(_text...)
# \li#(_text...) ###TESTED###
osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
osis = re.sub(r'\\ph(\d+)\b\s*', r'\\li\1 ', osis)
- osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
# \b ###TESTED###
osis = re.sub(r'\\b\b\s?', r'<lb type="p"/>', osis)
@@ -414,17 +414,17 @@
osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
# \q#(_text...) ###TESTED###
- osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
# \qr_text...
# \qc_text...
# \qm#(_text...)
qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
- osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
osis = osis.replace('\n</l>', '</l>\n')
- osis = re.sub(u'(<l [^]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<l [^]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
# \b ###TESTED###
osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
@@ -444,7 +444,7 @@
supported: \tr, \th#, \thr#, \tc#, \tcr#
"""
# \tr_
- osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+u''+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+u''+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
# \th#_text...
# \thr#_text...
@@ -599,7 +599,7 @@
osis = re.sub(r'\\k\s+(.+?)\\k\*', r'<seg type="keyword">\1</seg>', osis, flags=re.DOTALL)
# \lit
- osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="x-liturgical">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="x-liturgical">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL)
# \dc_...\dc* #### TODO: Find an example---should this really be transChange?
osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
@@ -803,9 +803,9 @@
def osisReorderAndCleanup(osis):
# assorted re-orderings
- osis = re.sub(u'(<chapter eID=.+?\n)(<verse eID=.+?>)\n?', r'\2\n\1', osis)
+ osis = re.sub(u'(<chapter eID=.+?\n)(<verse eID=.+?>)\n?', r'\2\n\1', osis)
osis = re.sub(u'([]</div>)([^]*<chapter eID.+?>)', r'\2\1', osis)
- osis = re.sub(u'(</p>\n?<p>)\n?(<verse eID=.+?>)\n?', r'\2\n\1\n', osis)
+ osis = re.sub(u'(</p>\n?<p>)\n?(<verse eID=.+?>)\n?', r'\2\n\1\n', osis)
osis = re.sub(u'\n(<verse eID=.+?>)', r'\1\n', osis)
osis = re.sub(u'\n*(<l.+?>)(<verse eID=.+?>[\n]*<verse osisID=.+?>)', r'\2\1', osis)
@@ -813,7 +813,7 @@
osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
osis = osis.replace(r'<lb type="p"/>', r'<lb/>')
# delete Unicode tags
- for c in u'':
+ for c in u'':
osis = osis.replace(c, '')
for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse']:
More information about the sword-cvs mailing list