[sword-svn] r375 - trunk/modules/python
chrislit at crosswire.org
Fri Aug 10 10:09:14 MST 2012
Author: chrislit
Date: 2012-08-10 10:09:14 -0700 (Fri, 10 Aug 2012)
New Revision: 375
Modified:
trunk/modules/python/usfm2osis.py
Log:
cleaned up spacing in output
fixed output validation errors due to addition of intro tags
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2012-08-10 12:55:16 UTC (rev 374)
+++ trunk/modules/python/usfm2osis.py 2012-08-10 17:09:14 UTC (rev 375)
@@ -35,7 +35,7 @@
# Employ best-practice conformant OSIS
# Employ modularity (functions rather than a big long script)
# Employ the same command-line syntax as usfm2osis.pl
-# Use & abuse Unicode tags (http://unicode.org/charts/PDF/UE0000.pdf) to simplify Regex processing
+# Use non-characters for milestoning
### Roadmap:
# 0.5 initial commit, including full coverage of core USFM tags
@@ -49,8 +49,8 @@
# 1.x SWORD module output?, requiring SWORD bindings
### Key to non-characters:
-# Used :
-# Unused :
+# Used :
+# Unused :
# book
# chapter
# verse
@@ -67,6 +67,13 @@
# s4
# s5
# notes
+# intro-list
+# intro-outline
+# is1
+# is2
+# is3
+# is4
+# is5
import sys, codecs, re
from encodings.aliases import aliases
@@ -325,7 +332,7 @@
# \rem_text...
osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
- # \restore: unpublished, seek example
+ # \restore_text...
if relaxedConformance:
osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis)
@@ -359,20 +366,23 @@
# \imt#_text...
osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction">' + m.group(2) + '</title>', osis)
+ # \imte#_text...
+ osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction-end">' + m.group(2) + '</title>', osis)
+
# \is#_text...
osis = re.sub(r'\\is1?\s+(.+)', lambda m: u'<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
- osis = re.sub(r'\\is2\s+(.+)', lambda m: u'<div type="subsection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\is2\s+(.+)', lambda m: u'<div type="subSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\is3\s+(.+)', lambda m: u'<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\is4\s+(.+)', lambda m: u'<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\is5\s+(.+)', lambda m: u'<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
# \ip_text...
- osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p subType="x-introduction">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p subType="x-introduction">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL)
# \ipi_text...
# \im_text...
@@ -381,11 +391,11 @@
# \imq_text...
# \ipr_text...
pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
- osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL)
# \iq#_text...
- osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u''+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u''+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u''+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u''+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
# \ib
osis = re.sub(r'\\ib\b\s?', '<lb type="x-p"/>', osis)
@@ -394,18 +404,18 @@
osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace('<lb type="x-p"/>', '</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
# \ili#_text...
- osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u''+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u''+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u''+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u''+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^]+</item>)', ur'<list>\1</list>', osis, flags=re.DOTALL)
# \iot_text...
# \io#_text...(references range)
- osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="head">\1</item type="head">', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="head">\1</item type="head">', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^]+</item>)', r'<div type="outline"><list>\1</list></div>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^]+</item>)', ur'<div type="outline"><list>\1</list></div>', osis, flags=re.DOTALL)
osis = re.sub('item type="head"', 'head', osis)
# \ior_text...\ior*
@@ -417,9 +427,6 @@
# \iqt_text...\iqt*
osis = re.sub(r'\\iqt\s+(.+?)\\iqt\*', r'<q subType="x-introduction">\1</q>', osis, flags=re.DOTALL)
- # \imte#_text...
- osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction-end">' + m.group(2) + '</title>', osis)
-
# \ie
osis = re.sub(r'\\ie\b\s*', '<milestone type="x-usfm-ie"/>', osis)
@@ -433,33 +440,33 @@
"""
# \ms#_text...
osis = re.sub(r'\\ms1?\s+(.+)', lambda m: u'<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms2\s+(.+)', lambda m: u'<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms3\s+(.+)', lambda m: u'<div type="majorSection" n="3"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms4\s+(.+)', lambda m: u'<div type="majorSection" n="4"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms5\s+(.+)', lambda m: u'<div type="majorSection" n="5"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
# \mr_text...
osis = re.sub(r'\\mr\s+(.+)', u'<title type="scope"><reference>'+r'\1</reference></title>', osis)
# \s#_text...
osis = re.sub(r'\\s1?\s+(.+)', lambda m: u'<div type="section"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'(<div type="section">[^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
if relaxedConformance:
osis = re.sub(r'\\ss\s+', r'\\s2 ', osis)
osis = re.sub(r'\\sss\s+', r'\\s3 ', osis)
- osis = re.sub(r'\\s2\s+(.+)', lambda m: u'<div type="subsection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(r'\\s2\s+(.+)', lambda m: u'<div type="subSection"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(<div type="subSection">[^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\s3\s+(.+)', lambda m: u'<div type="x-subSubSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'(<div type="x-subSubSection">[^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\s4\s+(.+)', lambda m: u'<div type="x-subSubSubSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'(<div type="x-subSubSubSection">[^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\s5\s+(.+)', lambda m: u'<div type="x-subSubSubSubSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL)
+ osis = re.sub(u'(<div type="x-subSubSubSubSection">[^]+)', r'\1'+u'</div>\n', osis, flags=re.DOTALL)
# \sr_text...
osis = re.sub(r'\\sr\s+(.+)', ur'<title type="scope"><reference>\1</reference></title>', osis)
@@ -564,10 +571,10 @@
# \li#(_text...)
osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
osis = re.sub(r'\\ph(\d)\b\s*', r'\\li\1 ', osis)
- osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^]+</item>)', ur'<list>\1</list>', osis, flags=re.DOTALL)
# \b
osis = re.sub(r'\\b\b\s?', '<lb type="x-p"/>', osis)
@@ -591,14 +598,14 @@
osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
# \q#(_text...)
- osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
# \qr_text...
# \qc_text...
# \qm#(_text...)
qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
- osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: '<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', lambda m: '<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
osis = osis.replace('\n</l>', '</l>\n')
osis = re.sub(u'(<l [^]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
@@ -649,7 +656,7 @@
note = re.sub(r'\\ft\s', '', note)
# \fr_##SEP##
- note = re.sub(r'\\fr\b\s(.+?)(?=(\\f|'+u'))', u''+r'<reference>\1</reference>', note)
+ note = re.sub(r'\\fr\b\s(.+?)(?=(\\f|'+u'))', u''+r'<reference type="annotateRef">\1</reference>', note)
# \fk_
note = re.sub(r'\\fk\b\s(.+?)(?=(\\f|'+u'))', u''+r'<catchWord>\1</catchWord>', note)
@@ -712,21 +719,20 @@
# \xq_
note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+u'))', u''+r'<catchWord>\1</catchWord>', note)
- # \xt_
- note = re.sub(r'\\xt\s', '', note)
+ # \xo_##SEP##
+ note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'))', u''+r'<reference type="annotateRef">\1</reference>', note)
+ # \xk_
+ note = re.sub(r'\\xk\b\s(.+?)(?=(\\x|'+u'))', u''+r'<catchWord>\1</catchWord>', note)
+
+ # \xt_ # This isn't guaranteed to be *the* reference, but it's a good guess.
+ note = re.sub(r'\\xt\b\s(.+?)(?=(\\x|'+u'))', u''+r'<reference>\1</reference>', note)
+
if relaxedConformance:
# TODO: \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
# TODO: \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference.
pass
-
- # \xo_##SEP##
- note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'))', u''+r'<reference>\1</reference>', note)
-
- # \xk_
- note = re.sub(r'\\xk\b\s(.+?)(?=(\\x|'+u'))', u''+r'<catchWord>\1</catchWord>', note)
-
if relaxedConformance:
note = note.replace(r'\xq*', '')
note = note.replace(r'\xt*', '')
@@ -743,7 +749,7 @@
supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
"""
# \x_+_...\x*
- osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference"><reference>' + m.group(2) + u'</reference></note>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference">' + m.group(2) + u'</note>', osis, flags=re.DOTALL)
osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
@@ -871,7 +877,7 @@
if fig_cap:
figure += '<caption>' + fig_cap + '</caption>\n'
if fig_ref:
- figure += '<reference>' + fig_ref + '</reference>\n'
+ figure += '<reference type="annotateRef">' + fig_ref + '</reference>\n'
if fig_desc:
figure += '<!-- fig DESC - ' + fig_desc + ' -->\n'
if fig_loc:
@@ -1019,12 +1025,12 @@
osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
osis = osis.replace('<lb type="x-p"/>', '<lb/>')
# delete Unicode tags
- for c in u'':
+ for c in u'':
osis = osis.replace(c, '')
- for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse']:
- osis = re.sub(' +</'+endBlock+'>', '</'+endBlock+r'>', osis)
- osis = re.sub(' +<'+endBlock+'( eID=[^/>]+/>)', '</'+endBlock+r'\1', osis)
+ for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse', 'head', 'title', 'item', 'list']:
+ osis = re.sub('\s+</'+endBlock+'>', '</'+endBlock+r'>\n', osis)
+ osis = re.sub('\s+<'+endBlock+'( eID=[^/>]+/>)', '<'+endBlock+r'\1'+'\n', osis)
osis = re.sub(' +((</[^>]+>)+) *', r'\1 ', osis)
# strip extra spaces & newlines
More information about the sword-cvs mailing list