[sword-svn] r364 - trunk/modules/python
chrislit at crosswire.org
chrislit at crosswire.org
Sat Aug 4 17:03:38 MST 2012
Author: chrislit
Date: 2012-08-04 17:03:38 -0700 (Sat, 04 Aug 2012)
New Revision: 364
Modified:
trunk/modules/python/usfm2osis.py
Log:
Updated USFM book codes & separated non-standard codes to be excluded unless -r switch is specified.
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2012-08-04 23:37:28 UTC (rev 363)
+++ trunk/modules/python/usfm2osis.py 2012-08-05 00:03:38 UTC (rev 364)
@@ -58,6 +58,7 @@
bookDict = {
### Known USFM Book codes from Paratext
+ ### Cf. http://ubs-icap.org/chm/usfm/2.35/index.html?book_codes.htm
# OT
'GEN':'Gen', 'EXO':'Exod', 'LEV':'Lev', 'NUM':'Num', 'DEU':'Deut', 'JOS':'Josh', 'JDG':'Judg', 'RUT':'Ruth',
'1SA':'1Sam', '2SA':'2Sam', '1KI':'1Kgs', '2KI':'2Kgs', '1CH':'1Chr', '2CH':'2Chr', 'EZR':'Ezra', 'NEH':'Neh',
@@ -75,41 +76,49 @@
# DC - Eastern Orthodox
'3MA':'3Macc', '4MA':'4Macc', '1ES':'1Esd', '2ES':'2Esd', 'MAN':'PrMan', 'PS2':'Ps151',
# Rahlfs' LXX
- 'ODA':'Odes', 'PSS':'PssSol', 'JSA':'JoshA', 'JDB':'JudgB', 'TBS':'TobS', 'SST':'SusTh', 'DNT':'DanTh',
- 'BLT':'BelTh',
+ 'ODA':'Odes', 'PSS':'PssSol',
# Esdrae
- '4ES':'4Ezra', '5ES':'5Ezra', '6ES':'6Ezra',
+ 'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra',
+ # Inconsistency with Esther
+ 'DAG':'DanGr',
+ # Syriac
+ 'PS3':'5ApocSyrPss', '2BA':'2Bar', 'LBA':'EpBar',
+ # Ethiopic
+ 'JUB':'Jub', 'ENO':'1En', '1MQ':'1Meq', '2MQ':'2Meq', '3MQ':'3Meq', 'REP':'Reproof', '4BA':'4Bar',
+ # Vulgate
+ 'LAO':'EpLao',
+
# Additional non-biblical books
'XXA':'XXA', 'XXB':'XXB', 'XXC':'XXC', 'XXD':'XXD', 'XXE':'XXE', 'XXF':'XXF', 'XXG':'XXG',
- ###
+ # Peripheral books
+ 'FRT':'FRONT', 'INT':'INTRODUCTION', 'BAK':'BACK', 'CNC':'CONCORDANCE', 'GLO':'GLOSSARY',
+ 'TDX':'INDEX', 'NDX':'GAZETTEER', 'OTH':'X-OTHER'
+ }
+
+addBookDict = {
+ ### Deprecated
+ # Rahlfs
+ 'JSA':'JoshA', 'JDB':'JudgB', 'TBS':'TobS', 'SST':'SusTh', 'DNT':'DanTh', 'BLT':'BelTh',
+ # Esdrae
+ '4ES':'4Ezra', '5ES':'5Ezra', '6ES':'6Ezra',
+
+
### Proposed Additions <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
- # Inconsistency with Esther
- 'DAG':'DanGr',
# Alternate Psalms
'PSB':'Ps',
- # Ethiopic
- 'JUB':'Jub', 'ENO':'1En', 'REP':'Reproof', # == Tegsas
- '1MQ':'1Meq', '2MQ':'2Meq', '3MQ':'3Meq', '4BA':'4Bar',
- # Syriac
- '2BA':'2Bar', 'LBA':'EpBar', 'PS3':'5ApocSyrPss',
# Vulgate
- 'LAO':'EpLao', 'PSO':'PrSol', 'PJE':'PrJer',
+ 'PSO':'PrSol', 'PJE':'PrJer',
# Armenian
'WSI':'WSir', 'COP':'CorCorr', '3CO':'3Cor', 'EUT':'PrEut', 'DOJ':'DJohn',
# Apostolic Fathers
'1CL':'1Clem', '2CL':'2Clem', 'SHE':'Herm', 'LBA':'Barn', 'DID':'Did',
###
-
# Proposed replacements <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
- 'ODE':'Odes', 'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra',
-
+ 'ODE':'Odes',
+
# Additional biblical books
- 'ADE':'AddEsth',
-
- # Peripheral books
- 'FRT':'FRONT', 'INT':'INTRODUCTION', 'BAK':'BACK', 'CNC':'CONCORDANCE', 'GLO':'GLOSSARY',
- 'TDX':'INDEX', 'NDX':'GAZETTEER', 'OTH':'X-OTHER'
+ 'ADE':'AddEsth'
}
specialBooks = ['FRONT', 'INTRODUCTION', 'BACK', 'CONCORDANCE', 'GLOSSARY', 'INDEX', 'GAZETTEER', 'X-OTHER']
@@ -195,7 +204,7 @@
"""
global loc2osisBk, osis2locBk
# \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.) ###TESTED###
- osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">\n' , osis, flags=re.DOTALL)
# keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
if osisBook:
@@ -906,8 +915,8 @@
print(' python usfm2osis.py Bible.KJV ./KJV/*.usfm')
verbosePrint('')
verbosePrint('Supported encodings: ' + ', '.join(aliases))
- exit()
+
class Worker(multiprocessing.Process):
def __init__(self, work_queue, result_queue):
@@ -976,6 +985,7 @@
if '-r' in sys.argv:
relaxedConformance = True
+ bookDict = dict(bookDict.items() + addBookDict.items())
inputFilesIdx += 1
usfmDocList = sys.argv[inputFilesIdx:]
More information about the sword-cvs mailing list