[sword-svn] r369 - trunk/modules/python
chrislit at crosswire.org
chrislit at crosswire.org
Thu Aug 9 04:38:14 MST 2012
Author: chrislit
Date: 2012-08-09 04:38:14 -0700 (Thu, 09 Aug 2012)
New Revision: 369
Modified:
trunk/modules/python/usfm2osis.py
Log:
added some skeletal code for selection of different file sort algorithms
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2012-08-06 16:24:47 UTC (rev 368)
+++ trunk/modules/python/usfm2osis.py 2012-08-09 11:38:14 UTC (rev 369)
@@ -140,6 +140,55 @@
'ADE':'AddEsth'
}
+canonicalOrder = (  # book codes, grouped by tradition; NOTE(review): presumably the key order for '-s canonical' - confirm
+    # OT
+    'GEN', 'EXO', 'LEV', 'NUM', 'DEU', 'JOS', 'JDG', 'RUT', '1SA', '2SA', '1KI', '2KI', '1CH', '2CH', 'EZR', 'NEH',
+    'EST', 'JOB', 'PSA', 'PRO', 'ECC', 'SNG', 'ISA', 'JER', 'LAM', 'EZK', 'DAN', 'HOS', 'JOL', 'AMO', 'OBA', 'JON',
+    'MIC', 'NAM', 'HAB', 'ZEP', 'HAG', 'ZEC', 'MAL',
+    # DC - Catholic
+    'TOB', 'JDT', 'ESG', 'ADE', 'WIS', 'SIR', 'PSS', 'BAR', 'LJE', 'DAG', 'S3Y', 'SUS', 'BEL', '1MA', '2MA',
+    # DC - Eastern Orthodox
+    '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA',
+    # NT
+    'MAT', 'MRK', 'LUK', 'JHN', 'ACT', 'ROM', '1CO', '2CO', 'GAL', 'EPH', 'PHP', 'COL', '1TH', '2TH', '1TI', '2TI',
+    'TIT', 'PHM', 'HEB', 'JAS', '1PE', '2PE', '1JN', '2JN', '3JN', 'JUD', 'REV',
+    # Rahlfs' LXX
+    'ODA', 'ODE',
+    # Esdrae
+    'EZA', '5EZ', '6EZ',
+    # Inconsistency with Esther
+
+    # Syriac
+    'PS3', '2BA', 'LBA',
+    # Ethiopic
+    'JUB', 'ENO', '1MQ', '2MQ', '3MQ', 'REP', '4BA',
+    # Vulgate
+    'LAO',
+
+    # Additional non-biblical books
+    'XXA', 'XXB', 'XXC', 'XXD', 'XXE', 'XXF', 'XXG',
+
+    # Peripheral books
+    'FRT', 'INT', 'BAK', 'CNC', 'GLO',
+    'TDX', 'NDX', 'OTH',  # trailing comma is required: without it 'OTH' fuses with the next string literal
+    ### Deprecated
+    # Rahlfs
+    'JSA', 'JDB', 'TBS', 'SST', 'DNT', 'BLT',
+    # Esdrae
+    '4ES', '5ES', '6ES',
+
+    # Alternate Psalms
+    'PSB',
+    # Vulgate
+    'PSO', 'PJE',
+    # Armenian
+    'WSI', 'COP', '3CO', 'EUT', 'DOJ',
+    # Apostolic Fathers
+    '1CL', '2CL', 'SHE', 'LBA', 'DID',  # NOTE(review): 'LBA' is also listed under Syriac above, so it occurs twice
+    ###
+    # Proposed replacements <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
+    )
+
specialBooks = ['FRONT', 'INTRODUCTION', 'BACK', 'CONCORDANCE', 'GLOSSARY', 'INDEX', 'GAZETTEER', 'X-OTHER']
peripherals = {
@@ -936,6 +985,7 @@
print(' -h, --help print this usage information')
print(' -o FILENAME output filename (default is: <osisWork>.osis.xml)')
print(' -r enable relaxed markup processing (for non-standard USFM)')
+ print(' -s mode set book sorting mode: natural (default), alpha, canonical, none')
print(' -v verbose feedback')
print('')
print('As an example, if you want to generate the osisWork <Bible.KJV> and your USFM')
@@ -983,6 +1033,7 @@
encoding = ''
relaxedConformance = False
inputFilesIdx = 2 # This marks the point in the sys.argv array, after which all values represent USFM files to be converted.
+ usfmDocList = list()
if '-v' in sys.argv:
verbose = True
@@ -1016,6 +1067,27 @@
bookDict = dict(bookDict.items() + addBookDict.items())
inputFilesIdx += 1
+ if '-s' in sys.argv:
+     i = sys.argv.index('-s')+1  # position of the mode word that follows -s
+     if len(sys.argv) < i+1:
+         printUsage()
+     if sys.argv[i].startswith('a'):
+         sortHelper = None  # key=None: sorted() compares the file names as plain strings
+         print('Sorting book files alphanumerically.')
+     elif sys.argv[i].startswith('na'):
+         sortHelper = keynat
+         print('Sorting book files naturally.')
+     elif sys.argv[i].startswith('c'):
+         sortHelper = keynat # TODO: write appropriate helpers
+         print('Sorting book files canonically.')
+     else:
+         sortHelper = lambda job: 0  # constant key: sorted() is stable, so the supplied order is kept
+         print('Leaving book files unsorted.')
+     inputFilesIdx += 2  # skip both '-s' and its mode so neither lands in sys.argv[inputFilesIdx:]
+ else:
+     sortHelper = keynat
+     print('Sorting book files naturally.')
+
usfmDocList = sys.argv[inputFilesIdx:]
OSISfile = codecs.open(OSISfileName, 'w', 'utf-8')
@@ -1025,7 +1097,7 @@
# run
# load up work queue
work_queue = multiprocessing.Queue()
- for job in sorted(usfmDocList, key=keynat):
+ for job in sorted(usfmDocList, key=sortHelper):
work_queue.put(job)
# create a queue to pass to workers to store the results
More information about the sword-cvs
mailing list