[sword-svn] r396 - trunk/modules/python
chrislit at crosswire.org
chrislit at crosswire.org
Sun Aug 26 02:28:04 MST 2012
Author: chrislit
Date: 2012-08-26 02:28:04 -0700 (Sun, 26 Aug 2012)
New Revision: 396
Modified:
trunk/modules/python/usfm2osis.py
Log:
implemented sorting key functions for canonical & usfm-numeric orders
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2012-08-26 01:39:07 UTC (rev 395)
+++ trunk/modules/python/usfm2osis.py 2012-08-26 09:28:04 UTC (rev 396)
@@ -209,7 +209,7 @@
'INDEX', 'GAZETTEER', 'X-OTHER'
]
-sfmNumericOrder = [
+usfmNumericOrder = [
# Front Matter
'FRONT', 'INTRODUCTION',
@@ -282,6 +282,7 @@
osis2locBk = dict()
loc2osisBk = dict()
+filename2osis = dict()
verbose = bool()
ucs4 = (sys.maxunicode > 0xFFFF)
@@ -317,6 +318,18 @@
END PSF-licened segment
"""
+def keycanon(filename):
+ if filename2osis:
+ return canonicalOrder.index(filename2osis[filename])
+ else:
+ return keynat(filename)
+
+def keyusfm(filename):
+ if filename2osis:
+ return usfmNumericOrder.index(filename2osis[filename])
+ else:
+ return keynat(filename)
+
def convertToOSIS(sFile):
global encoding
global relaxedConformance
@@ -375,18 +388,19 @@
return osis
- def cvtIdentification(osis, relaxedConformance):
+ def cvtIdentification(osis, relaxedConformance, filename):
"""
Identification
supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
"""
- global loc2osisBk, osis2locBk
+ global loc2osisBk, osis2locBk, filename2osis
# \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: u'<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">\n' , osis, flags=re.DOTALL)
# keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
if osisBook:
osisBook = bookDict[osisBook.group(1)]
+ filename2osis[filename] = osisBook
# \ide_<ENCODING>
osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above
@@ -1128,7 +1142,7 @@
# call individual conversion processors in series
osis = cvtPreprocess(osis, relaxedConformance)
osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
- osis = cvtIdentification(osis, relaxedConformance)
+ osis = cvtIdentification(osis, relaxedConformance, sFile)
osis = cvtIntroductions(osis, relaxedConformance)
osis = cvtTitles(osis, relaxedConformance)
osis = cvtChaptersAndVerses(osis, relaxedConformance)
@@ -1175,7 +1189,7 @@
print(' -h, --help print this usage information')
print(' -o FILENAME output filename (default is: <osisWork>.osis.xml)')
print(' -r enable relaxed markup processing (for non-standard USFM)')
- print(' -s mode set book sorting mode: natural (default), alpha, canonical, none')
+ print(' -s mode set book sorting mode: natural (default), alpha, canonical, usfm, none')
print(' -v verbose feedback')
print(' -x disable XML validation')
print('')
@@ -1280,24 +1294,22 @@
printUsage()
if sys.argv[i].startswith('a'):
sortKey = None
- sortCmp = None
print('Sorting book files alphanumerically.')
elif sys.argv[i].startswith('na'):
sortKey = keynat
- sortCmp = None
print('Sorting book files naturally.')
elif sys.argv[i].startswith('c'):
- sortKey = keynat # TODO: write appropriate helpers
- sortCmp = None
+ sortKey = keycanon
print('Sorting book files canonically.')
+ elif sys.argv[i].startswith('u'):
+ sortKey = keyusfm
+ print('Sorting book files by USFM book number.')
else:
sortKey = None # TODO: write appropriate helpers
- sortCmp = None
print('Leaving book files unsorted.')
inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
else:
sortKey = keynat
- sortCmp = None
print('Sorting book files naturally.')
More information about the sword-cvs
mailing list