[sword-svn] r393 - trunk/modules/python
chrislit at crosswire.org
chrislit at crosswire.org
Sat Aug 25 00:47:35 MST 2012
Author: chrislit
Date: 2012-08-25 00:47:35 -0700 (Sat, 25 Aug 2012)
New Revision: 393
Modified:
trunk/modules/python/usfm2osis.py
Log:
added command-line toggle for XML validation
fixed sorting command-line arg handler to allow alphanumeric sorting
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2012-08-25 06:11:21 UTC (rev 392)
+++ trunk/modules/python/usfm2osis.py 2012-08-25 07:47:35 UTC (rev 393)
@@ -1126,6 +1126,7 @@
print(' -r enable relaxed markup processing (for non-standard USFM)')
print(' -s mode set book sorting mode: natural (default), alpha, canonical, none')
print(' -v verbose feedback')
+ print(' -x disable XML validation')
print('')
print('As an example, if you want to generate the osisWork <Bible.KJV> and your USFM')
print(' are located in the ./KJV folder, enter:')
@@ -1181,6 +1182,12 @@
else:
verbose = False
+ if '-x' in sys.argv:
+ validatexml = False
+ inputFilesIdx += 1
+ else:
+ validatexml = True
+
if '-d' in sys.argv:
DEBUG = True
inputFilesIdx += 1
@@ -1220,20 +1227,26 @@
i = sys.argv.index('-s')+1
if len(sys.argv) < i+1:
printUsage()
- if sys.argv[i].startsWith('a'):
- sortHelper = keynat # TODO: write appropriate helpers
+ if sys.argv[i].startswith('a'):
+ sortKey = None
+ sortCmp = None
print('Sorting book files alphanumerically.')
- elif sys.argv[i].startsWith('na'):
- sortHelper = keynat
+ elif sys.argv[i].startswith('na'):
+ sortKey = keynat
+ sortCmp = None
print('Sorting book files naturally.')
- elif sys.argv[i].startsWith('c'):
- sortHelper = keynat # TODO: write appropriate helpers
+ elif sys.argv[i].startswith('c'):
+ sortKey = keynat # TODO: write appropriate helpers
+ sortCmp = None
print('Sorting book files canonically.')
else:
- sortHelper = usfmDocList.index
+ sortKey = None # TODO: write appropriate helpers
+ sortCmp = None
print('Leaving book files unsorted.')
+ inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
else:
- sortHelper = keynat
+ sortKey = keynat
+ sortCmp = None
print('Sorting book files naturally.')
@@ -1244,7 +1257,7 @@
# run
# load up work queue
work_queue = multiprocessing.Queue()
- for job in sorted(usfmDocList, key=sortHelper):
+ for job in sorted(usfmDocList, key=sortKey):
work_queue.put(job)
# create a queue to pass to workers to store the results
@@ -1261,26 +1274,27 @@
k,v=result_queue.get()
osisSegment[k]=v
+
unhandledTags = set()
- for doc in sorted(usfmDocList, key=keynat):
+ for doc in sorted(usfmDocList, key=sortKey):
unhandledTags |= set(re.findall(r'(\\[^\s\*]+?\b\*?)', osisSegment[doc]))
osisDoc += osisSegment[doc]
osisDoc += '</osisText>\n</osis>\n'
- valid = True
- try:
- #import urllib
- from lxml import etree
- verbosePrint('Validating XML...')
- osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema)))
- #osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' + osisVersion + '.xsd').read())))
- etree.fromstring(osisDoc, osisParser)
- verbosePrint('XML Valid')
- except ImportError:
- verbosePrint('For schema validation, install lxml')
- #except etree.XMLSyntaxError as eVal:
- # print('XML Validation error: ' + eVal)
+ if validatexml:
+ try:
+ #import urllib
+ from lxml import etree
+ verbosePrint('Validating XML...')
+ osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema)))
+ #osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' + osisVersion + '.xsd').read())))
+ etree.fromstring(osisDoc, osisParser)
+ verbosePrint('XML Valid')
+ except ImportError:
+ verbosePrint('For schema validation, install lxml')
+ except etree.XMLSyntaxError as eVal:
+ print('XML Validation error: ' + eVal)
osisFile = codecs.open(osisFileName, 'w', 'utf-8')
osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
More information about the sword-cvs mailing list