[sword-svn] r420 - trunk/modules/python
chrislit at crosswire.org
chrislit at crosswire.org
Sun Jun 9 15:10:02 MST 2013
Author: chrislit
Date: 2013-06-09 15:10:02 -0700 (Sun, 09 Jun 2013)
New Revision: 420
Modified:
trunk/modules/python/usfm2osis.py
Log:
reorganized roadmap & bumped version to reflect current featureset
improved printed feedback
decreased threads to one less than the number of processors to improve UI response during the long USFM to OSIS conversion process
Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py 2013-06-09 21:53:34 UTC (rev 419)
+++ trunk/modules/python/usfm2osis.py 2013-06-09 22:10:02 UTC (rev 420)
@@ -9,7 +9,7 @@
usfmVersion = '2.35' # http://ubs-icap.org/chm/usfm/2.35/index.html
osisVersion = '2.1.1' # http://www.bibletechnologies.net/osisCore.2.1.1.xsd
-scriptVersion = '0.5'
+scriptVersion = '0.6'
# usfm2osis.py
# Copyright 2012 by the CrossWire Bible Society <http://www.crosswire.org/>
@@ -40,9 +40,10 @@
### Roadmap:
# 0.5 initial commit, including full coverage of core USFM tags
-# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatability; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project?
-# 0.7 test suite incorporating all USFM examples from UBS ICAP and other complex cases
-# 0.8 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
+# 0.6 file sorting options (natural/alphabetic/canonical/none); Python3 compatability; add optional schema validator (lxml probably); docstrings
+# 0.7 expand sub-verses with ! in osisIDs; unittest; make fully OO; PyDev project?
+# 0.8 test suite incorporating all USFM examples from UBS ICAP and other complex cases
+# 0.9 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
# 1.0 feature complete for release & production use
# 1.x xreffix.pl-functionality (osisParse(ref)), requiring SWORD bindings; use toc3 for localization
# 1.x SWORD-mode output?
@@ -1313,7 +1314,7 @@
if encoding in aliases:
osis = codecs.open(sFile, 'r', encoding).read().strip() + '\n'
else:
- print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8.'))
+ print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8'))
encoding = 'utf-8'
if sys.version_info[0] < 3:
@@ -1380,7 +1381,7 @@
if encoding in aliases:
osis = codecs.open(filename, 'r', encoding).read().strip() + '\n'
else:
- #print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + filename + ' as UTF-8.'))
+ #print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + filename + ' as UTF-8'))
encoding = 'utf-8'
# keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
@@ -1458,7 +1459,7 @@
global encoding
global relaxedConformance
- num_processes = multiprocessing.cpu_count()
+ num_processes = max(1,multiprocessing.cpu_count()-1)
num_jobs = num_processes
encoding = ''
@@ -1519,26 +1520,26 @@
printUsage()
if sys.argv[i].startswith('a'):
sortKey = None
- print('Sorting book files alphanumerically.')
+ print('Sorting book files alphanumerically')
elif sys.argv[i].startswith('na'):
sortKey = keynat
- print('Sorting book files naturally.')
+ print('Sorting book files naturally')
elif sys.argv[i].startswith('c'):
sortKey = keycanon
- print('Sorting book files canonically.')
+ print('Sorting book files canonically')
elif sys.argv[i].startswith('u'):
sortKey = keyusfm
- print('Sorting book files by USFM book number.')
+ print('Sorting book files by USFM book number')
elif sys.argv[i].startswith('random'): # for testing only
sortKey = lambda filename: int(random.random()*256)
- print('Sorting book files randomly.')
+ print('Sorting book files randomly')
else:
sortKey = keysupplied
- print('Leaving book files unsorted, in the order in which they were supplied.')
+ print('Leaving book files unsorted, in the order in which they were supplied')
inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
else:
sortKey = keynat
- print('Sorting book files naturally.')
+ print('Sorting book files naturally')
usfmDocList = sys.argv[inputFilesIdx:]
@@ -1556,6 +1557,7 @@
result_queue = multiprocessing.Queue()
# spawn workers
+ print('Converting USFM documents to OSIS...')
for i in range(num_processes):
worker = Worker(work_queue, result_queue)
worker.start()
@@ -1566,7 +1568,7 @@
k,v=result_queue.get()
osisSegment[k]=v
- verbosePrint('Assembling OSIS document...')
+ print('Assembling OSIS document')
osisDoc = '<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+osisVersion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="und" osisIDWork="' + osisWork + '">\n<header>\n<work osisWork="' + osisWork + '"/>\n</header>\n'
unhandledTags = set()
@@ -1580,13 +1582,13 @@
try:
#import urllib
from lxml import etree
- verbosePrint('Validating XML...')
+ print('Validating XML...')
osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema)))
#osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' + osisVersion + '.xsd').read())))
etree.fromstring(osisDoc, osisParser)
- verbosePrint('XML Valid')
+ print('XML Valid')
except ImportError:
- verbosePrint('For schema validation, install lxml')
+ print('For schema validation, install lxml')
except etree.XMLSyntaxError as eVal:
print('XML Validation error: ' + str(eVal))
@@ -1594,9 +1596,11 @@
osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
osisFile.write(osisDoc)
+ print('Done!')
+
if unhandledTags:
- if verbose:
- print('')
+ print('')
print(('Unhandled USFM tags: ' + ', '.join(sorted(unhandledTags)) + ' (' + str(len(unhandledTags)) + ' total)'))
if not relaxedConformance:
- print('Consider using the -r option for relaxed markup processing.')
+ print('Consider using the -r option for relaxed markup processing')
+
More information about the sword-cvs
mailing list