[sword-svn] r393 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Sat Aug 25 00:47:35 MST 2012


Author: chrislit
Date: 2012-08-25 00:47:35 -0700 (Sat, 25 Aug 2012)
New Revision: 393

Modified:
   trunk/modules/python/usfm2osis.py
Log:
added command-line toggle for XML validation
fixed sorting command-line arg handler to allow alphanumeric sorting


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-25 06:11:21 UTC (rev 392)
+++ trunk/modules/python/usfm2osis.py	2012-08-25 07:47:35 UTC (rev 393)
@@ -1126,6 +1126,7 @@
     print('  -r               enable relaxed markup processing (for non-standard USFM)')
     print('  -s mode          set book sorting mode: natural (default), alpha, canonical, none')
     print('  -v               verbose feedback')
+    print('  -x               disable XML validation')
     print('')
     print('As an example, if you want to generate the osisWork <Bible.KJV> and your USFM')
     print('  are located in the ./KJV folder, enter:')
@@ -1181,6 +1182,12 @@
     else:
         verbose = False
 
+    if '-x' in sys.argv:
+        validatexml = False
+        inputFilesIdx += 1
+    else:
+        validatexml = True
+
     if '-d' in sys.argv:
         DEBUG = True
         inputFilesIdx += 1
@@ -1220,20 +1227,26 @@
             i = sys.argv.index('-s')+1
             if len(sys.argv) < i+1:
                 printUsage()
-            if sys.argv[i].startsWith('a'):
-                sortHelper = keynat # TODO: write appropriate helpers
+            if sys.argv[i].startswith('a'):
+                sortKey = None
+                sortCmp = None
                 print('Sorting book files alphanumerically.')
-            elif sys.argv[i].startsWith('na'):
-                sortHelper = keynat
+            elif sys.argv[i].startswith('na'):
+                sortKey = keynat
+                sortCmp = None
                 print('Sorting book files naturally.')
-            elif sys.argv[i].startsWith('c'):
-                sortHelper = keynat # TODO: write appropriate helpers
+            elif sys.argv[i].startswith('c'):
+                sortKey = keynat # TODO: write appropriate helpers
+                sortCmp = None
                 print('Sorting book files canonically.')
             else:
-                sortHelper = usfmDocList.index
+                sortKey = None # TODO: write appropriate helpers
+                sortCmp = None
                 print('Leaving book files unsorted.')
+            inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
         else:
-            sortHelper = keynat
+            sortKey = keynat
+            sortCmp = None
             print('Sorting book files naturally.')
 
 
@@ -1244,7 +1257,7 @@
         # run
         # load up work queue
         work_queue = multiprocessing.Queue()
-        for job in sorted(usfmDocList, key=sortHelper):
+        for job in sorted(usfmDocList, key=sortKey):
             work_queue.put(job)
 
         # create a queue to pass to workers to store the results
@@ -1261,26 +1274,27 @@
             k,v=result_queue.get()
             osisSegment[k]=v
 
+        
         unhandledTags = set()
-        for doc in sorted(usfmDocList, key=keynat):
+        for doc in sorted(usfmDocList, key=sortKey):
             unhandledTags |= set(re.findall(r'(\\[^\s\*]+?\b\*?)', osisSegment[doc]))
             osisDoc += osisSegment[doc]
 
         osisDoc += '</osisText>\n</osis>\n'
 
-        valid = True
-        try:
-            #import urllib
-            from lxml import etree
-            verbosePrint('Validating XML...')
-            osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema)))
-            #osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' + osisVersion + '.xsd').read())))
-            etree.fromstring(osisDoc, osisParser)
-            verbosePrint('XML Valid')
-        except ImportError:
-            verbosePrint('For schema validation, install lxml')
-        #except etree.XMLSyntaxError as eVal:
-        #    print('XML Validation error: ' + eVal)
+        if validatexml:
+            try:
+                #import urllib
+                from lxml import etree
+                verbosePrint('Validating XML...')
+                osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema)))
+                #osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' + osisVersion + '.xsd').read())))
+                etree.fromstring(osisDoc, osisParser)
+                verbosePrint('XML Valid')
+            except ImportError:
+                verbosePrint('For schema validation, install lxml')
+            except etree.XMLSyntaxError as eVal:
+                print('XML Validation error: ' + eVal)
 
         osisFile = codecs.open(osisFileName, 'w', 'utf-8')
         osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')




More information about the sword-cvs mailing list