[sword-devel] [PATCH] Make versification/av11n.py script working again.

Matěj Cepl mcepl at cepl.eu
Sat Mar 31 09:44:18 MST 2018


That means:
    * Make it PEP8 compliant
    * That unfortunately includes '\t' -> '    ' change (PEP8 doesn’t
      like TABs), which makes for the massive diff.
    * Don’t use deprecated API calls.
        Use Sword.VersificationMgr.getSystemVersificationMgr() instead
            of Sword.VerseMgr.getSystemVerseMgr().
        Use popError() instead of Error().
    * Switch to logging to make it a little bit more civilized.
    * Don’t use RE when you don’t need it.
    * Shorten the labels so they are not overflowing the screen.
    * Don’t fool with PyQuery and use standard XML libraries.
    * Use argparse.
---
 versification/av11n.py | 213 ++++++++++++++++++++++++++++---------------------
 1 file changed, 124 insertions(+), 89 deletions(-)

diff --git a/versification/av11n.py b/versification/av11n.py
index 136a382..5386b8e 100755
--- a/versification/av11n.py
+++ b/versification/av11n.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 #
 # This does a very roughshod attempt to compare the osisIDs found in an
 # XML file with each of the versifications that SWORD knows about to help
@@ -7,106 +8,140 @@
 # in the proper order, although within each testament, it requires nothing
 # special as for ordering.
 #
-# Invoke simply by calling the program and the file name.  If you want
-# more output, change the following line to be True instead of False
-verbose = False
-debug = True
-import sys
+# Invoke simply by calling the program and the file name.
+from __future__ import print_function
+import argparse
+import io
+import logging
 import re
-verseid = re.compile('^.+\..+\..+$')
+import sys
+
+# In the normal state the level should be logging.WARNING; logging.INFO and
+# logging.DEBUG give additional information.
+logging.basicConfig(format='%(levelname)s:%(message)s',
+                    level=logging.INFO)
+log = logging.getLogger('versification')
+
+try:
+    import lxml.etree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
+VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$')
 
 # Inform the user that we need the SWORD extension
 try:
-	import Sword
-except:
-	print "You do not have the SWORD library installed. Please install it."
-	sys.exit(1)
+    import Sword
+except ImportError:
+    log.exception(
+        "You do not have the SWORD library installed. Please install it.")
+    sys.exit(1)
 
-# Inform the user that we need pyquery, as it makes parsing XML files that much easier
-try:
-	from pyquery import PyQuery as pq
-except:
-	print "You do not appear to have PyQuery installed. Please install it."
-	sys.exit(2)
+arg_parser = argparse.ArgumentParser(
+    description='Compare OSIS file with available v11ns.')
 
-# Without the name of a file, we cannot proceed any further
-if len(sys.argv) < 2 or sys.argv[1] == '--help':
-	print "Usage: %s <OSISfile>" % (sys.argv[0],)
+arg_parser.add_argument('--verbose', '-v', action='count')
+arg_parser.add_argument('filename', nargs=1)
+
+
+args = arg_parser.parse_args()
+
+if args.verbose:  # -v/--verbose was given at least once
+    log.setLevel(logging.DEBUG)  # setLevel is a method: call it, don't assign
+
+log.debug('args = %s', args)
 
 # Open the file
-if debug:
-	print 'Opening %s' % (sys.argv[1],)
-d = pq(filename=sys.argv[1])
+log.debug('Opening %s', args.filename[0])
+
+tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot()
 # Get the list of versifications
-if debug:
-	print 'Fetching a list of versifications'
-vmgr = Sword.VerseMgr.getSystemVerseMgr()
+log.debug('Fetching a list of v11ns')
+vmgr = Sword.VersificationMgr.getSystemVersificationMgr()
 av11ns = vmgr.getVersificationSystems()
+log.debug('av11ns = %s', av11ns)
 
 # Get the list of all osisIDs
-if debug:
-	print 'Fetching a list of OSIS IDs'
-ids = d("*[osisID]")
+log.debug('Fetching a list of OSIS IDs')
+ids = set()
+for item in tree.iter('{%s}verse' % OSIS_NS):
+    if 'osisID' in item.attrib:
+        ids.add(item.attrib['osisID'].split('!')[0])
+log.debug('ids = len(%d)', len(ids))
+
 # Iterate each versification scheme
 for v11n in av11ns:
-	print 'Checking %s' % (v11n.c_str(),)
-	# Construct a list of the IDs in this versification
-	key = Sword.VerseKey()
-	key.setVersificationSystem(v11n.c_str())
-	otkeyList = [] # Anything left in this afterwards is missing from the OSIS ot
-	ntkeyList = [] # Anything left in this afterwards is missing from the OSIS nt
-	otextraKeys = [] # Anything that gets placed in here is extraneous OT material (we think)
-	ntextraKeys = [] # Anything that gets placed in here is extraneous NT material (we think)
-	
-	inNT = False
-	while key.Error() == '\x00':
-		skey = key.getOSISRef()
-		if not inNT and re.match('^Matt', skey): # Assume we enter the NT when we hit Matthew
-			inNT = True
-		if inNT:
-			ntkeyList.append(skey)
-		else:
-			otkeyList.append(skey)
-		key.increment()
-	ntkeyList = set(ntkeyList) # The 'in' operator only works on a set
-	otkeyList = set(otkeyList)
-	
-	inNT = False
-	# Now iterate the ones we have in this file
-	for e in ids:
-		osisid = e.attrib.get('osisID')
-		#print 'Checking key %s' % (osisid,)
-		if osisid in otkeyList:
-			otkeyList.remove(osisid)
-		elif osisid in ntkeyList:
-			ntkeyList.remove(osisid)
-			inNT = True
-		elif verseid.match(osisid) and inNT:
-			ntextraKeys.append(osisid)
-		elif verseid.match(osisid) and not inNT:
-			otextraKeys.append(osisid)
-		# Ignore it if not verseid.match() 
-			
-	# Now let's see what is left over
-	keyList = list(otkeyList.union(ntkeyList)) # Sets in Python cannot be ordered
-	keyList.sort()
-	if len(keyList) > 0:
-		if verbose:
-			print '\tThe following IDs do not appear in your file:'
-			for k in keyList:
-				print k
-		else:
-			print '\tThere are %d OT IDs and %d NT IDs in the versification which are not in your file.' % (len(otkeyList), len(ntkeyList))
-	else:
-		print '\tYour file has all the references in this versification'
-		
-	# Now let's see if you had extra
-	if len(otextraKeys + ntextraKeys) > 0:
-		if verbose:
-			print '\tThe following IDs do not appear in the versification:'
-			for k in ntextraKeys + otextraKeys:
-				print k
-		else:
-			print '\tThere are %d OT IDs and %d NT IDs in your file which do not appear in the versification.' % (len(otextraKeys), len(ntextraKeys))
-	else:
-		print '\tYour file has no extra references'
+    v11n_name = v11n.c_str()
+    print('\nChecking %s:\n%s' %
+          (v11n_name, (len(v11n_name) + 10) * '-'))
+    # Construct a list of the IDs in this versification
+    key = Sword.VerseKey()
+    key.setVersificationSystem(v11n.c_str())
+    # Anything left in this afterwards is missing from the OSIS ot
+    otkeyList = []
+    # Anything left in this afterwards is missing from the OSIS nt
+    ntkeyList = []
+    # Anything that gets placed in here is extraneous OT material (we think)
+    otextraKeys = []
+    # Anything that gets placed in here is extraneous NT material (we think)
+    ntextraKeys = []
+
+    inNT = False
+    while key.popError() == '\x00':
+        skey = key.getOSISRef()
+        # Assume we enter the NT when we hit Matthew
+        if not inNT and skey.startswith('Matt'):
+            inNT = True
+        if inNT:
+            ntkeyList.append(skey)
+        else:
+            otkeyList.append(skey)
+        key.increment()
+    ntkeyList = set(ntkeyList)  # The 'in' operator only works on a set
+    otkeyList = set(otkeyList)
+
+    inNT = False
+    # Now iterate the ones we have in this file
+    for osisid in ids:
+#        log.debug('Checking key %s', osisid)
+        if osisid in otkeyList:
+            otkeyList.remove(osisid)
+        elif osisid in ntkeyList:
+            ntkeyList.remove(osisid)
+            inNT = True
+        else:
+            verse_match = VERSEID_RE.match(osisid)
+            if verse_match and inNT:
+                ntextraKeys.append(verse_match.group(1))
+            elif verse_match and not inNT:
+                otextraKeys.append(verse_match.group(1))
+            # Ignore it if not VERSEID_RE.match()
+
+    # Now let's see what is left over
+    # Sets in Python cannot be ordered
+    keyList = list(otkeyList.union(ntkeyList))
+    keyList.sort()
+    if len(keyList) > 0:
+        if len(keyList) < 100:
+            log.info('\tThe following IDs don’t appear in your file:\n%s',
+                     str(", ".join(keyList)))
+        print(('\tThere are %d OT IDs and %d NT IDs ' +
+               'in v11n which aren’t in your file.') \
+              % (len(otkeyList), len(ntkeyList)))
+    else:
+        print('\tYour file has all the references in this v11n')
+
+    # Now let's see if you had extra (IDs in the file unknown to this v11n)
+    extraKeys = sorted(otextraKeys + ntextraKeys)
+    if extraKeys:
+        # Lists of 100+ IDs are unreadable; print only the counts for those
+        if len(extraKeys) < 100:
+            log.info(
+                '\tThe following IDs don’t appear in v11n:\n%s',
+                ", ".join(extraKeys))
+        print(('\tThere are %d OT IDs and %d NT IDs ' +
+               'in your file which don’t appear in v11n.')
+              % (len(otextraKeys), len(ntextraKeys)))
+    else:
+        print('\tYour file has no extra references')
-- 
2.16.2




More information about the sword-devel mailing list