[sword-devel] [PATCH] Make versification/av11n.py script work again.
Matěj Cepl
mcepl at cepl.eu
Sat Mar 31 09:44:18 MST 2018
That means:
* Make it PEP8 compliant
* That unfortunately includes the '\t' -> ' ' change (PEP8 doesn’t
like TABs), which makes for a massive diff.
* Don’t use deprecated API calls.
Use Sword.VersificationMgr.getSystemVersificationMgr() instead
of Sword.VerseMgr.getSystemVerseMgr().
Use popError() instead of Error().
* Switch to logging to make it a little bit more civilized.
* Don’t use RE when you don’t need it.
* Shorten the labels so they are not overflowing the screen.
* Don’t fool with PyQuery and use standard XML libraries.
* Use argparse.
---
versification/av11n.py | 213 ++++++++++++++++++++++++++++---------------------
1 file changed, 124 insertions(+), 89 deletions(-)
diff --git a/versification/av11n.py b/versification/av11n.py
index 136a382..5386b8e 100755
--- a/versification/av11n.py
+++ b/versification/av11n.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# coding: utf-8
#
# This does a very roughshod attempt to compare the osisIDs found in an
# XML file with each of the versifications that SWORD knows about to help
@@ -7,106 +8,140 @@
# in the proper order, although within each testament, it requires nothing
# special as for ordering.
#
-# Invoke simply by calling the program and the file name. If you want
-# more output, change the following line to be True instead of False
-verbose = False
-debug = True
-import sys
+# Invoke simply by calling the program and the file name.
+from __future__ import print_function
+import argparse
+import io
+import logging
import re
-verseid = re.compile('^.+\..+\..+$')
+import sys
+
+# In normal use the level should be logging.WARNING; logging.INFO and
+# logging.DEBUG give additional information.
+logging.basicConfig(format='%(levelname)s:%(message)s',
+ level=logging.INFO)
+log = logging.getLogger('versification')
+
+try:
+ import lxml.etree as ET
+except ImportError:
+ import xml.etree.ElementTree as ET
+
+OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
+VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$')
# Inform the user that we need the SWORD extension
try:
- import Sword
-except:
- print "You do not have the SWORD library installed. Please install it."
- sys.exit(1)
+ import Sword
+except ImportError:
+ log.exception(
+ "You do not have the SWORD library installed. Please install it.")
+ sys.exit(1)
-# Inform the user that we need pyquery, as it makes parsing XML files that much easier
-try:
- from pyquery import PyQuery as pq
-except:
- print "You do not appear to have PyQuery installed. Please install it."
- sys.exit(2)
+arg_parser = argparse.ArgumentParser(
+ description='Compare OSIS file with available v11ns.')
-# Without the name of a file, we cannot proceed any further
-if len(sys.argv) < 2 or sys.argv[1] == '--help':
- print "Usage: %s <OSISfile>" % (sys.argv[0],)
+arg_parser.add_argument('--verbose', '-v', action='count')
+arg_parser.add_argument('filename', nargs=1)
+
+
+args = arg_parser.parse_args()
+
+if args.verbose:
+ log.setLevel = logging.DEBUG
+
+log.debug('args = %s', args)
# Open the file
-if debug:
- print 'Opening %s' % (sys.argv[1],)
-d = pq(filename=sys.argv[1])
+log.debug('Opening %s', args.filename[0])
+
+tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot()
# Get the list of versifications
-if debug:
- print 'Fetching a list of versifications'
-vmgr = Sword.VerseMgr.getSystemVerseMgr()
+log.debug('Fetching a list of v11ns')
+vmgr = Sword.VersificationMgr.getSystemVersificationMgr()
av11ns = vmgr.getVersificationSystems()
+log.debug('av11ns = %s', av11ns)
# Get the list of all osisIDs
-if debug:
- print 'Fetching a list of OSIS IDs'
-ids = d("*[osisID]")
+log.debug('Fetching a list of OSIS IDs')
+ids = set()
+for item in tree.iter('{%s}verse' % OSIS_NS):
+ if 'osisID' in item.attrib:
+ ids.add(item.attrib['osisID'].split('!')[0])
+log.debug('ids = len(%d)', len(ids))
+
# Iterate each versification scheme
for v11n in av11ns:
- print 'Checking %s' % (v11n.c_str(),)
- # Construct a list of the IDs in this versification
- key = Sword.VerseKey()
- key.setVersificationSystem(v11n.c_str())
- otkeyList = [] # Anything left in this afterwards is missing from the OSIS ot
- ntkeyList = [] # Anything left in this afterwards is missing from the OSIS nt
- otextraKeys = [] # Anything that gets placed in here is extraneous OT material (we think)
- ntextraKeys = [] # Anything that gets placed in here is extraneous NT material (we think)
-
- inNT = False
- while key.Error() == '\x00':
- skey = key.getOSISRef()
- if not inNT and re.match('^Matt', skey): # Assume we enter the NT when we hit Matthew
- inNT = True
- if inNT:
- ntkeyList.append(skey)
- else:
- otkeyList.append(skey)
- key.increment()
- ntkeyList = set(ntkeyList) # The 'in' operator only works on a set
- otkeyList = set(otkeyList)
-
- inNT = False
- # Now iterate the ones we have in this file
- for e in ids:
- osisid = e.attrib.get('osisID')
- #print 'Checking key %s' % (osisid,)
- if osisid in otkeyList:
- otkeyList.remove(osisid)
- elif osisid in ntkeyList:
- ntkeyList.remove(osisid)
- inNT = True
- elif verseid.match(osisid) and inNT:
- ntextraKeys.append(osisid)
- elif verseid.match(osisid) and not inNT:
- otextraKeys.append(osisid)
- # Ignore it if not verseid.match()
-
- # Now let's see what is left over
- keyList = list(otkeyList.union(ntkeyList)) # Sets in Python cannot be ordered
- keyList.sort()
- if len(keyList) > 0:
- if verbose:
- print '\tThe following IDs do not appear in your file:'
- for k in keyList:
- print k
- else:
- print '\tThere are %d OT IDs and %d NT IDs in the versification which are not in your file.' % (len(otkeyList), len(ntkeyList))
- else:
- print '\tYour file has all the references in this versification'
-
- # Now let's see if you had extra
- if len(otextraKeys + ntextraKeys) > 0:
- if verbose:
- print '\tThe following IDs do not appear in the versification:'
- for k in ntextraKeys + otextraKeys:
- print k
- else:
- print '\tThere are %d OT IDs and %d NT IDs in your file which do not appear in the versification.' % (len(otextraKeys), len(ntextraKeys))
- else:
- print '\tYour file has no extra references'
+ v11n_name = v11n.c_str()
+ print('\nChecking %s:\n%s' %
+ (v11n_name, (len(v11n_name) + 10) * '-'))
+ # Construct a list of the IDs in this versification
+ key = Sword.VerseKey()
+ key.setVersificationSystem(v11n.c_str())
+ # Anything left in this afterwards is missing from the OSIS ot
+ otkeyList = []
+ # Anything left in this afterwards is missing from the OSIS nt
+ ntkeyList = []
+ # Anything that gets placed in here is extraneous OT material (we think)
+ otextraKeys = []
+ # Anything that gets placed in here is extraneous NT material (we think)
+ ntextraKeys = []
+
+ inNT = False
+ while key.popError() == '\x00':
+ skey = key.getOSISRef()
+ # Assume we enter the NT when we hit Matthew
+ if not inNT and skey.startswith('Matt'):
+ inNT = True
+ if inNT:
+ ntkeyList.append(skey)
+ else:
+ otkeyList.append(skey)
+ key.increment()
+ ntkeyList = set(ntkeyList) # The 'in' operator only works on a set
+ otkeyList = set(otkeyList)
+
+ inNT = False
+ # Now iterate the ones we have in this file
+ for osisid in ids:
+# log.debug('Checking key %s', osisid)
+ if osisid in otkeyList:
+ otkeyList.remove(osisid)
+ elif osisid in ntkeyList:
+ ntkeyList.remove(osisid)
+ inNT = True
+ else:
+ verse_match = VERSEID_RE.match(osisid)
+ if verse_match and inNT:
+ ntextraKeys.append(verse_match.group(1))
+ elif verse_match and not inNT:
+ otextraKeys.append(verse_match.group(1))
+ # Ignore it if not VERSEID_RE.match()
+
+ # Now let's see what is left over
+ # Sets in Python cannot be ordered
+ keyList = list(otkeyList.union(ntkeyList))
+ keyList.sort()
+ if len(keyList) > 0:
+ if len(keyList) < 100:
+ log.info('\tThe following IDs don’t appear in your file:\n%s',
+ str(", ".join(keyList)))
+ print(('\tThere are %d OT IDs and %d NT IDs ' +
+ 'in v11n which aren’t in your file.') \
+ % (len(otkeyList), len(ntkeyList)))
+ else:
+ print('\tYour file has all the references in this v11n')
+
+ # Now let's see if you had extra
+ if len(otextraKeys + ntextraKeys) > 0:
+ # It doesn't make sense to print out lists longer than 100
+ # they cannot be read anyway
+ if len(keyList) < 100:
+ log.info(
+ '\tThe following IDs don’t appear in v11n:\n%s',
+ str(", ".join(keyList)))
+ print('\tThere are %d OT IDs and %d NT IDs ' +
+ 'in your file which don’t appear in v11n.') \
+ % (len(otextraKeys), len(ntextraKeys))
+ else:
+ print('\tYour file has no extra references')
--
2.16.2
More information about the sword-devel
mailing list