<div dir="ltr">Personally I'd prefer to see this as a few separate patches. Especially one that separates out the PEP8 stuff from the other problems.</div><div class="gmail_extra"><br><div class="gmail_quote">On Sat, Mar 31, 2018 at 11:44 AM, Matěj Cepl <span dir="ltr">&lt;<a href="mailto:mcepl@cepl.eu" target="_blank">mcepl@cepl.eu</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">That means:<br>
* Make it PEP8 compliant<br>
* That unfortunately includes '\t' -> ' ' change (PEP8 doesn’t<br>
like TABs), which makes for the massive diff.<br>
* Don’t use deprecated API calls.<br>
Use Sword.VersificationMgr.<wbr>getSystemVersificationMgr() instead<br>
of Sword.VerseMgr.<wbr>getSystemVerseMgr().<br>
Use popError() instead of Error().<br>
* Switch to logging to make it a little bit more civilized.<br>
* Don’t use RE when you don’t need it.<br>
* Shorten the labels so they are not overflowing the screen.<br>
* Don’t fool with PyQuery and use standard XML libraries.<br>
* Use argparse.<br>
---<br>
versification/av11n.py | 213 ++++++++++++++++++++++++++++--<wbr>-------------------<br>
1 file changed, 124 insertions(+), 89 deletions(-)<br>
<br>
diff --git a/versification/av11n.py b/versification/av11n.py<br>
index 136a382..5386b8e 100755<br>
--- a/versification/av11n.py<br>
+++ b/versification/av11n.py<br>
@@ -1,4 +1,5 @@<br>
#!/usr/bin/env python<br>
+# coding: utf-8<br>
#<br>
# This does a very roughshod attempt to compare the osisIDs found in an<br>
# XML file with each of the versifications that SWORD knows about to help<br>
@@ -7,106 +8,140 @@<br>
# in the proper order, although within each testament, it requires nothing<br>
# special as for ordering.<br>
#<br>
-# Invoke simply by calling the program and the file name. If you want<br>
-# more output, change the following line to be True instead of False<br>
-verbose = False<br>
-debug = True<br>
-import sys<br>
+# Invoke simply by calling the program and the file name.<br>
+from __future__ import print_function<br>
+import argparse<br>
+import io<br>
+import logging<br>
import re<br>
-verseid = re.compile('^.+\..+\..+$')<br>
+import sys<br>
+<br>
+# in normal state level should be debug.WARNING, debug.INFO and debug.DEBUG<br>
+# give additional information.<br>
+logging.basicConfig(format='%<wbr>(levelname)s:%(message)s',<br>
+ level=logging.INFO)<br>
+log = logging.getLogger('<wbr>versification')<br>
+<br>
+try:<br>
+ import lxml.etree as ET<br>
+except ImportError:<br>
+ import xml.etree.ElementTree as ET<br>
+<br>
+OSIS_NS = '<a href="http://www.bibletechnologies.net/2003/OSIS/namespace" rel="noreferrer" target="_blank">http://www.bibletechnologies.<wbr>net/2003/OSIS/namespace</a>'<br>
+VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*<wbr>$')<br>
<br>
# Inform the user that we need the SWORD extension<br>
try:<br>
- import Sword<br>
-except:<br>
- print "You do not have the SWORD library installed. Please install it."<br>
- sys.exit(1)<br>
+ import Sword<br>
+except ImportError:<br>
+ log.exception(<br>
+ "You do not have the SWORD library installed. Please install it.")<br>
+ sys.exit(1)<br>
<br>
-# Inform the user that we need pyquery, as it makes parsing XML files that much easier<br>
-try:<br>
- from pyquery import PyQuery as pq<br>
-except:<br>
- print "You do not appear to have PyQuery installed. Please install it."<br>
- sys.exit(2)<br>
+arg_parser = argparse.ArgumentParser(<br>
+ description='Compare OSIS file with available v11ns.')<br>
<br>
-# Without the name of a file, we cannot proceed any further<br>
-if len(sys.argv) < 2 or sys.argv[1] == '--help':<br>
- print "Usage: %s <OSISfile>" % (sys.argv[0],)<br>
+arg_parser.add_argument('--<wbr>verbose', '-v', action='count')<br>
+arg_parser.add_argument('<wbr>filename', nargs=1)<br>
+<br>
+<br>
+args = arg_parser.parse_args()<br>
+<br>
+if args.verbose:<br>
+ log.setLevel = logging.DEBUG<br>
+<br>
+log.debug('args = %s', args)<br>
<br>
# Open the file<br>
-if debug:<br>
- print 'Opening %s' % (sys.argv[1],)<br>
-d = pq(filename=sys.argv[1])<br>
+log.debug('Opening %s', args.filename[0])<br>
+<br>
+tree = ET.parse(io.open(args.<wbr>filename[0], encoding='utf8')).getroot()<br>
# Get the list of versifications<br>
-if debug:<br>
- print 'Fetching a list of versifications'<br>
-vmgr = Sword.VerseMgr.<wbr>getSystemVerseMgr()<br>
+log.debug('Fetching a list of v11ns')<br>
+vmgr = Sword.VersificationMgr.<wbr>getSystemVersificationMgr()<br>
av11ns = vmgr.getVersificationSystems()<br>
+log.debug('av11ns = %s', av11ns)<br>
<br>
# Get the list of all osisIDs<br>
-if debug:<br>
- print 'Fetching a list of OSIS IDs'<br>
-ids = d("*[osisID]")<br>
+log.debug('Fetching a list of OSIS IDs')<br>
+ids = set()<br>
+for item in tree.iter('{%s}verse' % OSIS_NS):<br>
+ if 'osisID' in item.attrib:<br>
+ ids.add(item.attrib['osisID'].<wbr>split('!')[0])<br>
+log.debug('ids = len(%d)', len(ids))<br>
+<br>
# Iterate each versification scheme<br>
for v11n in av11ns:<br>
- print 'Checking %s' % (v11n.c_str(),)<br>
- # Construct a list of the IDs in this versification<br>
- key = Sword.VerseKey()<br>
- key.setVersificationSystem(<wbr>v11n.c_str())<br>
- otkeyList = [] # Anything left in this afterwards is missing from the OSIS ot<br>
- ntkeyList = [] # Anything left in this afterwards is missing from the OSIS nt<br>
- otextraKeys = [] # Anything that gets placed in here is extraneous OT material (we think)<br>
- ntextraKeys = [] # Anything that gets placed in here is extraneous NT material (we think)<br>
-<br>
- inNT = False<br>
- while key.Error() == '\x00':<br>
- skey = key.getOSISRef()<br>
- if not inNT and re.match('^Matt', skey): # Assume we enter the NT when we hit Matthew<br>
- inNT = True<br>
- if inNT:<br>
- ntkeyList.append(skey)<br>
- else:<br>
- otkeyList.append(skey)<br>
- key.increment()<br>
- ntkeyList = set(ntkeyList) # The 'in' operator only works on a set<br>
- otkeyList = set(otkeyList)<br>
-<br>
- inNT = False<br>
- # Now iterate the ones we have in this file<br>
- for e in ids:<br>
- osisid = e.attrib.get('osisID')<br>
- #print 'Checking key %s' % (osisid,)<br>
- if osisid in otkeyList:<br>
- otkeyList.remove(osisid)<br>
- elif osisid in ntkeyList:<br>
- ntkeyList.remove(osisid)<br>
- inNT = True<br>
- elif verseid.match(osisid) and inNT:<br>
- ntextraKeys.append(osisid)<br>
- elif verseid.match(osisid) and not inNT:<br>
- otextraKeys.append(osisid)<br>
- # Ignore it if not verseid.match()<br>
-<br>
- # Now let's see what is left over<br>
- keyList = list(otkeyList.union(<wbr>ntkeyList)) # Sets in Python cannot be ordered<br>
- keyList.sort()<br>
- if len(keyList) > 0:<br>
- if verbose:<br>
- print '\tThe following IDs do not appear in your file:'<br>
- for k in keyList:<br>
- print k<br>
- else:<br>
- print '\tThere are %d OT IDs and %d NT IDs in the versification which are not in your file.' % (len(otkeyList), len(ntkeyList))<br>
- else:<br>
- print '\tYour file has all the references in this versification'<br>
-<br>
- # Now let's see if you had extra<br>
- if len(otextraKeys + ntextraKeys) > 0:<br>
- if verbose:<br>
- print '\tThe following IDs do not appear in the versification:'<br>
- for k in ntextraKeys + otextraKeys:<br>
- print k<br>
- else:<br>
- print '\tThere are %d OT IDs and %d NT IDs in your file which do not appear in the versification.' % (len(otextraKeys), len(ntextraKeys))<br>
- else:<br>
- print '\tYour file has no extra references'<br>
+ v11n_name = v11n.c_str()<br>
+ print('\nChecking %s:\n%s' %<br>
+ (v11n_name, (len(v11n_name) + 10) * '-'))<br>
+ # Construct a list of the IDs in this versification<br>
+ key = Sword.VerseKey()<br>
+ key.setVersificationSystem(<wbr>v11n.c_str())<br>
+ # Anything left in this afterwards is missing from the OSIS ot<br>
+ otkeyList = []<br>
+ # Anything left in this afterwards is missing from the OSIS nt<br>
+ ntkeyList = []<br>
+ # Anything that gets placed in here is extraneous OT material (we think)<br>
+ otextraKeys = []<br>
+ # Anything that gets placed in here is extraneous NT material (we think)<br>
+ ntextraKeys = []<br>
+<br>
+ inNT = False<br>
+ while key.popError() == '\x00':<br>
+ skey = key.getOSISRef()<br>
+ # Assume we enter the NT when we hit Matthew<br>
+ if not inNT and skey.startswith('Matt'):<br>
+ inNT = True<br>
+ if inNT:<br>
+ ntkeyList.append(skey)<br>
+ else:<br>
+ otkeyList.append(skey)<br>
+ key.increment()<br>
+ ntkeyList = set(ntkeyList) # The 'in' operator only works on a set<br>
+ otkeyList = set(otkeyList)<br>
+<br>
+ inNT = False<br>
+ # Now iterate the ones we have in this file<br>
+ for osisid in ids:<br>
+# log.debug('Checking key %s', osisid)<br>
+ if osisid in otkeyList:<br>
+ otkeyList.remove(osisid)<br>
+ elif osisid in ntkeyList:<br>
+ ntkeyList.remove(osisid)<br>
+ inNT = True<br>
+ else:<br>
+ verse_match = VERSEID_RE.match(osisid)<br>
+ if verse_match and inNT:<br>
+ ntextraKeys.append(verse_<wbr>match.group(1))<br>
+ elif verse_match and not inNT:<br>
+ otextraKeys.append(verse_<wbr>match.group(1))<br>
+ # Ignore it if not VERSEID_RE.match()<br>
+<br>
+ # Now let's see what is left over<br>
+ # Sets in Python cannot be ordered<br>
+ keyList = list(otkeyList.union(<wbr>ntkeyList))<br>
+ keyList.sort()<br>
+ if len(keyList) > 0:<br>
+ if len(keyList) < 100:<br>
+ <a href="http://log.info" rel="noreferrer" target="_blank">log.info</a>('\tThe following IDs don’t appear in your file:\n%s',<br>
+ str(", ".join(keyList)))<br>
+ print(('\tThere are %d OT IDs and %d NT IDs ' +<br>
+ 'in v11n which aren’t in your file.') \<br>
+ % (len(otkeyList), len(ntkeyList)))<br>
+ else:<br>
+ print('\tYour file has all the references in this v11n')<br>
+<br>
+ # Now let's see if you had extra<br>
+ if len(otextraKeys + ntextraKeys) > 0:<br>
+ # It doesn't make sense to print out lists longer than 100<br>
+ # they cannot be read anyway<br>
+ if len(keyList) < 100:<br>
+ <a href="http://log.info" rel="noreferrer" target="_blank">log.info</a>(<br>
+ '\tThe following IDs don’t appear in v11n:\n%s',<br>
+ str(", ".join(keyList)))<br>
+ print('\tThere are %d OT IDs and %d NT IDs ' +<br>
+ 'in your file which don’t appear in v11n.') \<br>
+ % (len(otextraKeys), len(ntextraKeys))<br>
+ else:<br>
+ print('\tYour file has no extra references')<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.16.2<br>
<br>
<br>
______________________________<wbr>_________________<br>
sword-devel mailing list: <a href="mailto:sword-devel@crosswire.org">sword-devel@crosswire.org</a><br>
<a href="http://www.crosswire.org/mailman/listinfo/sword-devel" rel="noreferrer" target="_blank">http://www.crosswire.org/<wbr>mailman/listinfo/sword-devel</a><br>
Instructions to unsubscribe/change your settings at above page</font></span></blockquote></div><br></div>