[sword-devel] [PATCH] Make versification/av11n.py script working again.
Greg Hellings
greg.hellings at gmail.com
Sat Mar 31 10:02:02 MST 2018
Personally, I'd prefer to see this as a few separate patches, especially
one that separates out the PEP8 stuff from the other problems.
On Sat, Mar 31, 2018 at 11:44 AM, Matěj Cepl <mcepl at cepl.eu> wrote:
> That means:
> * Make it PEP8 compliant
> * That unfortunately includes '\t' -> ' ' change (PEP8 doesn’t
> like TABs), which makes for the massive diff.
> * Don’t use deprecated API calls.
> Use Sword.VersificationMgr.getSystemVersificationMgr() instead
> of Sword.VerseMgr.getSystemVerseMgr().
> Use popError() instead of Error().
> * Switch to logging to make it a little bit more civilized.
> * Don’t use RE when you don’t need it.
> * Shorten the labels so they are not overflowing the screen.
> * Don’t fool with PyQuery and use standard XML libraries.
> * Use argparse.
> ---
> versification/av11n.py | 213 ++++++++++++++++++++++++++++--
> -------------------
> 1 file changed, 124 insertions(+), 89 deletions(-)
>
> diff --git a/versification/av11n.py b/versification/av11n.py
> index 136a382..5386b8e 100755
> --- a/versification/av11n.py
> +++ b/versification/av11n.py
> @@ -1,4 +1,5 @@
> #!/usr/bin/env python
> +# coding: utf-8
> #
> # This does a very roughshod attempt to compare the osisIDs found in an
> # XML file with each of the versifications that SWORD knows about to help
> @@ -7,106 +8,140 @@
> # in the proper order, although within each testament, it requires nothing
> # special as for ordering.
> #
> -# Invoke simply by calling the program and the file name. If you want
> -# more output, change the following line to be True instead of False
> -verbose = False
> -debug = True
> -import sys
> +# Invoke simply by calling the program and the file name.
> +from __future__ import print_function
> +import argparse
> +import io
> +import logging
> import re
> -verseid = re.compile('^.+\..+\..+$')
> +import sys
> +
> +# in normal state level should be debug.WARNING, debug.INFO and
> debug.DEBUG
> +# give additional information.
> +logging.basicConfig(format='%(levelname)s:%(message)s',
> + level=logging.INFO)
> +log = logging.getLogger('versification')
> +
> +try:
> + import lxml.etree as ET
> +except ImportError:
> + import xml.etree.ElementTree as ET
> +
> +OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
> +VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$')
>
> # Inform the user that we need the SWORD extension
> try:
> - import Sword
> -except:
> - print "You do not have the SWORD library installed. Please install
> it."
> - sys.exit(1)
> + import Sword
> +except ImportError:
> + log.exception(
> + "You do not have the SWORD library installed. Please install it.")
> + sys.exit(1)
>
> -# Inform the user that we need pyquery, as it makes parsing XML files
> that much easier
> -try:
> - from pyquery import PyQuery as pq
> -except:
> - print "You do not appear to have PyQuery installed. Please install
> it."
> - sys.exit(2)
> +arg_parser = argparse.ArgumentParser(
> + description='Compare OSIS file with available v11ns.')
>
> -# Without the name of a file, we cannot proceed any further
> -if len(sys.argv) < 2 or sys.argv[1] == '--help':
> - print "Usage: %s <OSISfile>" % (sys.argv[0],)
> +arg_parser.add_argument('--verbose', '-v', action='count')
> +arg_parser.add_argument('filename', nargs=1)
> +
> +
> +args = arg_parser.parse_args()
> +
> +if args.verbose:
> + log.setLevel = logging.DEBUG
> +
> +log.debug('args = %s', args)
>
> # Open the file
> -if debug:
> - print 'Opening %s' % (sys.argv[1],)
> -d = pq(filename=sys.argv[1])
> +log.debug('Opening %s', args.filename[0])
> +
> +tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot()
> # Get the list of versifications
> -if debug:
> - print 'Fetching a list of versifications'
> -vmgr = Sword.VerseMgr.getSystemVerseMgr()
> +log.debug('Fetching a list of v11ns')
> +vmgr = Sword.VersificationMgr.getSystemVersificationMgr()
> av11ns = vmgr.getVersificationSystems()
> +log.debug('av11ns = %s', av11ns)
>
> # Get the list of all osisIDs
> -if debug:
> - print 'Fetching a list of OSIS IDs'
> -ids = d("*[osisID]")
> +log.debug('Fetching a list of OSIS IDs')
> +ids = set()
> +for item in tree.iter('{%s}verse' % OSIS_NS):
> + if 'osisID' in item.attrib:
> + ids.add(item.attrib['osisID'].split('!')[0])
> +log.debug('ids = len(%d)', len(ids))
> +
> # Iterate each versification scheme
> for v11n in av11ns:
> - print 'Checking %s' % (v11n.c_str(),)
> - # Construct a list of the IDs in this versification
> - key = Sword.VerseKey()
> - key.setVersificationSystem(v11n.c_str())
> - otkeyList = [] # Anything left in this afterwards is missing from
> the OSIS ot
> - ntkeyList = [] # Anything left in this afterwards is missing from
> the OSIS nt
> - otextraKeys = [] # Anything that gets placed in here is extraneous
> OT material (we think)
> - ntextraKeys = [] # Anything that gets placed in here is extraneous
> NT material (we think)
> -
> - inNT = False
> - while key.Error() == '\x00':
> - skey = key.getOSISRef()
> - if not inNT and re.match('^Matt', skey): # Assume we enter
> the NT when we hit Matthew
> - inNT = True
> - if inNT:
> - ntkeyList.append(skey)
> - else:
> - otkeyList.append(skey)
> - key.increment()
> - ntkeyList = set(ntkeyList) # The 'in' operator only works on a set
> - otkeyList = set(otkeyList)
> -
> - inNT = False
> - # Now iterate the ones we have in this file
> - for e in ids:
> - osisid = e.attrib.get('osisID')
> - #print 'Checking key %s' % (osisid,)
> - if osisid in otkeyList:
> - otkeyList.remove(osisid)
> - elif osisid in ntkeyList:
> - ntkeyList.remove(osisid)
> - inNT = True
> - elif verseid.match(osisid) and inNT:
> - ntextraKeys.append(osisid)
> - elif verseid.match(osisid) and not inNT:
> - otextraKeys.append(osisid)
> - # Ignore it if not verseid.match()
> -
> - # Now let's see what is left over
> - keyList = list(otkeyList.union(ntkeyList)) # Sets in Python
> cannot be ordered
> - keyList.sort()
> - if len(keyList) > 0:
> - if verbose:
> - print '\tThe following IDs do not appear in your
> file:'
> - for k in keyList:
> - print k
> - else:
> - print '\tThere are %d OT IDs and %d NT IDs in the
> versification which are not in your file.' % (len(otkeyList),
> len(ntkeyList))
> - else:
> - print '\tYour file has all the references in this
> versification'
> -
> - # Now let's see if you had extra
> - if len(otextraKeys + ntextraKeys) > 0:
> - if verbose:
> - print '\tThe following IDs do not appear in the
> versification:'
> - for k in ntextraKeys + otextraKeys:
> - print k
> - else:
> - print '\tThere are %d OT IDs and %d NT IDs in your
> file which do not appear in the versification.' % (len(otextraKeys),
> len(ntextraKeys))
> - else:
> - print '\tYour file has no extra references'
> + v11n_name = v11n.c_str()
> + print('\nChecking %s:\n%s' %
> + (v11n_name, (len(v11n_name) + 10) * '-'))
> + # Construct a list of the IDs in this versification
> + key = Sword.VerseKey()
> + key.setVersificationSystem(v11n.c_str())
> + # Anything left in this afterwards is missing from the OSIS ot
> + otkeyList = []
> + # Anything left in this afterwards is missing from the OSIS nt
> + ntkeyList = []
> + # Anything that gets placed in here is extraneous OT material (we
> think)
> + otextraKeys = []
> + # Anything that gets placed in here is extraneous NT material (we
> think)
> + ntextraKeys = []
> +
> + inNT = False
> + while key.popError() == '\x00':
> + skey = key.getOSISRef()
> + # Assume we enter the NT when we hit Matthew
> + if not inNT and skey.startswith('Matt'):
> + inNT = True
> + if inNT:
> + ntkeyList.append(skey)
> + else:
> + otkeyList.append(skey)
> + key.increment()
> + ntkeyList = set(ntkeyList) # The 'in' operator only works on a set
> + otkeyList = set(otkeyList)
> +
> + inNT = False
> + # Now iterate the ones we have in this file
> + for osisid in ids:
> +# log.debug('Checking key %s', osisid)
> + if osisid in otkeyList:
> + otkeyList.remove(osisid)
> + elif osisid in ntkeyList:
> + ntkeyList.remove(osisid)
> + inNT = True
> + else:
> + verse_match = VERSEID_RE.match(osisid)
> + if verse_match and inNT:
> + ntextraKeys.append(verse_match.group(1))
> + elif verse_match and not inNT:
> + otextraKeys.append(verse_match.group(1))
> + # Ignore it if not VERSEID_RE.match()
> +
> + # Now let's see what is left over
> + # Sets in Python cannot be ordered
> + keyList = list(otkeyList.union(ntkeyList))
> + keyList.sort()
> + if len(keyList) > 0:
> + if len(keyList) < 100:
> + log.info('\tThe following IDs don’t appear in your
> file:\n%s',
> + str(", ".join(keyList)))
> + print(('\tThere are %d OT IDs and %d NT IDs ' +
> + 'in v11n which aren’t in your file.') \
> + % (len(otkeyList), len(ntkeyList)))
> + else:
> + print('\tYour file has all the references in this v11n')
> +
> + # Now let's see if you had extra
> + if len(otextraKeys + ntextraKeys) > 0:
> + # It doesn't make sense to print out lists longer than 100
> + # they cannot be read anyway
> + if len(keyList) < 100:
> + log.info(
> + '\tThe following IDs don’t appear in v11n:\n%s',
> + str(", ".join(keyList)))
> + print('\tThere are %d OT IDs and %d NT IDs ' +
> + 'in your file which don’t appear in v11n.') \
> + % (len(otextraKeys), len(ntextraKeys))
> + else:
> + print('\tYour file has no extra references')
> --
> 2.16.2
>
>
> _______________________________________________
> sword-devel mailing list: sword-devel at crosswire.org
> http://www.crosswire.org/mailman/listinfo/sword-devel
> Instructions to unsubscribe/change your settings at above page
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.crosswire.org/pipermail/sword-devel/attachments/20180331/75953d19/attachment-0001.html>
More information about the sword-devel
mailing list