[sword-devel] [PATCH] Make versification/av11n.py script working again.

Greg Hellings greg.hellings at gmail.com
Sat Mar 31 10:02:02 MST 2018


Personally I'd prefer to see this as a few separate patches, especially
one that separates out the PEP8 stuff from the other problems.

On Sat, Mar 31, 2018 at 11:44 AM, Matěj Cepl <mcepl at cepl.eu> wrote:

> That means:
>     * Make it PEP8 compliant
>     * That unfortunately includes '\t' -> '    ' change (PEP8 doesn’t
>       like TABs), which makes for the massive diff.
>     * Don’t use deprecated API calls.
>         Use Sword.VersificationMgr.getSystemVersificationMgr() instead
>             of Sword.VerseMgr.getSystemVerseMgr().
>         Use popError() instead of Error().
>     * Switch to logging to make it a little bit more civilized.
>     * Don’t use RE when you don’t need it.
>     * Shorten the labels so they are not overflowing the screen.
>     * Don’t fool with PyQuery and use standard XML libraries.
>     * Use argparse.
> ---
>  versification/av11n.py | 213 ++++++++++++++++++++++++++++--
> -------------------
>  1 file changed, 124 insertions(+), 89 deletions(-)
>
> diff --git a/versification/av11n.py b/versification/av11n.py
> index 136a382..5386b8e 100755
> --- a/versification/av11n.py
> +++ b/versification/av11n.py
> @@ -1,4 +1,5 @@
>  #!/usr/bin/env python
> +# coding: utf-8
>  #
>  # This does a very roughshod attempt to compare the osisIDs found in an
>  # XML file with each of the versifications that SWORD knows about to help
> @@ -7,106 +8,140 @@
>  # in the proper order, although within each testament, it requires nothing
>  # special as for ordering.
>  #
> -# Invoke simply by calling the program and the file name.  If you want
> -# more output, change the following line to be True instead of False
> -verbose = False
> -debug = True
> -import sys
> +# Invoke simply by calling the program and the file name.
> +from __future__ import print_function
> +import argparse
> +import io
> +import logging
>  import re
> -verseid = re.compile('^.+\..+\..+$')
> +import sys
> +
> +# in normal state level should be debug.WARNING, debug.INFO and
> debug.DEBUG
> +# give additional information.
> +logging.basicConfig(format='%(levelname)s:%(message)s',
> +                    level=logging.INFO)
> +log = logging.getLogger('versification')
> +
> +try:
> +    import lxml.etree as ET
> +except ImportError:
> +    import xml.etree.ElementTree as ET
> +
> +OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
> +VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$')
>
>  # Inform the user that we need the SWORD extension
>  try:
> -       import Sword
> -except:
> -       print "You do not have the SWORD library installed. Please install
> it."
> -       sys.exit(1)
> +    import Sword
> +except ImportError:
> +    log.exception(
> +        "You do not have the SWORD library installed. Please install it.")
> +    sys.exit(1)
>
> -# Inform the user that we need pyquery, as it makes parsing XML files
> that much easier
> -try:
> -       from pyquery import PyQuery as pq
> -except:
> -       print "You do not appear to have PyQuery installed. Please install
> it."
> -       sys.exit(2)
> +arg_parser = argparse.ArgumentParser(
> +    description='Compare OSIS file with available v11ns.')
>
> -# Without the name of a file, we cannot proceed any further
> -if len(sys.argv) < 2 or sys.argv[1] == '--help':
> -       print "Usage: %s <OSISfile>" % (sys.argv[0],)
> +arg_parser.add_argument('--verbose', '-v', action='count')
> +arg_parser.add_argument('filename', nargs=1)
> +
> +
> +args = arg_parser.parse_args()
> +
> +if args.verbose:
> +    log.setLevel = logging.DEBUG
> +
> +log.debug('args = %s', args)
>
>  # Open the file
> -if debug:
> -       print 'Opening %s' % (sys.argv[1],)
> -d = pq(filename=sys.argv[1])
> +log.debug('Opening %s', args.filename[0])
> +
> +tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot()
>  # Get the list of versifications
> -if debug:
> -       print 'Fetching a list of versifications'
> -vmgr = Sword.VerseMgr.getSystemVerseMgr()
> +log.debug('Fetching a list of v11ns')
> +vmgr = Sword.VersificationMgr.getSystemVersificationMgr()
>  av11ns = vmgr.getVersificationSystems()
> +log.debug('av11ns = %s', av11ns)
>
>  # Get the list of all osisIDs
> -if debug:
> -       print 'Fetching a list of OSIS IDs'
> -ids = d("*[osisID]")
> +log.debug('Fetching a list of OSIS IDs')
> +ids = set()
> +for item in tree.iter('{%s}verse' % OSIS_NS):
> +    if 'osisID' in item.attrib:
> +        ids.add(item.attrib['osisID'].split('!')[0])
> +log.debug('ids = len(%d)', len(ids))
> +
>  # Iterate each versification scheme
>  for v11n in av11ns:
> -       print 'Checking %s' % (v11n.c_str(),)
> -       # Construct a list of the IDs in this versification
> -       key = Sword.VerseKey()
> -       key.setVersificationSystem(v11n.c_str())
> -       otkeyList = [] # Anything left in this afterwards is missing from
> the OSIS ot
> -       ntkeyList = [] # Anything left in this afterwards is missing from
> the OSIS nt
> -       otextraKeys = [] # Anything that gets placed in here is extraneous
> OT material (we think)
> -       ntextraKeys = [] # Anything that gets placed in here is extraneous
> NT material (we think)
> -
> -       inNT = False
> -       while key.Error() == '\x00':
> -               skey = key.getOSISRef()
> -               if not inNT and re.match('^Matt', skey): # Assume we enter
> the NT when we hit Matthew
> -                       inNT = True
> -               if inNT:
> -                       ntkeyList.append(skey)
> -               else:
> -                       otkeyList.append(skey)
> -               key.increment()
> -       ntkeyList = set(ntkeyList) # The 'in' operator only works on a set
> -       otkeyList = set(otkeyList)
> -
> -       inNT = False
> -       # Now iterate the ones we have in this file
> -       for e in ids:
> -               osisid = e.attrib.get('osisID')
> -               #print 'Checking key %s' % (osisid,)
> -               if osisid in otkeyList:
> -                       otkeyList.remove(osisid)
> -               elif osisid in ntkeyList:
> -                       ntkeyList.remove(osisid)
> -                       inNT = True
> -               elif verseid.match(osisid) and inNT:
> -                       ntextraKeys.append(osisid)
> -               elif verseid.match(osisid) and not inNT:
> -                       otextraKeys.append(osisid)
> -               # Ignore it if not verseid.match()
> -
> -       # Now let's see what is left over
> -       keyList = list(otkeyList.union(ntkeyList)) # Sets in Python
> cannot be ordered
> -       keyList.sort()
> -       if len(keyList) > 0:
> -               if verbose:
> -                       print '\tThe following IDs do not appear in your
> file:'
> -                       for k in keyList:
> -                               print k
> -               else:
> -                       print '\tThere are %d OT IDs and %d NT IDs in the
> versification which are not in your file.' % (len(otkeyList),
> len(ntkeyList))
> -       else:
> -               print '\tYour file has all the references in this
> versification'
> -
> -       # Now let's see if you had extra
> -       if len(otextraKeys + ntextraKeys) > 0:
> -               if verbose:
> -                       print '\tThe following IDs do not appear in the
> versification:'
> -                       for k in ntextraKeys + otextraKeys:
> -                               print k
> -               else:
> -                       print '\tThere are %d OT IDs and %d NT IDs in your
> file which do not appear in the versification.' % (len(otextraKeys),
> len(ntextraKeys))
> -       else:
> -               print '\tYour file has no extra references'
> +    v11n_name = v11n.c_str()
> +    print('\nChecking %s:\n%s' %
> +          (v11n_name, (len(v11n_name) + 10) * '-'))
> +    # Construct a list of the IDs in this versification
> +    key = Sword.VerseKey()
> +    key.setVersificationSystem(v11n.c_str())
> +    # Anything left in this afterwards is missing from the OSIS ot
> +    otkeyList = []
> +    # Anything left in this afterwards is missing from the OSIS nt
> +    ntkeyList = []
> +    # Anything that gets placed in here is extraneous OT material (we
> think)
> +    otextraKeys = []
> +    # Anything that gets placed in here is extraneous NT material (we
> think)
> +    ntextraKeys = []
> +
> +    inNT = False
> +    while key.popError() == '\x00':
> +        skey = key.getOSISRef()
> +        # Assume we enter the NT when we hit Matthew
> +        if not inNT and skey.startswith('Matt'):
> +            inNT = True
> +        if inNT:
> +            ntkeyList.append(skey)
> +        else:
> +            otkeyList.append(skey)
> +        key.increment()
> +    ntkeyList = set(ntkeyList)  # The 'in' operator only works on a set
> +    otkeyList = set(otkeyList)
> +
> +    inNT = False
> +    # Now iterate the ones we have in this file
> +    for osisid in ids:
> +#        log.debug('Checking key %s', osisid)
> +        if osisid in otkeyList:
> +            otkeyList.remove(osisid)
> +        elif osisid in ntkeyList:
> +            ntkeyList.remove(osisid)
> +            inNT = True
> +        else:
> +            verse_match = VERSEID_RE.match(osisid)
> +            if verse_match and inNT:
> +                ntextraKeys.append(verse_match.group(1))
> +            elif verse_match and not inNT:
> +                otextraKeys.append(verse_match.group(1))
> +            # Ignore it if not VERSEID_RE.match()
> +
> +    # Now let's see what is left over
> +    # Sets in Python cannot be ordered
> +    keyList = list(otkeyList.union(ntkeyList))
> +    keyList.sort()
> +    if len(keyList) > 0:
> +        if len(keyList) < 100:
> +            log.info('\tThe following IDs don’t appear in your
> file:\n%s',
> +                     str(", ".join(keyList)))
> +        print(('\tThere are %d OT IDs and %d NT IDs ' +
> +               'in v11n which aren’t in your file.') \
> +              % (len(otkeyList), len(ntkeyList)))
> +    else:
> +        print('\tYour file has all the references in this v11n')
> +
> +    # Now let's see if you had extra
> +    if len(otextraKeys + ntextraKeys) > 0:
> +        # It doesn't make sense to print out lists longer than 100
> +        # they cannot be read anyway
> +        if len(keyList) < 100:
> +            log.info(
> +                '\tThe following IDs don’t appear in v11n:\n%s',
> +                str(", ".join(keyList)))
> +        print('\tThere are %d OT IDs and %d NT IDs ' +
> +              'in your file which don’t appear in v11n.') \
> +              % (len(otextraKeys), len(ntextraKeys))
> +    else:
> +        print('\tYour file has no extra references')
> --
> 2.16.2
>
>
> _______________________________________________
> sword-devel mailing list: sword-devel at crosswire.org
> http://www.crosswire.org/mailman/listinfo/sword-devel
> Instructions to unsubscribe/change your settings at above page
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.crosswire.org/pipermail/sword-devel/attachments/20180331/75953d19/attachment-0001.html>


More information about the sword-devel mailing list