#! /usr/bin/env python
# Distributed under the "Here, have it" license
# Written by Greg Hellings, 2011, all rights reserved
'''
Compare the per-verse word counts of two SWORD modules.

For every verse of the selected testament the ratio
(words in the original module) / (words in the translation module)
is collected.  Afterwards the mean and standard deviation of those
ratios are printed, followed by the references whose ratio lies two or
more standard deviations from the mean.

Command line: <original module> <translation module> <OT|NT>

Only single-argument print(...) calls are used, so the output is the
same whether the script runs under Python 2 or Python 3.
'''

import sys

# The third-party libraries are looked up here, but a failure is only
# acted upon in main(), so importing this module never terminates the
# interpreter.  sys is imported above, outside the guarded block, so
# that usage() can always reach sys.exit().
try:
    import numpy
    import Sword
except ImportError:
    numpy = None
    Sword = None


def usage(name):
    '''
    Helpful hints for the user. Let them know what it is that this
    script expects of them.

    name is the program name to show in the usage line (normally
    argv[0]).  This function never returns: it ends the process with
    exit status -1.
    '''
    print("Usage: %s <original module> <translation module> <OT|NT>" % (name,))
    print("Requires the SWORD Python library to operate.")
    print("Also requires the Numpy library (fast array calculations)")
    sys.exit(-1)


def increment(v, *args):
    '''
    Copies the SWKey out of the first module and into any other modules
    down the row.

    v is advanced by one entry with v.increment(); the key it then
    reports through getKey() is handed to setKey() on every module in
    args, keeping them all on the same entry.
    '''
    v.increment()
    k = v.getKey()
    for m in args:
        m.setKey(k)


def _get_module(mgr, name):
    '''
    Return the module called name from the manager mgr, or complain and
    exit with status -1 when the manager has no such module.
    '''
    module = mgr.getModule(name)
    if module is None:
        print("You might want to pick a translation that exists.")
        sys.exit(-1)
    return module


def _words(text):
    '''
    Split a verse's text into whitespace-separated words.

    Byte strings are decoded as UTF-8 first; text that is already
    unicode is split as it is.
    '''
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    return text.split()


def _report(counts):
    '''
    Print the statistics for the collected ratios and list the outliers.

    counts is a list of (reference, ratio, last_in_chapter) tuples as
    built by main().
    '''
    if not counts:
        # Nothing was comparable, so there is no mean or deviation to show.
        print("No verses could be compared.")
        return

    # Now that we have all the values, let's see if there
    # are any that seem way out of whack
    ratios = [ratio for _, ratio, _ in counts]
    mean = numpy.average(ratios)
    std = numpy.std(ratios)

    # NOTE(review): the line breaks inside this banner are a
    # reconstruction - confirm against the intended layout.
    print("\t***********************************\n"
          "\t****** Beginning comparisons ******\n"
          "\t***********************************")
    print("Average ratio: %0.4f" % (mean,))
    print("Standard dev: %0.4f" % (std,))

    # Now iterate the target translation
    print("The following references fall outside of the target standard deviation")
    for ref, ratio, last_in_chapter in counts:
        # Only verses flagged as the last one of their chapter are
        # listed, exactly as the third tuple element dictates.
        if abs(ratio - mean) >= 2 * std and last_in_chapter:
            print('%s' % (ref,))


def main(argv):
    '''
    Main loops and the like for the program.

    argv is the full argument vector: program name, original module
    name, translation module name and the testament to check ('OT' or
    'NT').  Anything else, or a missing SWORD/Numpy library, prints the
    usage text and exits with status -1.  An unknown module name also
    exits with status -1.
    '''
    if Sword is None or numpy is None:
        usage(argv[0])
    if len(argv) != 4 or argv[3] not in ('OT', 'NT'):
        usage(argv[0])

    # Which testament are we comparing against
    check_nt = argv[3] == 'NT'

    # Fetch the original source language module, then the translation
    mgr = Sword.SWMgr()
    original = _get_module(mgr, argv[1])
    trans = _get_module(mgr, argv[2])

    counts = []
    in_nt = False

    # Iterate the whole selection
    # NOTE(review): the line breaks inside this banner are a
    # reconstruction - confirm against the intended layout.
    print("\t**********************************\n"
          "\t******** Building tables ********\n"
          "\t**********************************")

    # The walk continues for as long as both modules answer '\x00' from
    # Error() - presumably the bindings' "no error" value; verify
    # against the installed SWORD version.
    while original.Error() == '\x00' and trans.Error() == '\x00':
        o_words = _words(original.StripText())
        t_words = _words(trans.StripText())
        key = Sword.VerseKey(original.getKey())

        # Check if we've entered the NT
        if not in_nt and key.getOSISRef().startswith('Matt'):
            in_nt = True

        # Only check one testament
        if check_nt == in_nt:
            if not o_words:
                print('Unable to check verse %s - no content in source' % (key.getText(),))
            elif not t_words:
                print('Unable to check verse %s - no content in target' % (key.getText(),))
            else:
                counts.append((key.getOSISRef(),
                               len(o_words) / float(len(t_words)),
                               key.getVerse() == key.getChapterMax()))

        increment(original, trans)

    _report(counts)


if __name__ == '__main__':
    main(sys.argv)