[sword-svn] r441 - trunk/versification

chrislit at crosswire.org chrislit at crosswire.org
Wed Jul 24 00:46:39 MST 2013


Author: chrislit
Date: 2013-07-24 00:46:38 -0700 (Wed, 24 Jul 2013)
New Revision: 441

Added:
   trunk/versification/validate_v11n.py
Log:
added a simple utility for validating v11n definitions


Added: trunk/versification/validate_v11n.py
===================================================================
--- trunk/versification/validate_v11n.py	                        (rev 0)
+++ trunk/versification/validate_v11n.py	2013-07-24 07:46:38 UTC (rev 441)
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+import re, sys
+
+files = sys.argv[1:]
+
+if not files:
+    print 'Usage: ' + sys.argv[0] + ' <canon.h-style file(s)>'
+    exit()
+
+booksChapters = 0
+vmChapters = 0
+
+for fn in files:
+    f = open(fn).readlines()
+
+    inBooks = False
+    inVm = False
+
+    for l in f:
+        # This is not robust. It assumes that [on]tbooks[] precedes vm[] and
+        # that all of the verse counts in vm[] are part of books listed in
+        # [on]tbooks[]. In general, it assumes canon files that look like what
+        # we include in the library and generate from v11nsys.pl.
+
+        l = re.sub(r'#.*', '', l)
+        l = re.sub(r'\s*$', '', l)
+        
+        if l:        
+            if re.search(r'struct sbook [on]tbooks.*?\[\]', l):
+                inBooks = True
+            elif re.search(r'int vm.*?\[\]', l):
+                inVm = True
+            elif (inVm or inBooks) and re.search(r'};', l):            
+                inBooks = False
+                inVm = False
+            elif inBooks:
+                match = re.search(r'{".+?", ".+?", ".+?", (\d+)},', l)
+                if match:
+                    booksChapters += int(match.group(1))
+            elif inVm:
+                match = re.findall(r'\d+,?', l)
+                vmChapters += len(match)
+                
+    print fn + ' is ' + ('' if booksChapters == vmChapters else 'not ') + 'valid: ' + str(booksChapters) + ':' + str(vmChapters) +'\n'




More information about the sword-cvs mailing list