[sword-svn] r568 - trunk/modules/conf
domcox at crosswire.org
domcox at crosswire.org
Sat Aug 2 10:51:36 EDT 2025
Author: domcox
Date: 2025-08-02 10:51:36 -0400 (Sat, 02 Aug 2025)
New Revision: 568
Added:
trunk/modules/conf/conflint.py
Log:
Add conflint.py, a conf analysis tool
Added: trunk/modules/conf/conflint.py
===================================================================
--- trunk/modules/conf/conflint.py (rev 0)
+++ trunk/modules/conf/conflint.py 2025-08-02 14:51:36 UTC (rev 568)
@@ -0,0 +1,597 @@
+#!/usr/bin/env python3
+
+# -*- coding: utf-8 -*-
+
+
+# conflint.py - Validate a conf file.
+#
+
+# Copyright (C) 2021 CrossWire Bible Society
+
+# Author: domcox <domcox at crosswire.org>
+
+# This file is part of Sword Modules
+
+# Sword Modules is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Sword Modules is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Sword Modules. If not, see <https://www.gnu.org/licenses/>.
+
+# Created: 2021-01-22
+
+
+# Requirements
+
+import argparse
+import os.path
+import re
+import sys
+from pathlib import Path
+from datetime import date
+
+
# Module-level state

# Version of this tool
version = "1.0"

# Name of the module being validated; parseconf() stores the text found
# between the square brackets of the section header here
modname = ""
+
# Maps every known .conf element to the list of value types it accepts.
# chk_type() looks elements up here; parseconf() rejects any key that is
# not listed.
elemtype = {
    "Abbreviation": ["text"],
    "Description": ["text"],
    "DataPath": ["text"],
    "ModDrv": ["keyword"],
    "SourceType": ["keyword"],
    "Encoding": ["keyword"],
    "CompressType": ["keyword"],
    "BlockType": ["keyword"],
    "BlockCount": ["integer"],
    "Versification": ["keyword"],
    "CipherKey": ["text"],
    "KeyType": ["keyword"],
    "CaseSensitiveKeys": ["boolean"],
    "GlobalOptionFilter": ["keyword"],
    "Direction": ["keyword"],
    "DisplayLevel": ["integer"],
    "Font": ["text"],
    "OSISqToTick": ["boolean"],
    "Feature": ["keyword"],
    "GlossaryFrom": ["text"],
    "GlossaryTo": ["text"],
    "PreferredCSSXHTML": ["text"],
    "AndBibleCSS": ["text"],
    "CaseInsensitiveKeys": ["boolean"],
    "LangSortOrder": ["text"],
    "StrongsPadding": ["boolean"],
    "LocalStripFilter": ["text"],
    "About": ["text", "rtf"],
    "SwordVersionDate": ["isodate"],
    "Version": ["text"],
    "History": ["text", "html"],
    "MinimumVersion": ["text"],
    "Category": ["keyword"],
    "LCSH": ["text"],
    "Lang": ["text"],
    "Obsoletes": ["text"],
    "OSISVersion": ["text"],
    "Companion": ["text"],
    "DistributionLicense": ["keyword"],
    "DistributionNotes": ["text"],
    "Copyright": ["text"],
    "CopyrightHolder": ["text"],
    "CopyrightDate": ["year"],
    "CopyrightNotes": ["text"],
    "CopyrightContactName": ["text"],
    "CopyrightContactNotes": ["text"],
    "CopyrightContactAddress": ["text"],
    "CopyrightContactEmail": ["text"],
    "ShortPromo": ["text", "html"],
    "ShortCopyright": ["text"],
    "TextSource": ["text"],
    "UnlockInfo": ["text", "html", "rtf+html"],
    "InstallSize": ["integer"],
    "Notes": ["text"],
    "ReferenceBible": ["text"],
    "Scope": ["text"],
    "SearchOption": ["keyword"],
    "Siglum1": ["text"],
    "Siglum2": ["text"],
}
+
# Allowed values for every element whose declared type is 'keyword'.
# chk_type() reports an error for any value not listed under its element.
keywords = {
    "ModDrv": [
        "RawText", "RawText4", "zText", "zText4",
        "RawCom", "RawCom4", "zCom", "zCom4",
        "HREFCom", "RawFiles",
        "RawLD", "RawLD4", "zLD",
        "RawGenBook",
    ],
    "SourceType": ["OSIS", "TEI", "GBF", "ThML"],
    "Encoding": ["UTF-8", "UTF-16", "SCSU"],
    "CompressType": ["ZIP", "LZSS", "BZIP2", "XZ"],
    "BlockType": ["BOOK", "CHAPTER", "VERSE"],
    "Versification": [
        "Calvin", "Catholic", "Catholic2", "DarbyFr", "German", "KJV",
        "KJVA", "LXX", "Leningrad", "Luther", "MT", "NRSV", "NRSVA",
        "Orthodox", "Segond", "Synodal", "SynodalProt", "Vulg",
    ],
    "KeyType": ["TreeKey", "VerseKey"],
    "GlobalOptionFilter": [
        "UTF8Cantillation",
        "UTF8GreekAccents",
        "UTF8HebrewPoints",
        "UTF8ArabicPoints",
        "OSISLemma",
        "OSISMorphSegmentation",
        "OSISStrongs",
        "OSISFootnotes",
        "OSISScripref",
        "OSISMorph",
        "OSISHeadings",
        "OSISVariants",
        "OSISRedLetterWords",
        "OSISGlosses",
        "OSISXlit",
        "OSISEnum",
        "OSISReferenceLinks",
        "OSISRuby",
        "GBFStrongs",
        "GBFFootnotes",
        "GBFMorph",
        "GBFHeadings",
        "GBFRedLetterWords",
        "ThMLStrongs",
        "ThMLFootnotes",
        "ThMLScripref",
        "ThMLMorph",
        "ThMLHeadings",
        "ThMLVariants",
        "ThMLLemma",
    ],
    "Direction": ["LtoR", "RtoL", "BiDi"],
    "Feature": [
        "StrongsNumbers", "GreekDef", "HebrewDef", "GreekParse",
        "HebrewParse", "DailyDevotion", "Glossary", "Images",
        "NoParagraphs",
    ],
    "Category": [
        "Biblical Texts",
        "Commentaries",
        "Lexicons / Dictionaries",
        "Glossaries",
        "Daily Devotional",
        "Generic Books",
        "Maps",
        "Images",
        "Cults / Unorthodox / Questionable Material",
        "Essays",
    ],
    "DistributionLicense": [
        "Public Domain",
        "Copyrighted",
        "Copyrighted; Permission to distribute granted to CrossWire",
        "Copyrighted; Permission granted to distribute non-commercially in SWORD format",
        "Copyrighted; Free non-commercial distribution",
        "Copyrighted; Freely distributable",
        "GFDL",
        "GPL",
        "Creative Commons: BY-NC-ND 4.0",
        "Creative Commons: BY-NC-SA 4.0",
        "Creative Commons: BY-NC 4.0",
        "Creative Commons: BY-ND 4.0",
        "Creative Commons: BY-SA 4.0",
        "Creative Commons: BY 4.0",
        "Creative Commons: CC0",
    ],
    "SearchOption": ["IncludeKeyInSearch"],
}
+
# Elements chk_repeats() allows to appear more than once
multi = ["GlobalOptionFilter", "Feature", "Obsoletes"]

# Elements chk_continuation() allows to span several lines
continuation = [
    "About",
    "Copyright",
    "CopyrightNotes",
    "CopyrightContactName",
    "CopyrightContactNotes",
    "CopyrightContactAddress",
    "DistributionNotes",
    "TextSource",
    "UnlockInfo",
    "Notes",
]

# Elements chk_localization() allows to carry a '_xx' suffix
localization = [
    "Abbreviation",
    "Description",
    "About",
    "History",
    "Copyright",
    "CopyrightHolder",
    "CopyrightNotes",
    "CopyrightContactName",
    "CopyrightContactNotes",
    "CopyrightContactAddress",
    "CopyrightContactEmail",
    "ShortPromo",
    "ShortCopyright",
    "DistributionNotes",
    "TextSource",
    "UnlockInfo",
]

# Elements chk_required() expects in every .conf file
required = [
    "Description",
    "DataPath",
    "ModDrv",
    "About",
    "SwordVersionDate",
    "DistributionLicense",
    "TextSource",
    "Version",
]

# Accepted elements for which parseconf() warns "Element is not documented"
unidentified = ["Notes", "ReferenceBible", "SearchOption", "Siglum1", "Siglum2"]

# Elements for which chk_obsolete() emits a deprecation warning
deprecated = ["OSISqToTick"]

# Element names found in the .conf file (filled by listconf())
existing = []

# Running count of reported errors (incremented by error())
errors = 0

# Running count of reported warnings (incremented by warning())
warnings = 0
+
+
def die(msg):
    '''
    Report a fatal error and stop the program.

    The message goes to stderr, "Parsing failed" goes to stdout, and the
    process exits with status 1.
    '''
    report = ' ERROR! ' + msg
    print(report, file=sys.stderr)
    print('Parsing failed\n')
    raise SystemExit(1)
+
+
def error(msg):
    '''
    Print an error message on stderr and count it.

    Returns the updated value of the global error counter.
    '''
    global errors
    report = ' ERROR! ' + msg
    print(report, file=sys.stderr)
    errors = errors + 1
    return errors
+
+
def warning(msg):
    '''
    Print a warning message on stderr and count it.

    Returns the updated value of the global warning counter.
    '''
    global warnings
    report = ' WARNING! ' + msg
    print(report, file=sys.stderr)
    warnings = warnings + 1
    return warnings
+
+
def get_parameters():
    '''
    Parse the command line.

    Expects one positional argument, the path of the .conf file.
    Terminates through die() when that path is not an existing file.
    Returns the path exactly as given on the command line.
    '''
    parser = argparse.ArgumentParser(
        description='''
    Validate a SWORD .conf file contents.
    '''
    )
    parser.add_argument('conf', help='config file')
    options = parser.parse_args()

    # Make sure the requested conf file is really there
    fileconf = options.conf
    if not Path(fileconf).is_file():
        die(f"File '{fileconf}' does not exist.")
    return fileconf
+
+
# Discarded: this configparser-based reader doesn't work with continuation
def readconf2(file):
    '''
    Read a conf file with configparser and print its "key = value" pairs.

    Terminates through die() unless the file holds exactly one section.
    Not called by main(); kept for reference only.
    '''
    # Imported locally: the module is only needed by this unused helper
    # and was missing from the file-level imports (NameError on call).
    import configparser

    config = configparser.RawConfigParser(strict=False)
    # Keep option names as written instead of lower-casing them
    config.optionxform = lambda option: option
    # Same encoding as readconf()
    config.read(file, encoding='utf-8')
    if len(config.sections()) != 1:
        die('Invalid File Format')

    for sect in config.sections():
        for k, v in config.items(sect):
            print(' {} = {}'.format(k, v))
        print()
+
+
def readconf(file):
    '''
    Read the conf file and return its logical entries.

    Every line is stripped of surrounding whitespace. Blank lines and
    lines starting with '#' are skipped. A line ending with a backslash
    is joined (backslash and newline kept) with the lines that follow,
    up to and including the first line without a trailing backslash.

    Returns a list of strings, one per entry, in file order.
    '''
    config = []
    # Text accumulated so far for an entry spanning several lines
    pending = ''
    with open(file, 'r', encoding='utf-8', newline='\n') as f:
        for line in f:
            line = line.strip()
            # Outside a continuation, blank and comment lines are noise.
            # This also keeps a comment ending in '\' from being glued
            # onto the next entry.
            if not pending and (not line or line.startswith('#')):
                continue
            if line.endswith('\\'):
                pending += line + '\n'
                continue
            # Simple line, or last line of a continuation. Whatever the
            # last line holds (even nothing), the accumulated entry is
            # kept rather than silently dropped.
            config.append(pending + line)
            pending = ''
    # File ended in the middle of a continuation: keep what was read
    if pending:
        config.append(pending)
    return config
+
+
def parseconf(config):
    '''
    Split the raw entries into (element, value) pairs.

    A "[Name]" entry sets the global module name; die() is called when
    that name is not a valid identifier. Entries without '=' and entries
    whose key (the part before the first '_') is unknown are reported
    through error() and left out. Accepted keys listed in `unidentified`
    additionally raise a warning.

    Returns the list of accepted (element, value) tuples, where element
    is the full left-hand side including any '_xx' suffix.
    '''
    global modname
    accepted = []
    header_re = re.compile(r'^\[(.+?)\]$')

    for raw in config:
        entry = raw.strip()

        # Module identifier line
        header = header_re.search(entry)
        if header:
            modname = header.group(1)
            if not modname.isidentifier():
                die(f"{modname}: Invalid Unique Identifier.")
            continue

        # Anything else must be "key=value"
        if '=' not in entry:
            error(f"{entry}: Parsing error, unexpected item")
            continue

        element, value = entry.split('=', 1)
        # The base key is whatever precedes the first underscore
        key = element.split('_', 1)[0]
        if key not in elemtype:
            error(f"{key}: Unknown element")
            continue

        accepted.append((element, value))
        if key in unidentified:
            warning(f"{key}: Element is not documented")

    return accepted
+
+
def chk_type(config):
    '''
    Check every value against the type(s) declared in `elemtype`.

    config is the list of (element, value) tuples built by parseconf().
    Problems are reported through error(); nothing is returned.

    The element is looked up without its '_xx' suffix, and only the part
    of the value before the first '|' is checked.
    '''
    for elem, value in config:
        # Remove extension (eg: About_de, remove _de)
        subelem = elem.split('_', 1)[0]
        # Remove parameters (eg: GlobalOptionFilter=OSISReferenceLinks|...)
        value = value.split('|', 1)[0]
        allowed = elemtype[subelem]

        if 'keyword' in allowed:
            if value not in keywords[subelem]:
                error(f"{elem}={value}: Not matching predefined value")
        elif 'isodate' in allowed:
            _chk_isodate(elem, value)
        elif 'year' in allowed:
            _chk_years(elem, value)
        else:
            # Type=text, html or rtf (also boolean and integer)
            found = typevalue(value)
            # A value that merely looks like a number or a boolean
            # (eg: Version=2) is still valid plain text.
            if found in ('boolean', 'integer') and 'text' in allowed:
                found = 'text'
            if found not in allowed:
                error(f"{elem}: '{found}' formatting is not allowed")


def _chk_isodate(elem, value):
    '''Check a YYYY-MM-DD (hyphens optional) date between 1992-01-01 and today.'''
    isoregex = '^([0-9]{4})-?(1[0-2]|0[1-9])-?(3[01]|0[1-9]|[12][0-9])$'
    found = re.match(isoregex, value)
    if not found:
        error(f"{elem}={value}: Incorrect format")
        return
    # Build the date from the captured fields instead of calling
    # date.fromisoformat(value): the regex also accepts hyphen-less forms
    # and impossible days (eg: 02-30) on which fromisoformat may raise.
    try:
        SWdate = date(*(int(part) for part in found.groups()))
    except ValueError:
        error(f"{elem}={value}: Invalid calendar date")
        return
    if SWdate > date.today():
        error(f"{elem}={SWdate}: Future dates are not allowed")
    if SWdate < date(1992, 1, 1):
        error(f"{elem}={SWdate}: Older dates than Sword's are not allowed")


def _chk_years(elem, value):
    '''Check a list of 4-digit years separated by ',' or '-'.'''
    this_year = date.today().year
    for year in value.replace('-', ',').split(','):
        if not re.match('^([0-9]{4})$', year):
            error(f"{elem}={year}: Incorrect format")
            continue
        # Compare as integers: year 0000 cannot be turned into a date
        number = int(year)
        if number > this_year:
            error(f"{elem}={year}: Future years are not allowed")
        if number < 1583:
            error(f"{elem}={year}: Years prior to 1583 are not allowed")
+
+
def typevalue(str):
    '''
    Return the type of str as one of:
    'boolean', 'integer', 'rtf+html', 'html', 'rtf' or 'text'.

    'boolean' is "true"/"false" in any letter case; 'integer' is a
    non-empty run of ASCII digits. Markup is detected with ishtml() and
    isrtf(); everything else is 'text'.
    '''
    if str.capitalize() in ['True', 'False']:
        return 'boolean'
    # ASCII digits only: isnumeric() would also accept characters such
    # as '²' or '½', which are not integers.
    if str.isascii() and str.isdigit():
        return 'integer'
    html = ishtml(str)
    rtf = isrtf(str)
    if html and rtf:
        return 'rtf+html'
    if html:
        return 'html'
    if rtf:
        return 'rtf'
    return 'text'
+
+
def ishtml(str):
    '''
    Return True if str contains html codes, False otherwise.

    html is recognised by the presence of '<a', '/>' or '</'.
    '''
    regexp = r'<a|/>|</'
    # Convert the Match/None result into the documented boolean
    return re.search(regexp, str) is not None
+
+
def isrtf(str):
    '''
    Return True if str contains rtf codes, False otherwise.

    rtf is recognised by the presence of '\\par' or '\\qc'.
    '''
    regexp = r'\\par|\\qc'
    # Convert the Match/None result into the documented boolean
    return re.search(regexp, str) is not None
+
+
def listconf(config):
    '''
    Record the name of every parsed element.

    config is a list of (key, value) tuples; each key is appended, in
    order, to the global `existing` list. Nothing is returned.
    '''
    existing.extend(key for key, _value in config)
+
+
def chk_repeats(config):
    '''
    Report elements that appear more than once.

    Occurrences are counted in the global `existing` list. Elements
    listed in `multi` may repeat freely. Each offending element is
    reported only once.
    '''
    reported = []
    for elem, _value in config:
        if existing.count(elem) <= 1 or elem in multi:
            continue
        if elem in reported:
            continue
        error(f"{elem}: Repeating this element is not allowed.")
        reported.append(elem)
+
+
def chk_continuation(config):
    '''
    Report values that span several lines where this is not allowed.

    A value is multi-line when it contains a backslash immediately
    followed by a newline. Only elements listed in `continuation`
    (looked up without their '_xx' suffix) may be multi-line.
    '''
    # Backslash followed by newline
    marker = r'\\\n'
    for elem, value in config:
        if not re.search(marker, value):
            continue
        base = elem.split('_', 1)[0]
        if base not in continuation:
            error(f"{elem}: Continuation not allowed on that element.")
+
+
def chk_localization(config):
    '''
    Report localized elements that must not be localized.

    An element is localized when its name contains '_'; the part before
    the first '_' has to be listed in `localization`.
    '''
    for elem, _value in config:
        if '_' not in elem:
            continue
        base = elem.split('_', 1)[0]
        if base not in localization:
            error(f"{elem}: Localization is not allowed")
+
+
def chk_required(config):
    '''
    Report every element of `required` that is absent from the global
    `existing` list. The config argument itself is not consulted.
    '''
    missing = [name for name in required if name not in existing]
    for name in missing:
        error(f"{name}: Missing required element")
+
+
def chk_datapath(config):
    '''
    Compare DataPath with the path recommended for the module driver.

    The recommended path is built from the ModDrv value, the lower-cased
    module name and, for the lexicon/dictionary drivers, a sub-directory
    selected by Feature=DailyDevotion or Feature=Glossary. A warning is
    issued when DataPath differs. Nothing is checked when ModDrv is
    absent or not in the table below.
    '''
    # Keys must match the accepted ModDrv keywords exactly
    path = {
        'RawText': './modules/texts/rawtext/',
        'RawText4': './modules/texts/rawtext4/',
        'zText': './modules/texts/ztext/',
        'zText4': './modules/texts/ztext4/',
        'zCom': './modules/comments/zcom/',
        'zCom4': './modules/comments/zcom4/',
        'HREFCom': './modules/comments/hrefcom/',
        'RawCom': './modules/comments/rawcom/',
        'RawCom4': './modules/comments/rawcom4/',
        'RawFiles': './modules/comments/rawfiles/',
        'zLD': './modules/lexdict/zld/',
        'RawLD': './modules/lexdict/rawld/',
        'RawLD4': './modules/lexdict/rawld4/',
        'RawGenBook': './modules/genbook/rawgenbook/',
    }
    # Module
    module = modname.lower()
    # Read needed values
    datapath = ''
    moddrv = ''
    features = []
    for elem, value in config:
        if elem == 'DataPath':
            datapath = value
        elif elem == 'ModDrv':
            moddrv = value
        elif elem == 'Feature':
            # Feature may be repeated, keep them all
            features.append(value)
    # Build the feature specific sub-dir. DailyDevotion and Glossary are
    # Feature values, so they are looked up there (not in Category).
    if 'DailyDevotion' in features:
        subdir = 'devotionals/'
    elif 'Glossary' in features:
        subdir = 'glossaries/'
    else:
        subdir = ''
    if moddrv not in path:
        return
    # Build the recommended DataPath
    if moddrv in ['zLD', 'RawLD', 'RawLD4']:
        suitedpath = path[moddrv] + subdir + module + '/dict'
    else:
        suitedpath = path[moddrv] + module + '/'
    # Compare DataPath values
    if datapath != suitedpath:
        warning('DataPath differs from the recommended convention,')
        # Only hint at a missing Feature when it really is missing
        if 'devotionals' in datapath and 'DailyDevotion' not in features:
            print(' or Feature=DailyDevotion is missing,')
        if 'glossaries' in datapath and 'Glossary' not in features:
            print(' or Feature=Glossary is missing,')
        print(f" DataPath={datapath}\n Rec.Conv={suitedpath}")
+
+
def chk_length(config):
    '''
    Warn about short elements whose value is too long.

    Description, ShortPromo and ShortCopyright (with or without an
    '_xx' suffix) trigger a warning above 80 characters.
    '''
    short_elements = ['Description', 'ShortPromo', 'ShortCopyright']
    for elem, value in config:
        # Remove locale
        base = elem.split('_', 1)[0]
        if base not in short_elements:
            continue
        # Check max length
        if len(value) > 80:
            warning(f"{elem}: Element length is longer than expected")
+
+
def chk_obsolete(config):
    '''
    Warn about every element (looked up without its '_xx' suffix) that
    is listed in `deprecated`.
    '''
    for elem, _value in config:
        base = elem.split('_', 1)[0]
        if base in deprecated:
            warning(f"{elem}: This attribute is deprecated")
+
+
def chk_https(config):
    '''
    Report TextSource values (with or without an '_xx' suffix) that
    contain "http:".
    '''
    insecure = r'http\:'
    for elem, value in config:
        base = elem.split('_', 1)[0]
        if base not in ['TextSource']:
            continue
        if re.search(insecure, value):
            error(f"{elem}: URL is not secure, please use https://")
+
def main():
    '''
    Validate the .conf file named on the command line.

    Reads and parses the file, runs every check, then prints the
    verdict on stdout. Exits with status 1 when at least one error was
    reported; returns True otherwise.
    '''
    # Get filename
    filename = get_parameters()
    basename = os.path.basename(filename)
    print(f"Validating {basename}:", file=sys.stdout)
    # Parse file
    config = readconf(filename)
    parsed = parseconf(config)
    listconf(parsed)
    # Checks, in reporting order
    checks = (
        chk_type,
        chk_repeats,
        chk_continuation,
        chk_localization,
        chk_required,
        chk_datapath,
        chk_length,
        chk_obsolete,
        chk_https,
    )
    for check in checks:
        check(parsed)
    # Final report
    printerrors = '1 error' if errors == 1 else f'{errors} errors'
    if errors:
        print(f'{basename} fails to validate with {printerrors}', file=sys.stdout)
        sys.exit(1)
    print(f'{basename} validates', file=sys.stdout)
    return True


# Run only when executed as a script, so that importing this module
# does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    main()
Property changes on: trunk/modules/conf/conflint.py
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
More information about the sword-cvs
mailing list