[sword-svn] r568 - trunk/modules/conf

domcox at crosswire.org domcox at crosswire.org
Sat Aug 2 10:51:36 EDT 2025


Author: domcox
Date: 2025-08-02 10:51:36 -0400 (Sat, 02 Aug 2025)
New Revision: 568

Added:
   trunk/modules/conf/conflint.py
Log:
Add conflint.py, a conf analysis tool

Added: trunk/modules/conf/conflint.py
===================================================================
--- trunk/modules/conf/conflint.py	                        (rev 0)
+++ trunk/modules/conf/conflint.py	2025-08-02 14:51:36 UTC (rev 568)
@@ -0,0 +1,597 @@
+#!/usr/bin/env python3
+
+# -*- coding: utf-8 -*-
+
+
+# conflint.py - Validate a conf file.
+#
+
+# Copyright (C) 2021 CrossWire Bible Society
+
+# Author: domcox <domcox at crosswire.org>
+
+# This file is part of Sword Modules
+
+# Sword Modules is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Sword Modules is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Sword Modules.  If not, see <https://www.gnu.org/licenses/>.
+
+# Created:  2021-01-22
+
+
+# Requirements
+
+import argparse
+import os.path
+import re
+import sys
+from pathlib import Path
+from datetime import date
+
+
+# Variables
+# Version of this script (not referenced by any function in this file)
+version = '1.0'
+
+# Module name taken from the [section] header of the .conf file.
+# Set by parseconf(), lower-cased by chk_datapath() to build the
+# recommended DataPath.
+modname = ''
+
+# Every element name accepted in a .conf file, mapped to the list of value
+# types it may hold.  parseconf() reports any key missing from this table
+# as an unknown element.  chk_type() handles 'keyword', 'isodate' and
+# 'year' itself; for every other entry it compares the list with the type
+# reported by typevalue() ('boolean', 'integer', 'text', 'html', 'rtf' or
+# 'rtf+html').
+elemtype = {
+    "Abbreviation"           : ['text'],
+    "Description"            : ['text'],
+    "DataPath"               : ['text'],
+    "ModDrv"                 : ['keyword'],
+    "SourceType"             : ['keyword'],
+    "Encoding"               : ['keyword'],
+    "CompressType"           : ['keyword'],
+    "BlockType"              : ['keyword'],
+    "BlockCount"             : ['integer'],
+    "Versification"          : ['keyword'],
+    "CipherKey"              : ['text'],
+    "KeyType"                : ['keyword'],
+    "CaseSensitiveKeys"      : ['boolean'],
+    "GlobalOptionFilter"     : ['keyword'],
+    "Direction"              : ['keyword'],
+    "DisplayLevel"           : ['integer'],
+    "Font"                   : ['text'],
+    "OSISqToTick"            : ['boolean'],
+    "Feature"                : ['keyword'],
+    "GlossaryFrom"           : ['text'],
+    "GlossaryTo"             : ['text'],
+    "PreferredCSSXHTML"      : ['text'],
+    "AndBibleCSS"            : ['text'],
+    "CaseInsensitiveKeys"    : ['boolean'],
+    "LangSortOrder"          : ['text'],
+    "StrongsPadding"         : ['boolean'],
+    "LocalStripFilter"       : ['text'],
+    "About"                  : ['text','rtf'],
+    "SwordVersionDate"       : ['isodate'],
+    "Version"                : ['text'],
+    "History"                : ['text','html'],
+    "MinimumVersion"         : ['text'],
+    "Category"               : ['keyword'],
+    "LCSH"                   : ['text'],
+    "Lang"                   : ['text'],
+    "Obsoletes"              : ['text'],
+    "OSISVersion"            : ['text'],
+    "Companion"              : ['text'],
+    "DistributionLicense"    : ['keyword'],
+    "DistributionNotes"      : ['text'],
+    "Copyright"              : ['text'],
+    "CopyrightHolder"        : ['text'],
+    "CopyrightDate"          : ['year'],
+    "CopyrightNotes"         : ['text'],
+    "CopyrightContactName"   : ['text'],
+    "CopyrightContactNotes"  : ['text'],
+    "CopyrightContactAddress": ['text'],
+    "CopyrightContactEmail"  : ['text'],
+    "ShortPromo"             : ['text','html'],
+    "ShortCopyright"         : ['text'],
+    "TextSource"             : ['text'],
+    "UnlockInfo"             : ['text','html','rtf+html'],
+    "InstallSize"            : ['integer'],
+    "Notes"                  : ['text'],
+    "ReferenceBible"         : ['text'],
+    "Scope"                  : ['text'],
+    "SearchOption"           : ['keyword'],
+    "Siglum1"                : ['text'],
+    "Siglum2"                : ['text']
+
+}
+
+# Allowed values for each element whose type in elemtype is 'keyword'.
+# chk_type() looks the element up here and reports any value that is not
+# listed (after dropping a '|parameters' suffix) as an error.
+keywords = {
+    "ModDrv"                 : ['RawText','RawText4','zText','zText4','RawCom',
+                                'RawCom4','zCom','zCom4','HREFCom','RawFiles',
+                                'RawLD','RawLD4','zLD','RawGenBook'],
+    "SourceType"             : ['OSIS','TEI','GBF','ThML'],
+    "Encoding"               : ['UTF-8','UTF-16','SCSU'],
+    "CompressType"           : ['ZIP','LZSS','BZIP2','XZ'],
+    "BlockType"              : ['BOOK','CHAPTER','VERSE'],
+    "Versification"          : ['Calvin','Catholic','Catholic2','DarbyFr','German',
+                                'KJV','KJVA','LXX','Leningrad','Luther','MT','NRSV',
+                                'NRSVA','Orthodox','Segond','Synodal','SynodalProt',
+                                'Vulg'],
+    "KeyType"                : ['TreeKey','VerseKey'],
+    "GlobalOptionFilter"     : ['UTF8Cantillation','UTF8GreekAccents',
+                                'UTF8HebrewPoints','UTF8ArabicPoints','OSISLemma',
+                                'OSISMorphSegmentation','OSISStrongs','OSISFootnotes',
+                                'OSISScripref','OSISMorph','OSISHeadings',
+                                'OSISVariants','OSISRedLetterWords','OSISGlosses',
+                                'OSISXlit','OSISEnum','OSISReferenceLinks','OSISRuby',
+                                'GBFStrongs','GBFFootnotes','GBFMorph','GBFHeadings',
+                                'GBFRedLetterWords','ThMLStrongs','ThMLFootnotes',
+                                'ThMLScripref','ThMLMorph','ThMLHeadings',
+                                'ThMLVariants','ThMLLemma'],
+    "Direction"              : ['LtoR','RtoL','BiDi'],
+    "Feature"                : ['StrongsNumbers','GreekDef','HebrewDef','GreekParse',
+                                'HebrewParse','DailyDevotion','Glossary','Images',
+                                'NoParagraphs'],
+    "Category"               : ['Biblical Texts','Commentaries',
+                                'Lexicons / Dictionaries','Glossaries',
+                                'Daily Devotional','Generic Books','Maps','Images',
+                                'Cults / Unorthodox / Questionable Material','Essays'],
+    "DistributionLicense"    : ['Public Domain','Copyrighted',
+                                'Copyrighted; Permission to distribute granted to CrossWire',
+                                'Copyrighted; Permission granted to distribute non-commercially in SWORD format',
+                                'Copyrighted; Free non-commercial distribution',
+                                'Copyrighted; Freely distributable','GFDL','GPL',
+                                'Creative Commons: BY-NC-ND 4.0',
+                                'Creative Commons: BY-NC-SA 4.0',
+                                'Creative Commons: BY-NC 4.0',
+                                'Creative Commons: BY-ND 4.0',
+                                'Creative Commons: BY-SA 4.0',
+                                'Creative Commons: BY 4.0',
+                                'Creative Commons: CC0'],
+    "SearchOption"           : ['IncludeKeyInSearch']
+}
+
+# Elements that may appear several times; chk_repeats() reports a repeat
+# of any element not listed here.
+multi = ['GlobalOptionFilter','Feature','Obsoletes']
+
+# Elements whose value may span several lines using a trailing backslash;
+# chk_continuation() reports continuation on any other element.
+continuation = ['About','Copyright','CopyrightNotes','CopyrightContactName',
+                'CopyrightContactNotes','CopyrightContactAddress','DistributionNotes',
+                'TextSource','UnlockInfo','Notes']
+
+# Elements that may carry a locale suffix (eg: About_de);
+# chk_localization() reports a suffix on any other element.
+localization = ['Abbreviation','Description','About','History','Copyright',
+                'CopyrightHolder','CopyrightNotes','CopyrightContactName',
+                'CopyrightContactNotes','CopyrightContactAddress',
+                'CopyrightContactEmail','ShortPromo','ShortCopyright',
+                'DistributionNotes','TextSource','UnlockInfo']
+
+# Elements chk_required() expects to find in every .conf file.
+required = ['Description','DataPath','ModDrv','About', 'SwordVersionDate',
+            'DistributionLicense','TextSource','Version']
+
+# Accepted elements for which parseconf() emits a "not documented" warning.
+unidentified = ['Notes','ReferenceBible','SearchOption','Siglum1','Siglum2']
+
+# Elements for which chk_obsolete() emits a "deprecated" warning.
+deprecated = ['OSISqToTick']
+
+# Element names (locale suffix included) present in the parsed .conf file.
+# Filled by listconf(); read by chk_repeats() and chk_required().
+existing = []
+
+# Number of errors reported so far; incremented by error() and used by
+# main() for the final report and exit status.
+errors = 0
+
+# Number of warnings reported so far; incremented by warning().
+warnings = 0
+
+
+def die(msg):
+    '''
+    Show an error message then exit on error
+    '''
+    print(' ERROR! ' + msg, file=sys.stderr)
+    print('Parsing failed\n')
+    sys.exit(1)
+
+
+def error(msg):
+    '''
+    Report a non-fatal error on stderr and count it.
+
+    Returns the updated value of the module-level errors counter.
+    '''
+    global errors
+    sys.stderr.write(' ERROR! ' + msg + '\n')
+    errors = errors + 1
+    return errors
+
+
+def warning(msg):
+    '''
+    Show a warning message, increment warnings number
+    '''
+    global warnings
+    print(' WARNING! ' + msg, file=sys.stderr)
+    warnings += 1
+    return(warnings)
+
+
+def get_parameters():
+    '''
+    Get Parse command-line options.
+    Returns string containing .conf filename
+    '''
+    description = '''
+    Validate a SWORD .conf file contents.
+    '''
+    # Parse command-line
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('conf', help='config file')
+    args = vars(parser.parse_args())
+
+    # Checking conf file in input
+    fileconf = args['conf']
+    fileObj = Path(fileconf)
+    if not fileObj.is_file():
+         die(f"File '{fileconf}' does not exist.")
+    return(fileconf)
+
+
+# Discard: doesn't work with continuation
+def readconf2(file):
+    config = configparser.RawConfigParser(strict=False)
+    config.optionxform = lambda option: option
+    config.read(file)
+    if (len(config.sections())) != 1:
+        die('Invalid File Format')
+
+    for sect in config.sections():
+        for k,v in config.items(sect):
+            print(' {} = {}'.format(k,v))
+        print()
+
+
+def readconf(file):
+    '''
+    Read conf file in input
+    Returns list of elemtype
+    '''
+    # List of elemtype
+    config =[]
+    # Key element
+    element = ''
+    # open conf file
+    with open(file, 'r', encoding='utf-8', newline='\n') as f:
+        for line in f:
+            # Read line
+            line = line.strip()
+            # Line continuation
+            if line.endswith('\\'):
+                 element = element + line +'\n'
+            # Simple line or end of continuation
+            else:
+                 if line:
+                      if line[0] != '#':
+                           config.append(element + line)
+                 element = ''
+    # List of elemtype
+    return config
+
+
+def parseconf(config):
+    '''
+    Parse config list of elemtype
+    Return list of tuples (element, value)
+    '''
+    global modname
+    # Config list
+    parsed_config = []
+    # List of known keys in a config file
+    known_elemtype = elemtype.keys()
+
+    for entry in config:
+         # Strip trailing whitespaces
+         entry = entry.strip()
+         #print(f"->{entry}<-")
+         # Search for Module identifier
+         id = (re.search(rf'^\[(.+?)\]$', entry))
+         if id:
+             modname = id.group(1)
+             if not modname.isidentifier():
+                 die(f"{modname}: Invalid Unique Identifier.")
+         else:
+             # Check the '=' separator exists
+             if not '=' in entry:
+                 error(f"{entry}: Parsing error, unexpected item")
+             else:
+                 # Extract key from entry
+                 key, value = entry.split('=', 1)
+                 element = key
+                 if '_' in key:
+                     key, lang = key.split('_', 1)
+                 # Check if key exists
+                 if key not in known_elemtype:
+                     error(f"{key}: Unknown element")
+                 else:
+                     parsed_config.append( tuple([ element, value ] ))
+                 if key in unidentified:
+                     warning(f"{key}: Element is not documented")
+    # End
+    return(parsed_config)
+
+
+def chk_type(config):
+    '''
+    Check element type
+    Returns list containing commented elemtype
+    '''
+    for elem, value in config:
+        if '_' in elem:
+            # Remove extension (eg: about_de, remove _de)
+            subelem, ext = elem.split('_', 1)
+        else:
+            subelem = elem
+        if '|' in value:
+            # Remove parameters (eg: GlobalOptionFilter=OSISReferenceLinks|Reference..|..)
+            value, parms = value.split('|',1)
+        # Type=keyword
+        if 'keyword' in elemtype[subelem]:
+            if value not in keywords[subelem]:
+                error(f"{elem}={value}: Not matching predefined value")
+        # Type=isodate
+        elif 'isodate' in elemtype[subelem]:
+            isoregex = '^([0-9]{4})-?(1[0-2]|0[1-9])-?(3[01]|0[1-9]|[12][0-9])$'
+            if re.match(isoregex, value):
+                SWdate = date.fromisoformat(value)
+                if SWdate > date.today():
+                    error(f"{SWdate}: Future dates are not allowed")
+                if SWdate < date.fromisoformat('1992-01-01'):
+                    error(f"{elem}={SWdate}: Older dates than Sword's are not allowed")
+            else:
+                error(f"{elem}={value}: Incorrect format")
+        # Type=year
+        elif 'year' in elemtype[subelem]:
+            value = value.replace('-',',')
+            listyears = value.split(',')
+            for year in listyears:
+                yregex = '^([0-9]{4})$'
+                if re.match(yregex, year):
+                    SWdate = date.fromisoformat(f"{year}-01-01")
+                    if SWdate > date.today():
+                        error(f"{elem}={year}: Future years are not allowed")
+                    if SWdate < date.fromisoformat('1583-01-01'):
+                        error(f"{elem}={year}: Years prior to 1583 are not allowed")
+                else:
+                    error(f"{elem}={year}: Incorrect format")
+        # Type=text, html or rtf
+        else:
+            if typevalue(value) not in elemtype[subelem]:
+                error(f"{elem}: '{typevalue(value)}' formatting is not allowed")
+
+
+def typevalue(str):
+    '''
+    Return type of str (boolean, integer, text, rtf, html, rtf+html)
+    '''
+    if str.capitalize() in ['True','False']:
+        return('boolean')
+    elif str.isnumeric():
+        return('integer')
+    elif ishtml(str) and isrtf(str):
+        return('rtf+html')
+    elif ishtml(str):
+        return('html')
+    elif isrtf(str):
+        return('rtf')
+    else:
+        return('text')
+
+
+def ishtml(str):
+    '''
+    Return True is str contains html codes
+    '''
+    regexp = r'<a|/>|</'
+    return(re.search(regexp, str))
+
+
+def isrtf(str):
+    '''
+    Return True is str contains rtf codes
+    '''
+    regexp = r'\\par|\\qc'
+    return(re.search(regexp, str))
+
+
+def listconf(config):
+    '''
+    Parse config list of tuples (key, value)
+    Return list of existing elemtype
+    '''
+    for key, value in config:
+        existing.append(key)
+
+
+def chk_repeats(config):
+    '''
+    Check element repetition
+    '''
+    printed = []
+    for elem, value in config:
+        if '_' in elem:
+            subelem, ext = elem.split('_', 1)
+        else:
+            subelem = elem
+        # Count repeats
+        repeats = existing.count(elem)
+        if repeats > 1 and elem not in multi:
+            if not elem in printed:
+                error(f"{elem}: Repeating this element is not allowed.")
+                printed.append(elem)
+
+
+def chk_continuation(config):
+    '''
+    Check element continuation
+    '''
+    for elem, value in config:
+        if '_' in elem:
+            subelem, ext = elem.split('_', 1)
+        else:
+            subelem = elem
+        # Search for strings containing '/\n'
+        regexp = r'\\\n'
+        if (re.search(regexp, value)):
+            if subelem not in continuation:
+                error(f"{elem}: Continuation not allowed on that element.")
+
+
+def chk_localization(config):
+    '''
+    Check element continuation
+    '''
+    for elem, value in config:
+        if '_' in elem:
+            subelem, ext = elem.split('_', 1)
+            if subelem not in localization:
+                error(f"{elem}: Localization is not allowed")
+
+
+def chk_required(config):
+    '''
+    Check required elements
+    '''
+    for elem in required:
+        if not elem in existing:
+            error(f"{elem}: Missing required element")
+
+
+def chk_datapath(config):
+    '''
+    Check DataPath
+    '''
+    path = {
+        'RawText'   : './modules/texts/rawtext/',
+        'RawText4'  : './modules/texts/rawtext4/',
+        'zText'     : './modules/texts/ztext/',
+        'zText4'    : './modules/texts/ztext4/',
+        'zCom'      : './modules/comments/zcom/',
+        'zCom4'     : './modules/comments/zcom4/',
+        'hREFCom'   : './modules/comments/hrefcom/',
+        'RawCom'    : './modules/comments/rawcom/',
+        'RawCom4'   : './modules/comments/rawcom4/',
+        'RawFiles'  : './modules/comments/rawfiles/',
+        'zLD'       : './modules/lexdict/zld/',
+        'RawLD'     : './modules/lexdict/rawld/',
+        'RawLD4'    : './modules/lexdict/rawld4/',
+        'RawGenBook': './modules/genbook/rawgenbook/'
+        }
+    # Module
+    module = modname.lower()
+    # Read needed values
+    datapath = ''
+    moddrv = ''
+    category = ''
+    for elem, value in config:
+        if elem == 'DataPath':
+            datapath = value
+        if elem == 'ModDrv':
+            moddrv = value
+        if elem == 'Category':
+            category = value
+    # Build category specific sub-dir
+    if category == 'DailyDevotion':
+        category = 'devotionals/'
+    elif category == 'Glossary':
+        category = 'glossaries/'
+    else:
+        category = ''
+    if moddrv in path.keys():
+        # Build the recommended DataPath
+        if moddrv in ['zLD','RawLD','RawLD4']:
+            suitedpath = path[moddrv] + category + module + '/dict'
+        else:
+            suitedpath = path[moddrv] +  module + '/'
+        # Compare DataPath values
+        if datapath != suitedpath:
+            warning('DataPath differs from the recommended convention,')
+            if 'devotionals' in datapath:
+                print(' or Feature=DailyDevotion is missing,')
+            if 'glossaries' in datapath:
+                print(' or Feature=Glossary is missing,')
+            print(f"  DataPath={datapath}\n  Rec.Conv={suitedpath}")
+
+
+def chk_length(config):
+    '''
+    Check element continuation
+    '''
+    for elem, value in config:
+        # Remove locale
+        subelem = elem
+        if '_' in elem:
+            subelem, ext = elem.split('_', 1)
+        # Select short elements
+        if subelem in ['Description','ShortPromo','ShortCopyright']:
+            # Check max length
+            if len(value) > 80:
+                warning(f"{elem}: Element length is longer than expected")
+
+
+def chk_obsolete(config):
+    '''
+    Check deprecated
+    '''
+    for elem, value in config:
+        subelem = elem
+        if '_' in elem:
+            subelem, ext = elem.split('_', 1)
+        if subelem in deprecated:
+            warning(f"{elem}: This attribute is deprecated")
+
+
+def chk_https(config):
+    '''
+    Check http:// use
+    '''
+    for elem, value in config:
+        subelem = elem
+        if '_' in elem:
+            subelem, ext = elem.split('_', 1)
+        if subelem in ['TextSource']:
+            regexp = r'http\:'
+            if (re.search(regexp, value)):
+                error(f"{elem}: URL is not secure, please use https://")
+
+def main():
+    '''
+    Main function
+    '''
+    # Get filename
+    filename = get_parameters()
+    basename = os.path.basename(filename)
+    print(f"Validating {basename}:", file=sys.stdout)
+    # Parse file
+    config = readconf(filename)
+    parsed = parseconf(config)
+    listconf(parsed)
+    # Checks
+    chk_type(parsed)
+    chk_repeats(parsed)
+    chk_continuation(parsed)
+    chk_localization(parsed)
+    chk_required(parsed)
+    chk_datapath(parsed)
+    chk_length(parsed)
+    chk_obsolete(parsed)
+    chk_https(parsed)
+    # Final report
+    if errors == 1:
+        printerrors = '1 error'
+    else:
+        printerrors = f'{errors} errors'
+    if errors:
+        print(f'{basename} fails to validate with {printerrors}', file=sys.stdout)
+        sys.exit(1)
+    else:
+        print(f'{basename} validates', file=sys.stdout)
+    return (True)
+
+
+main()


Property changes on: trunk/modules/conf/conflint.py
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property


More information about the sword-cvs mailing list