[sword-svn] r553 - trunk/modules/conf

domcox at crosswire.org domcox at crosswire.org
Sun Sep 18 13:05:59 EDT 2022


Author: domcox
Date: 2022-09-18 13:05:59 -0400 (Sun, 18 Sep 2022)
New Revision: 553

Added:
   trunk/modules/conf/confmaker.py
Removed:
   trunk/modules/conf/confmaker.pl
Log:
Replace confmaker.pl by confmaker.py

Deleted: trunk/modules/conf/confmaker.pl
===================================================================
--- trunk/modules/conf/confmaker.pl	2022-09-17 12:21:29 UTC (rev 552)
+++ trunk/modules/conf/confmaker.pl	2022-09-18 17:05:59 UTC (rev 553)
@@ -1,345 +0,0 @@
-#!/usr/bin/perl
-## confmaker.pl - provides a initial conf file for a new module by analysing  given OSIS xml file. 
-## The programme searches for relevant tags and creates the GlobalOptionFilter entries and other relevant conf entries
-
-## Licensed under the standard BSD license:
-
-# Copyright (c) 2002-2009 CrossWire Bible Society <http://www.crosswire.org/>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#     * Redistributions of source code must retain the above copyright
-#        notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in
-#       the documentation and/or other materials provided with the
-#       distribution.
-#     * Neither the name of the CrossWire Bible Society nor the names of
-#       its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written
-#       permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-## For general inquiries, comments, suggestions, bug reports, etc. email:
-## sword-support at crosswire.org
-
-#########################################################################
-use XML::LibXML;
-use I18N::LangTags::List;
-use Unicode::UCD 'charinfo';
-#use open ':std', ':encoding(UTF-8)';
-#use open qw/:std :utf8/;
-use utf8;
-use Sword;
-use HTML::Strip;
-
-my %version	 = (  KJV     		=> '1.5.9',
-                      KJVA    		=> '1.6.0',
-                      NRSV    		=> '1.6.0',
-                      NRSVA		=> '1.6.0',
-                      MT		=> '1.6.0',
-                      Leningrad		=> '1.6.0',
-                      Synodal		=> '1.6.1',
-                      Vulg		=> '1.6.1',
-                      Luther		=> '1.6.1',
-                      German		=> '1.6.1',
-                      Catholic		=> '1.6.2',
-                      Catholic2		=> '1.6.2',
-                      LXX		=> '1.7.2',
-                      Orthodox		=> '1.7.2',
-                      SynodalProt	=> '1.7.2',
-                      DarbyFr		=> '1.8.0',
-                      Segond		=> '1.8.0',
-                      Calvin		=> '1.8.0'
-                   );
-
-my @av11n	= ( 'KJV', 'KJVA', 'NRSV', 'NRSVA', 'MT', 'Leningrad', 'Synodal', 'Vulg', 
-                    'Luther', 'German', 'Catholic', 'Catholic2', 'LXX', 'Orthodox', 
-                    'SynodalProt', 'DarbyFR', 'Segond', 'Calvin' 
-                  );
-                  
-my $v11n	= "KJV"; # If the script is called without a v11n chosen it will set KJV as standard. 
-
-## Obtain arguments
-if (scalar(@ARGV) < 1) {
-    print "\nconfmaker.pl -- - provides a initial conf file for a new module by analysing  given OSIS xml file.\n";
-    print "Syntax: confmaker.pl <osis XML file> [-o <conf-output-file>] [-i <conf-input-file>] [-m] [-l <language-code>] [-v <v11n>]\n";
-    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
-    print "- If no -o option is specified <STDOUT> is used.\n";
-    print "- if the -m option is used no -i option may be used. -m expects parametres added by other means, e.g. a makefile";
-    print "- The script can currently produce a valid conf file for OSIS bibles, but not for any other import formats.\n";
-    exit (-1);
-}
-
-$file = @ARGV[0];
-
-$nextarg = 1;
-
-if (@ARGV[$nextarg] eq "-o") {
-       $outputFileName = "@ARGV[$nextarg+1]";
-       $nextarg += 2;
-       open (OUTF,, ">", "$outputFileName") or die "Could not open file $outputFileName for writing.";
-       select(OUTF)
-       }
-
-if (@ARGV[$nextarg] eq "-i") {
-       $inputFileName = "@ARGV[$nextarg+1]";
-       $nextarg += 2;
-       open (INF,, "<","$inputFileName") or die "Could not open inputfile $inputFileName for reading" ; 
-       @inputFile = <INF>;
-       }
-
-if (@ARGV[$nextarg] eq "-m") {
-       if ($inputFileName) {
-          print STDERR "You can not define both an input file and use the -m option, sorry...\n";
-          exit 1;}
-       $makefile=true;
-       $nextarg +=1;
-       }
-
-if (@ARGV[$nextarg] eq "-l") {
-       $language = "@ARGV[$nextarg+1]";
-       $nextarg += 2;
-       }
-
-if (@ARGV[$nextarg] eq "-v") {
-       $v11n = "@ARGV[$nextarg+1]";
-       }
-
- 
-
-my $parser = XML::LibXML->new();
-my $doc = $parser->parse_file($file);
-
-my $manager = new Sword::SWMgr();
-
-$manager->setGlobalOption("Hebrew Vowel Points", "Off");
-$manager->setGlobalOption("Hebrew Cantillation", "Off");
-$manager->setGlobalOption("Arabic Vowel Points", "Off");
-$manager->setGlobalOption("Greek Accents", "Off");
-
-my $hs = HTML::Strip->new();
-my $doc_text = new Sword::SWBuf($hs->parse($doc->toString()));
-
-## obtain name, type and language
-
-my @elements = $doc->getElementsByTagName('osisText');
-
-my $doc_name = @elements[0]->getAttribute('osisIDWork');
-my $doc_type = @elements[0]->getAttribute('osisRefWork');
-my $doc_lang = @elements[0]->getAttribute('xml:lang');
-my $doc_lang_name=I18N::LangTags::List::name($doc_lang);
-;
-
- 
-
-if ((length($language)==0) && (length($doc_lang)==0)) {
-   print STDERR $language."\n", $doc_lang."\n", $doc_lang_name."\n";
-   print STDERR "The language is undefined and no language was given on the commandline !\n";
-   exit;
-   }
-
-if (((length($language)>0) && (length($doc_lang)>0)) && ($language ne $doc_lang)){
-   print STDERR "The language ($language) given on the commandline and the language of the document ($doc_lang_name) appear not to agree with each other !\n";
-   exit;
-   }
-
-if ((length($language)>0) && (length($doc_lang)==0)) {
-   $doc_lang_name = I18N::LangTags::List::name($language);
-   }
-
-if (!(exists $version{$v11n}))  {
-    print STDERR "This versification does not exist (yet) \n";
-    print STDERR "Valid versfication systems are\n\t";
-
-    my $notmorethan4 = 1;
-    foreach (@av11n) {
-      $notmorethan4++;
-      if ($notmorethan4 <= 5) {
-        print STDERR "$_ ";
-      }
-      else {
-        print STDERR "\n\t$_ ";
-        $notmorethan4 = 2;
-      }
-    }
-    print STDERR "\n";
-    exit(-1);
-  }
-#remove <header> tag and child nodes as its presence can cause confusion
-for my $header ($doc->getElementsByTagName('header')) {
-    $header->unbindNode;
-}
-
-
-
-##GlobalOptionsFilter - prepare
-
-my @doc_features = ('title', 'note', 'reference', 'q', 'figure', 'rdg', 'seg');
-my @word_features = ('lemma', 'strong', 'gloss', 'morph',);
-my @char_features = ('Hebrew Vowel Points', 'Arabic Vowel Points', 'Hebrew Cantillation', 'Greek Accents');
-
-my %doc_filters = ( 'title'	=> "OSISHeadings",
-                    'note'  	=> "OSISFootnotes",
-                    'reference' => "OSISScripref",
-                    'gloss' 	=> "OSISGlosses",
-                    'lemma' 	=> "OSISLemma",
-                    'strong' 	=> "OSISStrongs",
-                    'morph' 	=> "OSISMorph",
-                    'q'  	=> "OSISRedLetterWords",
-                    'rdg' 	=> 'OSISVariants',
-                    'enum' 	=> 'OSISEnum',
-                    'xlit' 	=> 'OSISXlit',
-                    'seg' 	=> 'OSISMorphSegmentation'
-             
-            );
-
-            
-my %doc_feature = ( 'strong' => 'StrongsNumbers',
-                    'figure' => 'Images',
-                     'p'  => 'NoParagraphs'
-                     
-                  );
-
-my %diacritics = ( 'Hebrew Vowel Points' => "UTF8HebrewPoints",
-                   'Arabic Vowel Points' => 'UTF8ArabicPoints',
-                   'Hebrew Cantillation' => 'UTF8Cantillation',
-                   'Greek Accents' 	 => 'UTF8GreekAccents',
-                 );
-
-            
-my %doc_has_feature;
-
-## GlobalOptionsFilter - search for
-            
-foreach (@doc_features) {
-   my @elements = $doc->getElementsByTagName($_);
-   if (@elements>0) { $doc_has_feature{$_}=true } ;
-   }
-
-my @elements = $doc->getElementsByTagName('w');
-
-foreach my $f(@word_features) {
-
-  foreach my $e(@elements) {
-   if ($e->hasAttribute($f)) {
-    $doc_has_feature{$f}=true;
-    last;
-   }
-  }
- 
-}   
-
-if ($doc_has_feature{'lemma'}) {
-  foreach my $e(@elements) {
-   if ($e->hasAttribute('lemma')) {
-    my $lemma = $e->getAttribute('lemma');
-    
-    if (index(lc($lemma), 'strong') != -1) {
-      $doc_has_feature{'strong'}=true;
-      last;
-    }
-   }
-  }     
-} 
-    
-
-my @paragraphs = $doc->getElementsByTagName('p');
-if (@paragraphs==0) {$doc_has_feature{'p'}=true};
-
-
-   
-# Assemble and print out
-
-print "[".$doc_name."]\n";
-
-if ($doc_type =~ m/Bible/i) { 
- print  "ModDrv=zText\n";
- print "DataPath=./modules/texts/ztext/".lc($doc_name)."/\n";
-}
-
-if ($doc_type =~ m/Commentary/i) {
- print  "ModDrv=zCom\n";
- print "DataPath=./modules/comments/zcom/".lc($doc_name)."/\n";
-}
-
-
-print "CompressType=ZIP\n";
-print "BlockType=BOOK\n";
-
-print  "Encoding=UTF-8\n";
-print  "SourceType=OSIS\n";
-print  "SwordVersionDate=".`date +"%F"`;
-
-print  "Lang=".$doc_lang."\n";
-
-
-
-foreach (@doc_features) {
-   if ($doc_has_feature{$_}) { 
-      print  "GlobalOptionFilter=".$doc_filters{$_}."\n"
-      }
-   }   
-foreach (@word_features) {
-   if ($doc_has_feature{$_}) { 
-      print  "GlobalOptionFilter=".$doc_filters{$_}."\n"
-      }
-   }   
-
-foreach $filter(@char_features) {
-   my $tmp = new Sword::SWBuf($hs->parse($doc->toString()));
-   
-   $manager->filterText($filter, $tmp);
-
-   if ($tmp->c_str() ne $doc_text->c_str()) {
-      print "GlobalOptionFilter=".%diacritics{$filter}."\n";
-      
-   }
-}
-
-
-      
-foreach (@doc_features) {
-   if ($doc_has_feature{$_} && exists $doc_feature{$_}) { 
-      print  "Feature=".$doc_feature{$_}."\n"
-      }
-   }   
-foreach (@word_features) {
-   if ($doc_has_feature{$_} && exists $doc_feature{$_}) { 
-      print  "Feature=".$doc_feature{$_}."\n"
-      }
-   }   
-if ($doc_has_feature{'p'}) {
-   print "Feature=".$doc_feature{'p'}."\n"
-   }
-
-print  "LCSH=".$doc_type.".".I18N::LangTags::List::name($doc_lang)."\n";
-print "MinimumVersion=".$version{$v11n}."\n";
-print "Versification=".$v11n."\n";
-
-if (@inputFile>0) {
-   foreach(@inputFile) {
-      print $_;
-      }
-   }
-elsif (!$makefile){
-   print "DistributionLicense=copyrighted. Do not distribute\n";   
-   print "Description=".$doc_name." Bible in ".$doc_lang_name."\n";
-   print "About=".$doc_name." Bible in ".$doc_lang_name."\n";
-   print "Version=1.0\n";
-   print "History_1.0=First release\n";
-}

Added: trunk/modules/conf/confmaker.py
===================================================================
--- trunk/modules/conf/confmaker.py	                        (rev 0)
+++ trunk/modules/conf/confmaker.py	2022-09-18 17:05:59 UTC (rev 553)
@@ -0,0 +1,543 @@
+#!/usr/bin/env python3
+
+# -*- coding: utf-8 -*-
+
+# confmaker.py - Provides an initial conf file for a new module by analyzing
+#                the related OSIS xml file.
+
+## The programme searches for relevant tags and creates the GlobalOptionFilter
+#  entries and other relevant conf entries. This is a port to Python of the
+#  previous confmaker.pl Perl script we were using. It fixes detection of
+#  diacritics and OSISMorphSegmentation (GlobalOptionFilter entries) and adds
+#  support for genbook and modules with large entries > 64Kb.
+
+# Copyright (C) 2020 CrossWire Bible Society
+
+
+# Author: kris <kristof.szabo at lutheran.hu> & domcox <domcox at crosswire.org>
+
+# This file is part of Sword Modules
+
+# Sword Modules is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Sword Modules is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Sword Modules.  If not, see <https://www.gnu.org/licenses/>.
+
+# Created:  2021-01-08
+#
+# Revision:
+# 2021-01-16 domcox <domcox at crosswire.org>
+#            Changed language library from iso-639 to langtags
+
+
+# Requirements
+
+import time
+import re
+import argparse
+import sys
+import xml.etree.ElementTree as ET
+from datetime import date
+from pathlib import Path
+try:
+     import langtags
+except:
+     sys.stderr.write("You do not have the Python langtags library installed. Please install it (pip install langtags).\n")
+     sys.exit(1)
+try:
+     import Sword
+except:
+     sys.stderr.write("You do not have the SWORD library installed. Please install it.\n")
+     sys.exit(1)
+
+
+# Variables
+# Presumably the version of this script -- not referenced elsewhere in this file (TODO confirm)
+Version = '1.1'
+# Lines of the generated conf file, in output order; appended to by build_doc() and include_file()
+doc = []
+# Versification schema name -> string emitted as 'MinimumVersion=' for that schema
+versification = {
+    'KJV': '1.5.9',
+    'KJVA': '1.6.0',
+    'NRSV': '1.6.0',
+    'NRSVA': '1.6.0',
+    'MT': '1.6.0',
+    'Leningrad': '1.6.0',
+    'Synodal': '1.6.1',
+    'Vulg': '1.6.1',
+    'Luther': '1.6.1',
+    'German': '1.6.1',
+    'Catholic': '1.6.2',
+    'Catholic2': '1.6.2',
+    'LXX': '1.7.2',
+    'Orthodox': '1.7.2',
+    'SynodalProt': '1.7.2',
+    'DarbyFr': '1.8.0',
+    'Segond': '1.8.0',
+    'Calvin': '1.8.0'
+}
+
+
+# Functions
+
+def die(msg):
+     '''
+     Report a fatal error on stderr, then terminate with exit status 1
+     '''
+     sys.stderr.write('ERROR! ' + msg + '\n')
+     raise SystemExit(1)
+
+
+def get_parameters():
+    """
+    Build the argparse parser and parse the command line (sys.argv).
+    Returns a dict of values keyed by infile, outfile, v11n, size and osis
+    """
+    # Help banner, kept verbatim
+    description = '''
+    provides a conf file for a module by analysing given OSIS XML file and optionally including extra elements from a conf.in file.
+    '''
+    parser = argparse.ArgumentParser(description=description)
+
+    # One (flags, settings) pair per argument; the positional OSIS file comes last
+    arguments = (
+        (("-i", "--infile"), dict(help="conf.in file containing extra elements to include, (default none)")),
+        (("-o", "--outfile"), dict(help="name of generated conf file, (default to screen)")),
+        (("-v", "--v11n"), dict(default='KJV', help="versification schema, (default: KJV)")),
+        (("-s", "--size"), dict(default='2', help="set -s 4 for modules with large entries > 64Kb, (default -s 2)")),
+        (('osis',), dict(help='OSIS XML file')),
+    )
+    for flags, settings in arguments:
+        parser.add_argument(*flags, **settings)
+
+    return vars(parser.parse_args())
+
+
+def check_parameters(params):
+     '''
+     Validate the parsed CLI parameters.
+     Calls die() on the first invalid value, returns True when all are valid
+     '''
+
+     # The OSIS source must be an existing regular file
+     osisfile = params['osis']
+     if not Path(osisfile).is_file():
+          die(f"File '{osisfile}' does not exist.")
+
+     # The conf.in include is optional, but must exist when it is given
+     infile = params['infile']
+     if infile and not Path(infile).is_file():
+          die(f"File '{infile}' does not exist.")
+
+     # Only 2 and 4 are accepted as entry size
+     size = params['size']
+     size_is_valid = size == '2' or size == '4'
+     if not size_is_valid:
+          die(f"--size='{size}' Incorrect value.")
+
+     # The versification schema must be one of the keys of the
+     # module-level 'versification' table
+     v11n = params['v11n']
+     known_schemas = versification.keys()
+     if v11n not in known_schemas:
+          die(f"'{v11n}': Unknown versification schema.")
+     return True
+
+
+def get_osistext(osisfile):
+    """
+    Read the <osisText ...> start tag of an OSIS file.
+    Returns a dict holding the values of its osisIDWork, osisRefWork and
+    xml:lang attributes; calls die() when the tag or one of these
+    attributes is missing.
+    """
+    start_tag = '<osisText'
+    end_tag = '>'
+    node_identified = False
+    # Text captured from the start tag onwards (the tag may span several lines)
+    captured_line = ''
+    # osisText attributes
+    osistext = dict([])
+    # OSIS documents are UTF-8: do not depend on the locale's default encoding
+    with open(osisfile, encoding='utf-8') as f:
+        # Read lines until osisText is captured
+        while not node_identified:
+            line = f.readline()
+            if not line:
+                # End of File
+                die('osisText not found in osis file')
+            if captured_line:
+                captured_line += line
+            elif start_tag in line:
+                # Drop what precedes the tag, so that a '>' closing an earlier
+                # element on the same line is not taken for the end of osisText
+                captured_line = line[line.index(start_tag):]
+            if end_tag in captured_line:
+                # osisText start tag is fully captured: keep the tag only
+                node_identified = True
+                captured_line = captured_line[:captured_line.index(end_tag) + 1]
+                # Fixed order, so the missing attribute reported first is stable
+                for attribute in ('osisIDWork', 'osisRefWork', 'xml:lang'):
+                    value = (re.search(rf'{attribute}="(.+?)"', captured_line, flags=re.IGNORECASE))
+                    if value:
+                        osistext[attribute] = value.group(1)
+                    else:
+                        die(f'osisText attribute missing: {attribute}')
+    return osistext
+
+
+def check_osistext(osistext):
+    '''
+    Check osisText attributes; calls die() on the first invalid one.
+    Returns True when osisIDWork, osisRefWork and xml:lang are all acceptable
+    '''
+    # Check osisIDWork
+    module = osistext['osisIDWork'].lower()
+    if len(module) < 1:
+        die('FATAL: osisIDWork is empty.')
+
+    # Check osisRefWork (the message used an undefined name -> NameError)
+    moduletype = osistext['osisRefWork']
+    if moduletype.lower() not in ['bible','commentary','genbook']:
+        die(f"FATAL: Invalid attribute osisRefWork: {moduletype}")
+
+    # Check Language: get_language() calls die() itself for an unknown tag
+    lang = osistext['xml:lang']
+    get_language(lang)
+    return True
+
+
+def get_language(lang):
+    """
+    Search BCP-47 Languages Database for lang.
+    Returns the language description on a single line; calls die() when
+    the langtags library cannot resolve lang.
+    """
+    try:
+        tag = langtags.Tag(lang)
+    except Exception:
+        # 'except Exception' rather than a bare except, so that Ctrl-C and
+        # SystemExit are not reported as an unknown language
+        die(f"Language '{lang}' not found in BCP 47 Languages Database")
+    # Sometimes language description is multiline -> remove '\n'
+    return (tag.language.description.replace('\n', ' '))
+
+
+def is_tag(xml_file, tag):
+    """
+    Search for 'tag' in OSIS file and returns True if 'tag' exists, False otherwise.
+    Only text located after the closing </header> tag is searched.
+    """
+    # Start searching after <header> tag to avoid confusion
+    end_header_tag = '</header>'
+    header_read = False
+    # Tag to search
+    start_tag = f'<{tag}'
+    # OSIS files are UTF-8; do not rely on the locale default encoding
+    with open(xml_file, encoding='utf-8') as f:
+         for line in f:
+              if not header_read:
+                   if end_header_tag not in line:
+                        continue
+                   header_read = True
+                   # Ignore header content sharing a line with </header>
+                   line = line.split(end_header_tag, 1)[1]
+              if start_tag in line:
+                   return True
+    return False
+
+
+def is_attribute(xml_file, tag, attribute):
+    """
+    Search for 'tag' + 'attribute' in OSIS file,
+    returns True if the text 'attribute' occurs inside a <tag ...> start tag
+    located after the closing </header> tag, False otherwise.
+    """
+    # Start searching after <header> tag to avoid confusion
+    end_header_tag = '</header>'
+    header_read = False
+    start_tag = f'<{tag}'
+    # Start tag left open at the end of the previous line, if any
+    pending = ''
+    # OSIS files are UTF-8; do not rely on the locale default encoding
+    with open(xml_file, encoding='utf-8') as f:
+         for line in f:
+              if not header_read:
+                   if end_header_tag not in line:
+                        continue
+                   header_read = True
+                   # Ignore header content sharing a line with </header>
+                   line = line.split(end_header_tag, 1)[1]
+              # Prepend a start tag that was still open on the previous line
+              text = pending + line
+              pending = ''
+              pos = text.find(start_tag)
+              while pos != -1:
+                   close = text.find('>', pos)
+                   if close == -1:
+                        # The start tag continues on the next line
+                        pending = text[pos:]
+                        break
+                   # Skip longer names sharing the prefix (e.g. <work for <w)
+                   is_same_tag = text[pos + len(start_tag)] in ' \t\r\n/>'
+                   if is_same_tag and attribute in text[pos:close + 1]:
+                        return True
+                   pos = text.find(start_tag, close)
+    return False
+
+
+def is_diacritic(xml_file, lang, diacritic):
+     '''
+     Tell whether applying the SWORD filter named 'diacritic' changes the
+     bare text of the OSIS file.
+     Returns True or False; always False when 'lang' is not a language
+     the filter is run for
+     '''
+     # Filter name -> languages it is run for
+     filter_languages = (
+          ('Arabic Vowel Points', ('ar',)),
+          ('Greek Accents', ('grc',)),
+          ('Hebrew Cantillation', ('he','hbo')),
+          ('Hebrew Vowel Points', ('he','hbo')),
+     )
+     # Don't search OSIS targetting other languages than Hebrew, Greek, Arabic
+     if lang not in ('ar','grc','he','hbo'):
+          return False
+     # Don't run a filter on a language it does not apply to
+     for name, languages in filter_languages:
+          if diacritic == name and lang not in languages:
+               return False
+
+     # Grab the base SWORD manager and switch the four options off
+     mgr = Sword.SWMgr()
+     for option in ("Arabic Vowel Points", "Greek Accents",
+                    "Hebrew Cantillation", "Hebrew Vowel Points"):
+          mgr.setGlobalOption(option, "Off")
+
+     # Parse XML, then remove all tags and keep bare text only
+     xml_root = ET.parse(xml_file).getroot()
+     strip_text = ET.tostring(xml_root, encoding='unicode', method='text')
+
+     # Two buffers with the same text: the reference and the one to filter
+     ref_text = Sword.SWBuf(strip_text)
+     mod_text = Sword.SWBuf(strip_text)
+     mgr.filterText(diacritic, mod_text)
+
+     # True if the filter has made changes to the text, False otherwise
+     return ref_text.c_str() != mod_text.c_str()
+
+
+def build_doc(conf):
+     '''
+     Append the generated conf entries to the module-level doc list.
+     conf holds the CLI parameters merged with the osisText attributes. Returns True
+     '''
+     module = conf['osisIDWork']
+     doc.append("[" + module + "]")
+
+     # Module Type
+     moduletype = conf['osisRefWork']
+     # '4' is appended to driver and directory names for large entries (-s 4)
+     # NOTE(review): confirm that SWORD provides a 'RawGenBook4' driver
+     size = conf['size']
+     block = '4' if size == '4' else ''
+     # Directory name of the module
+     mod = module.lower()
+     # ModDrv + Datapath (exact comparison: "x in 'bible'" is a substring test)
+     if moduletype.lower() == 'bible':
+          doc.append("ModDrv=zText" + block)
+          doc.append("DataPath=./modules/texts/ztext" + block + "/" + mod + "/")
+     if moduletype.lower() == 'commentary':
+          doc.append("ModDrv=zCom" + block)
+          doc.append("DataPath=./modules/comments/zcom" + block + "/" + mod + "/")
+     if moduletype.lower() == 'genbook':
+          doc.append("ModDrv=RawGenBook" + block)
+          doc.append("DataPath=./modules/genbook/rawgenbook" + block + "/" + mod + "/" + mod)
+
+     # Compression
+     if moduletype.lower() in ['bible','commentary']:
+          doc.append('CompressType=ZIP')
+
+     # misc.
+     doc.append('BlockType=BOOK')
+     doc.append('Encoding=UTF-8')
+     doc.append('SourceType=OSIS')
+     doc.append('OSISVersion=2.1.1')
+     doc.append('SwordVersionDate=' + str(date.today()))
+
+     # Language
+     lang = conf['xml:lang']
+     doc.append('Lang=' + lang)
+
+     # GlobalOptionFilter
+     # Get Osis file name
+     osis = conf['osis']
+     # We should have Footnotes before Headings in order to have
+     # working notes in titles
+     # Footnotes
+     if is_tag(osis, 'note'):
+          doc.append('GlobalOptionFilter=OSISFootnotes')
+     # Headings
+     if is_tag(osis, 'title'):
+          doc.append('GlobalOptionFilter=OSISHeadings')
+     # Scripref
+     if is_tag(osis, 'reference'):
+          doc.append('GlobalOptionFilter=OSISScripref')
+     # RedLetterWords
+     if is_tag(osis, 'q '):
+          doc.append('GlobalOptionFilter=OSISRedLetterWords')
+     # Variants
+     variants = False
+     if is_attribute(osis, 'seg', ' type="x-variant"'):
+          variants = True
+     if is_tag(osis, 'rdg'):
+          variants = True
+     if variants:
+          doc.append('GlobalOptionFilter=OSISVariants')
+     # MorphSegmentation
+     osisMorphSegmentation = False
+     if is_attribute(osis, 'seg', 'type="morph"'):
+          osisMorphSegmentation = True
+     if is_attribute(osis, 'seg', 'type="x-morph"'):
+          osisMorphSegmentation = True
+     if osisMorphSegmentation:
+          doc.append('GlobalOptionFilter=OSISMorphSegmentation')
+     # Lemma
+     if is_attribute(osis, 'w', ' lemma='):
+          doc.append('GlobalOptionFilter=OSISLemma')
+     # Strong (result reused below for Feature=StrongsNumbers)
+     strong = is_attribute(osis, 'w', 'strong:')
+     if strong:
+          doc.append('GlobalOptionFilter=OSISStrongs')
+     # Glosses
+     if is_attribute(osis, 'w', ' gloss='):
+          doc.append('GlobalOptionFilter=OSISGlosses')
+     # Morph
+     if is_attribute(osis, 'w', ' morph='):
+          doc.append('GlobalOptionFilter=OSISMorph')
+     # Enum
+     if is_attribute(osis, 'w', ' n='):
+          doc.append('GlobalOptionFilter=OSISEnum')
+     # Xlit
+     if is_attribute(osis, 'w', ' xlit='):
+          doc.append('GlobalOptionFilter=OSISXlit')
+
+     # Diacritics
+     # Hebrew Vowel Points
+     if is_diacritic(osis, lang, 'Hebrew Vowel Points'):
+          doc.append('GlobalOptionFilter=UTF8HebrewPoints')
+     # Arabic Vowel Points
+     if is_diacritic(osis, lang, 'Arabic Vowel Points'):
+          doc.append('GlobalOptionFilter=UTF8ArabicPoints')
+     # Hebrew Cantillation
+     if is_diacritic(osis, lang, 'Hebrew Cantillation'):
+          doc.append('GlobalOptionFilter=UTF8Cantillation')
+     # Greek Accents (no trailing space in the filter name)
+     if is_diacritic(osis, lang, 'Greek Accents'):
+          doc.append('GlobalOptionFilter=UTF8GreekAccents')
+
+     # Features
+     # StrongsNumbers
+     if strong:
+          doc.append('Feature=StrongsNumbers')
+     # Images
+     if is_tag(osis, 'figure '):
+          doc.append('Feature=Images')
+     # NoParagraphs (a paragraph is '<p ' with attributes or a bare '<p>')
+     if not (is_tag(osis, 'p ') or is_tag(osis, 'p>')):
+          doc.append('Feature=NoParagraphs')
+
+     # LCSH
+     lang_name = get_language(lang)
+     if moduletype.lower() in ['bible','commentary']:
+          doc.append('LCSH=' + moduletype + '.' + lang_name)
+
+     # Sword Minimum Version
+     doc.append('MinimumVersion=' + versification[conf['v11n']])
+     if moduletype.lower() in ['bible','commentary']:
+          doc.append('Versification=' + conf['v11n'])
+     return True
+
+
+def include_file(conf):
+     '''
+     Append to doc the lines of the conf.in file, or default entries when none is given
+     '''
+     infile = conf['infile']
+     if not infile:
+          # No conf.in file -> generate default values
+          module = conf['osisIDWork']
+          moduletype = conf['osisRefWork']
+          language = get_language(conf['xml:lang'])
+          doc.extend([
+               'DistributionLicense=Copyrighted',
+               f'Description={module}, {moduletype} in {language}',
+               f'About={module}, {moduletype} in {language}',
+               'Version=1.0',
+               'History_1.0=First release',
+          ])
+          return True
+     # Read conf.in and include its lines without trailing whitespace
+     with open(infile, 'r', encoding='utf-8', newline='\n') as f:
+          doc.extend(line.rstrip() for line in f)
+     return True
+
+
+def print_out(conf, doc):
+     '''
+     Print generated conf file to conf['outfile'] if set, to screen otherwise; returns True
+     '''
+     # Get conf file name
+     outfile = conf['outfile']
+     if not outfile:
+          # Default to screen
+          for element in doc:
+               print(element)
+     else:
+          # Conf entries may hold non-ASCII text: always write UTF-8, not the locale encoding
+          with open(outfile, 'w', encoding='utf-8') as f:
+               for element in doc:
+                    print(element, file=f)
+     return True
+
+
+def main():
+     '''
+     Main function: parse and check the CLI, read the OSIS attributes, then build and print the conf
+     '''
+     # Start benchmark
+     start_time = time.perf_counter()
+
+     # Read CLI params
+     params = get_parameters()
+     check_parameters(params)
+
+     # Read OSIS attributes
+     osis_attributes = (get_osistext(params['osis']))
+     check_osistext(osis_attributes)
+
+     # Generate conf
+     cf = {**params, **osis_attributes}
+     build_doc(cf)
+     include_file(cf)
+     print_out(cf, doc)
+
+     # Benchmark results
+     end_time = time.perf_counter()
+     total_time = round(end_time - start_time, 1)
+     # Report on stderr: stdout carries the conf itself when no -o is given
+     print(f'-- Module Config generated in {total_time} s', file=sys.stderr)
+
+     return True
+
+
+main()


Property changes on: trunk/modules/conf/confmaker.py
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property


More information about the sword-cvs mailing list