[sword-svn] r179 - trunk/versification
chrislit at crosswire.org
chrislit at crosswire.org
Sun Mar 15 20:10:57 MST 2009
Author: chrislit
Date: 2009-03-15 20:10:56 -0700 (Sun, 15 Mar 2009)
New Revision: 179
Modified:
trunk/versification/v11nsys.pl
Log:
added ability to generate v11n system definitions from OSIS files
added option to either count verses/chapters or trust the number of the final verse/chapter (with optional warnings if the numbers differ)
added warning messages
added help text
added progress messages
added (but disabled) ability to generate builtin_abbrevs replacements
added v11n name & book order comments to output .h
sundry bugfixes
Modified: trunk/versification/v11nsys.pl
===================================================================
--- trunk/versification/v11nsys.pl 2009-03-16 02:34:45 UTC (rev 178)
+++ trunk/versification/v11nsys.pl 2009-03-16 03:10:56 UTC (rev 179)
@@ -2,14 +2,10 @@
###############################################################################
#
-# When run without any arguments, this script reads the CCEL files:
-# Bible.*.xml.
-#
-# Otherwise, supply the script with a list (or wildcard) files and it
-# will attempt to create a v11n system definition on that basis.
-#
-# The --vpl switch instructs the script to interpret files as VPL files.
-# The --imp switch instructs the script to interpret files as IMP files.
+# versification (v11n) system .h file generator:
+# processes various file types, including VPL, IMP, OSIS, & CCEL's
+# versification XML files (see http://www.ccel.org/refsys/refsys.html)
+# and generates a C++ header file for use in the Sword project
#
###############################################################################
@@ -23,14 +19,30 @@
}
$osisBook = $osis{lc($lastBook)};
+ if ($count eq "count") {
+ $cval = $cCount;
+ }
+ else {
+ $cval = $lastChap;
+ }
if ($otnt == 0) {
- $otbooks .= "\t{\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cCount},\n";
+ $otbooks .= " {\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cval},\n";
}
else {
- $ntbooks .= "\t{\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cCount},\n";
+ $ntbooks .= " {\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cval},\n";
}
+ $bookOrder .= " $osisBook";
+ if ($warn == 1) {
+ if ($cCount ne $lastChap) {
+ print "WARNING: chapter count ($cCount) does not equal last chapter ($lastChap) in book $osisBook of versification $v11n ($infile).\n";
+ }
+ }
}
+sub printUsage() {
+ print " v11nsys.pl --? --warn --(vpl|imp|xml|osis) --(count|last) [files]\n\n When run without any arguments, this script looks for the CCEL files: Bible.*.xml.\n\n Otherwise, supply the script with a list of files (or wildcard) and it will attempt to create a v11n system definition on that basis.\n\n --? prints usage (this).\n\n --warn turns on warning messages.\n\n --vpl instructs the script to interpret files as VPL files.\n --imp instructs the script to interpret files as IMP files.\n --xml instructs the script to interpret files as XML files using CCEL's definition format (default).\n --osis instructs the script to interpret files as OSIS XML files.\n\n --count instructs the script to count chapters/verses it encounters.\n --last instructs the script to assume that the last chapter/verse it encounters is equal to the number of chapters/verses in a book/chapter (default).\n\n"
+}
+
# @canons will contain this list of files, these are in a basic XML format.
# Each file lists osisIDs along with the English names associated with the
# osisID. These aren't exhaustive, and may or may not overlap (but hopefully
@@ -65,7 +77,9 @@
$osis{lc($name)} = $id;
}
else {
-# print "ERROR: Duplicate mapping from $id found in $mapfile (<abbr>).\n";
+ if ($warn == 1) {
+ print "ERROR: Duplicate mapping from $id found in $mapfile (<abbr>).\n";
+ }
}
}
elsif ($line =~ /<name>(.+?)<\/name>/) {
@@ -75,7 +89,9 @@
$abbrevs{lc($id)} .= "$name;"
}
else {
-# print "ERROR: Duplicate mapping from $id found in $mapfile (<name>).\n";
+ if ($warn == 1) {
+ print "ERROR: Duplicate mapping from $id found in $mapfile (<name>).\n";
+ }
}
if ($idmap{$id} eq "") {
@@ -83,45 +99,77 @@
}
else {
# Duplicates most likely indicate alternate names, so ignore them.
-# print "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+ if ($warn == 1) {
+ print "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+ }
}
}
}
close (MAP);
}
+$mode = "xml";
+$count = "last";
+$warn = 0;
+
if (@ARGV[0] ne "") {
$n = 0;
- $mode = "xml";
while (@ARGV[$n] ne "") {
if (@ARGV[$n] eq "--vpl") {
$mode = "vpl";
+ print "Set interpretation mode to VPL.\n";
}
elsif (@ARGV[$n] eq "--imp") {
$mode = "imp";
+ print "Set interpretation mode to IMP.\n";
}
+ elsif (@ARGV[$n] eq "--osis") {
+ $mode = "osis";
+ print "Set interpretation mode to OSIS.\n";
+ }
elsif (@ARGV[$n] eq "--xml") {
$mode = "xml";
}
+ elsif (@ARGV[$n] eq "--count") {
+ $count = "count";
+ print "Set count mode to count.\n";
+ }
+ elsif (@ARGV[$n] eq "--last") {
+ $count = "last";
+ }
+ elsif (@ARGV[$n] eq "--warn") {
+ $warn = 1;
+ print "Warning messages enabled.\n";
+ }
+ elsif (@ARGV[$n] =~ /^\-+(h|\?|usage)/) {
+ printUsage();
+ exit();
+ }
else {
push @srcfiles, @ARGV[$n];
}
$n++;
}
}
-else {
- opendir (DIR, ".");
- @srcfiles = grep /Bible\.[^\.]+\.xml$/, readdir DIR;
- closedir(DIR);
- $mode = "xml";
+if ($mode eq "xml") {
+ print "Set interpretation mode to CCEL XML.\n";
}
+if ($count eq "last") {
+ print "Set count mode to last.\n";
+}
+if (@srcfiles == 0) {
+ printUsage();
+}
+
foreach $infile (@srcfiles) {
-
if ($infile =~ /^Bible.+xml$/) {
- $infile =~ /^Bible\.([^\.]+)\.xml/;
- $v11n = $1;
- $outfile = lc("v11n$1.h");
+ $infile =~ /^Bible(\.(.+))?\.xml/;
+ $v11n = $2;
+ if ($v11n eq "") {
+ $v11n = "NRSVA";
+ }
+ $outfile = lc("v11n$2.h");
}
else {
$v11n = $infile;
@@ -130,6 +178,8 @@
$outfile = lc("v11n$v11n.h");
}
+ print "Processing $infile --> $outfile (v11n: $v11n).\n";
+
open INF, $infile;
open OUTF, ">$outfile";
@@ -145,31 +195,55 @@
$vCount = 0;
$abbrevs = "/******************************************************************************\n * Abbreviations - MUST be in alphabetical order & by PRIORITY\n * RULE: first match of entire key\n * (e.g. key: \"1CH\"; match: \"1CHRONICLES\")\n */\n\nconst struct abbrev builtin_abbrevs$v11n\[\] = {\n";
- $abbrevsCloser = "\t{\"\", \"\"}\n};\n\n\n";
+ $abbrevsCloser = " {\"\", \"\"}\n};\n\n\n";
$abbrevsList = "";
- $vm = "/******************************************************************************\n * Maximum verses per chapter\n */\n\nint vm$v11n\[\] = {";
- $otbooks = "/******************************************************************************\n * [on]tbooks$v11n - initialize static instance for all canonical text names\n * and chapmax\n */\nstruct sbook otbooks$v11n\[\] = {\n";
- $ntbooks = "struct sbook ntbooks$v11n\[\] = {\n";
- $booksCloser = "\t{\"\", \"\", \"\", 0}\n};\n\n";
+ $vm = "/******************************************************************************\n * Maximum verses per chapter\n */\n\nint vm_$v11n\[\] = {";
+ $otbooks = "/******************************************************************************\n * [on]tbooks_$v11n - initialize static instance for all canonical text names\n * and chapmax\n */\nstruct sbook otbooks_$v11n\[\] = {\n";
+ $ntbooks = "struct sbook ntbooks_$v11n\[\] = {\n";
+ $booksCloser = " {\"\", \"\", \"\", 0}\n};\n\n";
+ $bookOrder = "// Book order:";
+
$otnt = 0; # 0 = ot, 1 = nt
+ if ($mode eq "osis") {
+ while (<INF>) {
+ $line = $_;
+ $line =~ s/<verse([^>]+)(osisID=\"[^\"]+\")/$idList .= "$2\n";/eg;
+ }
+
+ close (INF);
+
+ open (TMP, ">tempfile");
+ print TMP $idList;
+ close (TMP);
+
+ open (INF, "tempfile");
+ }
+
while (<INF>) {
$line = $_;
$osisID = "";
- if ($line =~ /<osisID.+?code=\"(.+?)\"\/>/) {
+ if ($line =~ /<osisID.+?code=\"([^\"]+?)\"\/>/) {
$osisID = $1;
}
- elsif ($mode eq "vpl" && $line =~ /^(.+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
+ elsif ($mode eq "vpl" && $line =~ /^([^:]+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
$osisID = "$1.$2.$3";
}
- elsif ($mode eq "imp" && $line =~ /^\$\$\$(.+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
+ elsif ($mode eq "imp" && $line =~ /^\$\$\$([^:]+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
$osisID = "$1.$2.$3";
}
-
+ elsif ($mode eq "osis" && $line =~ /osisID=\"([^\"]+)\"/) {
+ $osisID = $1;
+ }
+
+ if ($osisID =~ /\.0(\.|$)/) { # in the case of chap/verse 0
+ $osisID = "";
+ }
+
if ($osisID ne "") {
$lastBook = $thisBook;
$lastChap = $thisChap;
@@ -181,13 +255,23 @@
$thisChap = $2;
$thisVers = $3;
- if ((($thisBook ne $lastBook) ||($thisChap ne $lastChap)) && $lastVers ne "") {
- $vm .= "$vCount, ";
+ if ((($thisBook ne $lastBook) || ($thisChap ne $lastChap)) && $lastVers ne "") {
+ if ($count eq "count") {
+ $vm .= "$vCount, ";
+ }
+ else {
+ $vm .= "$lastVers, ";
+ }
+ if ($warn == 1) {
+ if ($vCount ne $lastVers) {
+ print "WARNING: verse count ($vCount) does not equal last verse ($lastVers) in chapter $osis{lc($lastBook)} $lastChap of versification $v11n ($infile).\n";
+ }
+ }
}
-
+
if ($thisBook ne $lastBook) {
$bCount++;
- $vm .= "\n\t// $idmap{$osis{lc($thisBook)}}\n\t";
+ $vm .= "\n // $idmap{$osis{lc($thisBook)}}\n ";
$abbrevsList .= $abbrevs{lc($osis{lc($thisBook)})};
if ($lastBook ne "") {
@@ -205,7 +289,19 @@
}
}
}
- $vm .= "$vCount\n};\n";
+ if ($count eq "count") {
+ $vm .= "$vCount";
+ }
+ else {
+ $vm .= "$thisVers";
+ }
+ $vm .= "\n};\n";
+ if ($warn == 1) {
+ if ($vCount ne $thisVers) {
+ print "WARNING: verse count ($vCount) does not equal last verse ($thisVers) in chapter $osis{lc($lastBook)} $lastChap of versification $v11n ($infile).\n";
+ }
+ }
+
buildBooksArrays();
$otbooks .= $booksCloser;
$ntbooks .= $booksCloser;
@@ -218,17 +314,23 @@
}
@abbrevsQueue = sort @abbrevsQueue;
foreach $a (@abbrevsQueue) {
- $abbrevs .= "\t{\"" . uc($a) . "\", \"" . $osis{lc($a)} . "\"},\t\t//" . $idmap{$osis{lc($a)}} . "\n";
+ $abbrevs .= " {\"" . uc($a) . "\", \"" . $osis{lc($a)} . "\"},\t\t//" . $idmap{$osis{lc($a)}} . "\n";
}
$abbrevs .= $abbrevsCloser;
+
+
+ print OUTF "// Versification system: $v11n\n";
+ print OUTF "$bookOrder\n\n";
+
print OUTF $otbooks;
print OUTF $ntbooks;
- print OUTF $abbrevs;
+# print OUTF $abbrevs; # line disabled so that we don't print out replacement builtin_abbrev line
print OUTF $vm;
print OUTF "\n\nSWORD_NAMESPACE_END\n\n\n#endif\n";
close (INF);
close (OUTF);
+ unlink("tempfile");
}
More information about the sword-cvs
mailing list