[sword-svn] r175 - trunk/versification
chrislit at crosswire.org
chrislit at crosswire.org
Sat Mar 14 19:23:29 MST 2009
Author: chrislit
Date: 2009-03-14 19:23:28 -0700 (Sat, 14 Mar 2009)
New Revision: 175
Added:
trunk/versification/makeabbrevs.pl
Log:
generate a builtin_abbrevs from XML canon definitions
Added: trunk/versification/makeabbrevs.pl
===================================================================
--- trunk/versification/makeabbrevs.pl (rev 0)
+++ trunk/versification/makeabbrevs.pl 2009-03-15 02:23:28 UTC (rev 175)
@@ -0,0 +1,72 @@
+#!/usr/bin/perl
+
+# Generate builtin_abbrevs.h: a C table whose keys are the upper-cased osisIDs
+# and English names found in the XML canon files, each mapped to its osisID.
+use strict;
+use warnings;
+
+# @canons will contain this list of files, these are in a basic XML format.
+# Each file lists osisIDs along with the English names associated with the
+# osisID. These aren't exhaustive, and may or may not overlap (but hopefully
+# don't). We are only using these to load mappings from osisIDs.
+my @canons = (
+    "canon.bible.xml",          # the Bible, broadly defined
+#   "canon.af.xml",             # Apostolic Fathers
+#   "canon.otp.xml",            # OT pseudepigrapha
+#   "canon.nta.xml",            # NT apocrypha
+#   "canon.lds.xml",            # Mormon books
+#   "canon.naghammadi.xml",     # Nag Hammadi Library
+#   "canon.qumran.xml",         # Qumran mss
+#   "canon.classical.xml",      # intended for classical works, currently just Josephus
+);
+
+my $warn = 0;       # set to a true value to report duplicate mappings
+my @abbrevsQueue;   # every osisID and name that becomes a key in the table
+my %osis;           # lc(osisID or name) => osisID
+my %idmap;          # osisID => first name seen for it (used as a trailing comment)
+
+foreach my $mapfile (@canons) {
+    open(my $map, '<', $mapfile) or die "Cannot open $mapfile: $!\n";
+    my $id;         # most recent <id> in this file; later <name>s attach to it
+    while (my $line = <$map>) {
+        $line =~ s/<!\-\-.+?\-\->//g;   # drop XML comments that fit on one line
+        $line =~ s/\&amp;/\&/g;         # turn the entity &amp; back into a literal &
+        if ($line =~ /<id>(.+?)<\/id>/) {
+            $id = $1;
+            $osis{lc($id)} = $id;
+            push @abbrevsQueue, $id;
+        }
+        elsif ($line =~ /<name>(.+?)<\/name>/) {
+            my $name = $1;
+            next unless defined $id;    # a name before any id has nothing to map to
+            if (!defined $osis{lc($name)}) {
+                $osis{lc($name)} = $id;
+                push @abbrevsQueue, $name;
+            }
+            elsif ($warn) {
+                print "ERROR: Duplicate mapping from $name found in $mapfile.\n";
+            }
+            # Duplicates most likely indicate alternate names, so ignore them.
+            if (!defined $idmap{$id}) {
+                $idmap{$id} = $name;
+            }
+            elsif ($warn) {
+                print "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+            }
+        }
+    }
+    close($map);
+}
+
+my $abbrevs = "/******************************************************************************\n * Abbreviations - MUST be in alphabetical order & by PRIORITY\n * RULE: first match of entire key\n * (e.g. key: \"1CH\"; match: \"1CHRONICLES\")\n */\n\nconst struct abbrev builtin_abbrevs\[\] = {\n";
+foreach my $key (sort @abbrevsQueue) {
+    my $osisID = $osis{lc($key)};
+    my $note   = defined $idmap{$osisID} ? $idmap{$osisID} : "";
+    $abbrevs .= "//" if $key =~ /^.+\d/;    # keys with a digit after the first character are emitted commented out
+    $abbrevs .= " {\"" . uc($key) . "\", \"" . $osisID . "\"},\t\t//" . $note . "\n";
+}
+$abbrevs .= " {\"\", \"\"}\n};\n\n\n";
+
+open(my $out, '>', "builtin_abbrevs.h") or die "Cannot write builtin_abbrevs.h: $!\n";
+print {$out} $abbrevs;
+close($out) or die "Cannot close builtin_abbrevs.h: $!\n";
More information about the sword-cvs
mailing list