[sword-svn] r81 - in trunk/modules: . perlconverters
chrislit at www.crosswire.org
chrislit at www.crosswire.org
Mon Apr 23 03:27:36 MST 2007
Author: chrislit
Date: 2007-04-23 03:27:36 -0700 (Mon, 23 Apr 2007)
New Revision: 81
Added:
trunk/modules/perlconverters/
trunk/modules/perlconverters/unb2osis.pl
trunk/modules/perlconverters/usfm2osis.pl
trunk/modules/perlconverters/zef2osis.pl
Log:
Some new Perl conversion scripts targeting OSIS
(working, but not necessarily very well)
Added: trunk/modules/perlconverters/unb2osis.pl
===================================================================
--- trunk/modules/perlconverters/unb2osis.pl (rev 0)
+++ trunk/modules/perlconverters/unb2osis.pl 2007-04-23 10:27:36 UTC (rev 81)
@@ -0,0 +1,157 @@
+#!/usr/bin/perl
+
+## Unbound Bible database to OSIS (2.1.1) converter
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2007 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of the CrossWire Bible Society nor the names of
+# its contributors may be used to endorse or promote products
+# derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+
+# Converter release metadata, shown in the usage message.
+$version = "1.0";
+$date = "2007-04-20";
+# OSIS schema version named in the generated document header.
+$osisVersion = "2.1.1";
+
+# OSIS book abbreviations. The emit loop looks a book up as
+# $OSISbook[book number - 1], so the order of this list is significant.
+@OSISbook = (
+#OT
+ "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
+
+#NT
+"Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus", "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John", "Jude", "Rev",
+
+#Apocrypha
+"Tob", "Jdt", "AddEsth", "Wis", "Sir", "Bar", "EpJer", "PrAzar", "Sus", "Bel", "1Macc", "2Macc", "3Macc", "4Macc", "1Esd", "2Esd", "PrMan", "Ps151", "PssSol", "Odes"
+);
+
+# Command line: <osisWork> <input filename> [-o OSIS-file]
+if (scalar(@ARGV) < 2) {
+    print "unb2osis.pl -- Unbound Bible format to OSIS $osisVersion converter version $version ($date)\nSyntax: unb2osis.pl <osisWork> <input filename> [-o OSIS-file]\n";
+    exit (-1);
+}
+
+$osisWork = $ARGV[0];
+
+# An explicit "-o <file>" overrides the default "<osisWork>.osis.xml".
+if (defined($ARGV[2]) && $ARGV[2] eq "-o" && defined($ARGV[3])) {
+    $outputFilename = $ARGV[3];
+}
+else {
+    $outputFilename = "$osisWork.osis.xml";
+}
+
+# Read the whole input before touching the output file, so that a bad
+# input path cannot leave a truncated output file behind.
+open (INF, "<", $ARGV[1]) or die "Could not open file $ARGV[1] for reading: $!";
+@data = <INF>;
+close (INF);
+
+open (OUTF, ">", $outputFilename) or die "Could not open file $outputFilename for writing: $!";
+
+# XML declaration, OSIS root element and a minimal header.
+print OUTF "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n";
+
+# Emit-loop state: book, chapter and verse of the previously written row.
+$book = "";
+$chap = "";
+$vers = "";
+
+# Fallback sort key for rows that carry no ordering column of their own.
+# It is advanced by 10 for every such row so that the later sort keeps
+# those rows in their original file order.
+$i = 10;
+
+# Rewrite every input row in place as
+#   <8-digit sort key> TAB book TAB chapter TAB verse TAB <extra column(s)><text>
+# Comment rows become empty strings; unrecognised rows are reported and left
+# untouched. The key is formatted separately from the row data so that a
+# literal "%" in the verse text is never treated as a sprintf directive.
+foreach $line (@data) {
+    if ($line =~ /^\#/) {
+        $line = "";
+    }
+    elsif ($line =~ /\d+[ONA]\t\d+\t\d+\t(\d+)[ONA]\t(\d+)\t(\d+)\t(.*\t)(\d+)\t(.+)/) {
+        # Rows with two reference triples: the first triple is skipped and
+        # the numeric column just before the text is the sort key.
+        $line = sprintf("%08d", $5) . "\t$1\t$2\t$3\t$4$6";
+    }
+    elsif ($line =~ /(\d+)[ONA]\t(\d+)\t(\d+)\t(.*\t)(\d+)\t(.+)/) {
+        # Rows with one reference triple plus a numeric sort-key column.
+        $line = sprintf("%08d", $5) . "\t$1\t$2\t$3\t$4$6";
+    }
+    elsif ($line =~ /(\d+)[ONA]\t(\d+)\t(\d+)\t(.+)/) {
+        # Rows without a sort-key column: synthesise an increasing key.
+        $i += 10;
+        $line = sprintf("%08d", $i) . "\t$1\t$2\t$3\t\t$4";
+    }
+    else {
+        print "Error on line: $line\n";
+    }
+}
+
+# Order the rows by the zero-padded key that was prepended to each of them.
+@data = sort @data;
+#print @data;
+
+# Write one <verse> per row, opening and closing the <div type="book"> and
+# <chapter> containers whenever the book or chapter number changes.
+foreach $line (@data) {
+    if ($line =~ /(\d+)\t(\d+)\t(\d+)\t(\d+)\t(.*\t)(.+)/) {
+
+        # $1 is the sort key, which is no longer needed.
+        $nBook = $2;
+        $nChap = $3;
+        $vers = $4;
+        $sub = $5;
+
+        $text = $6;
+
+        # A non-empty extra column is appended to the verse as "verse!sub".
+        $sub =~ s/\s*//g;
+        if ($sub ne "") {
+            $vers = "$vers!$sub";
+        }
+
+        # Book numbers are 1-based. Guard against 0, which would otherwise
+        # wrap around to the last entry of the table.
+        $oBook = ($nBook >= 1) ? $OSISbook[$nBook-1] : undef;
+        if (!defined($oBook) || $oBook eq "") {
+            print "Error unknown book: $nBook\n";
+            $oBook = "";
+        }
+
+        $text =~ s/\s*$//g;
+
+        if ($book ne $nBook) {
+            if ($book ne "") {
+                print OUTF "<\/chapter>\n";
+                # Force a fresh <chapter> to be opened for the new book.
+                $chap = "";
+                print OUTF "<\/div>\n";
+            }
+            print OUTF "<div type=\"book\" osisID=\"$oBook\">\n";
+        }
+
+        if ($chap ne $nChap) {
+            if ($chap ne "") {
+                print OUTF "<\/chapter>\n";
+            }
+            print OUTF "<chapter osisID=\"$oBook.$nChap\">\n";
+        }
+
+        print OUTF "<verse osisID=\"$oBook.$nChap.$vers\">$text<\/verse>\n";
+
+        $book = $nBook;
+        $chap = $nChap;
+    }
+}
+
+# Only close the chapter and book containers if one was actually opened.
+if ($book ne "") {
+    print OUTF "<\/chapter>\n";
+    print OUTF "<\/div>\n";
+}
+print OUTF "<\/osisText>\n";
+print OUTF "<\/osis>\n";
+
+# Buffered write errors only surface when the handle is closed.
+close (OUTF) or die "Could not finish writing the OSIS output: $!";
Added: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl (rev 0)
+++ trunk/modules/perlconverters/usfm2osis.pl 2007-04-23 10:27:36 UTC (rev 81)
@@ -0,0 +1,630 @@
+#!/usr/bin/perl
+
+## USFM to OSIS (2.0) converter
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002,2003,2007 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of the CrossWire Bible Society nor the names of
+# its contributors may be used to endorse or promote products
+# derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+
+# Release metadata for the usage message, and the OSIS schema version
+# written into the document header.
+($version, $date, $osisVersion) = ("1.1", "2007-04-23", "2.1.1");
+
+# Lookup table from the book code on a USFM \id line to the OSIS book
+# abbreviation.
+# Theoretically, these are laid out according to <BooksPresent>, but I can really only guess without a spec
+%OSISbook = (
+    '' => '',
+
+    # Old Testament
+    'GEN' => 'Gen',   'EXO' => 'Exod',  'LEV' => 'Lev',   'NUM' => 'Num',
+    'DEU' => 'Deut',  'JOS' => 'Josh',  'JDG' => 'Judg',  'RUT' => 'Ruth',
+    '1SA' => '1Sam',  '2SA' => '2Sam',  '1KI' => '1Kgs',  '2KI' => '2Kgs',
+    '1CH' => '1Chr',  '2CH' => '2Chr',  'EZR' => 'Ezra',  'NEH' => 'Neh',
+    'EST' => 'Esth',  'JOB' => 'Job',   'PSA' => 'Ps',    'PRO' => 'Prov',
+    'ECC' => 'Eccl',  'SNG' => 'Song',  'ISA' => 'Isa',   'JER' => 'Jer',
+    'LAM' => 'Lam',   'EZK' => 'Ezek',  'DAN' => 'Dan',   'HOS' => 'Hos',
+    'JOL' => 'Joel',  'AMO' => 'Amos',  'OBA' => 'Obad',  'JON' => 'Jonah',
+    'MIC' => 'Mic',   'NAM' => 'Nah',   'HAB' => 'Hab',   'ZEP' => 'Zeph',
+    'HAG' => 'Hag',   'ZEC' => 'Zech',  'MAL' => 'Mal',
+
+    # New Testament
+    'MAT' => 'Matt',  'MRK' => 'Mark',  'LUK' => 'Luke',  'JHN' => 'John',
+    'ACT' => 'Acts',  'ROM' => 'Rom',   '1CO' => '1Cor',  '2CO' => '2Cor',
+    'GAL' => 'Gal',   'EPH' => 'Eph',   'PHP' => 'Phil',  'COL' => 'Col',
+    '1TH' => '1Thess', '2TH' => '2Thess', '1TI' => '1Tim', '2TI' => '2Tim',
+    'TIT' => 'Titus', 'PHM' => 'Phlm',  'HEB' => 'Heb',   'JAS' => 'Jas',
+    '1PE' => '1Pet',  '2PE' => '2Pet',  '1JN' => '1John', '2JN' => '2John',
+    '3JN' => '3John', 'JUD' => 'Jude',  'REV' => 'Rev',
+
+    # Apocrypha
+    'TOB' => 'Tob',   'JDT' => 'Jdt',   'ESG' => 'Esth',  'WIS' => 'Wis',
+    'SIR' => 'Sir',   'BAR' => 'Bar',   'LJE' => 'EpJer', 'S3Y' => 'PrAzar',
+    'SUS' => 'Sus',   'BEL' => 'Bel',   '1MA' => '1Macc', '2MA' => '2Macc',
+    '3MA' => '3Macc', '4MA' => '4Macc', '1ES' => '1Esd',  '2ES' => '2Esd',
+    'MAN' => 'PrMan',
+
+    # Following this is just an uneducated guess
+    'PS2' => 'Ps151', 'ODA' => 'Odes',  'PSS' => 'PssSol', 'JSA' => 'Josh',
+    'JSB' => 'Josh',  'TBS' => 'Tob',   'SST' => 'Sus',   'DNT' => 'Dan',
+    'BLT' => 'Bel',   'ADE' => 'AddEsth',
+);
+
+# Command line: <osisWork> [-o OSIS-file] <USFM filenames|wildcard>
+# "-o" needs both a file name and at least one input file after it.
+if (scalar(@ARGV) < 2 || ($ARGV[1] eq "-o" && scalar(@ARGV) < 4)) {
+    print "usfm2osis.pl -- USFM to OSIS $osisVersion converter version $version ($date)\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] <USFM filenames|wildcard>\n";
+    exit (-1);
+}
+
+$osisWork = $ARGV[0];
+
+# Everything after the optional "-o <file>" pair is an input file.
+if ($ARGV[1] eq "-o") {
+    $outputFilename = $ARGV[2];
+    push(@files, @ARGV[3 .. $#ARGV]);
+}
+else {
+    $outputFilename = "$osisWork.osis.xml";
+    push(@files, @ARGV[1 .. $#ARGV]);
+}
+open (OUTF, ">", $outputFilename) or die "Could not open file $outputFilename for writing: $!";
+
+# The document is accumulated in @outdata and written out at the end.
+push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
+
+# Closing tags still owed for the elements opened so far, innermost first.
+$tagStack = "<\/osisText><\/osis>";
+# Pending end milestones for the current chapter and verse.
+$chapClose = "";
+$versClose = "";
+
+# Close every element on the open-tag stack up to and including $tag.
+#
+# $tagStack holds pseudo closing tags concatenated innermost-first; a pseudo
+# tag may carry attributes (e.g. </div type="book">) so that a specific kind
+# of element can be addressed.
+#
+# Argument: the pseudo closing tag to close, exactly as it was given to
+#           openTag.
+# Returns:  the real closing tags, one per line and with the attributes
+#           stripped, for everything that was removed from $tagStack; or
+#           nothing (an empty list in list context) when $tag is not open.
+sub closeTag {
+    my ($tag) = @_;
+
+    # \Q...\E makes the tag match literally rather than as a regex.
+    if ($tagStack =~ /\Q$tag\E/) {
+        # Remove the shortest prefix of the stack that ends with $tag.
+        $tagStack =~ s/(.*?\Q$tag\E)//;
+        my $taglist = $1;
+        $taglist =~ s/>/>\n/g;
+        # Turn pseudo tags such as </div type="book"> into plain </div>.
+        $taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;
+        return $taglist;
+    }
+    else {
+        return;
+    }
+}
+
+# Note that an element has been opened by putting its (pseudo) closing tag
+# at the front of $tagStack, so the most recently opened element is closed
+# first. Returns nothing.
+sub openTag {
+    $tag = shift(@_);
+    $tagStack = join("", $tag, $tagStack);
+    return;
+}
+
+# Convert every USFM input file in turn, appending OSIS markup to @outdata.
+# Each file is processed in two passes: a character-level encoding pass, and
+# a marker pass that handles one USFM marker type after another per line.
+foreach $file (@files) {
+    # NOTE(review): $file reaches the shell unquoted, so names containing
+    # spaces or shell metacharacters will misbehave -- confirm whether
+    # callers rely on the shell expanding wildcards here before quoting it.
+    @filedata = `uconv -f windows-1252 -t utf-8 $file`;
+
+    $ollevel = 0;
+    $vers = 0;
+    $chap = 0;
+    $book = "";
+
+    #encoding stuff
+    for ($i = 0; $i < scalar(@filedata); $i++) {
+        $line = $filedata[$i];
+        $line =~ s/[\r\n]//g;
+
+        ### Basic XML entity encoding
+
+        # Escape a bare "&" (one that does not look like the start of an
+        # entity reference).
+        $line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
+        # Quotation marks are turned into single placeholder characters
+        # here and resolved into <q> milestones after the whole document
+        # has been assembled.
+        $line =~ s/<< ?/\@/g;
+        $line =~ s/>>/\#/g;
+        $line =~ s/</\$/g;
+        $line =~ s/>/\%/g;
+
+        $line =~ s/(\w)\'(\w)/$1ʼ$2/g;
+        $line =~ s/\\fr 1\/2 \\fr\*/½/g;
+
+        $filedata[$i] = $line;
+    }
+
+    for ($i = 0; $i < scalar(@filedata); $i++) {
+        $line = $filedata[$i];
+
+        $line =~ s/LORD/<divineName>Lord<\/divineName>/g;
+
+        ### File Identification
+
+        # \id (book marker)
+        if ($line =~ /^\\id\b\s*([^ ]*)/) {
+            $book = $OSISbook{$1};
+            $chap = 0;
+            if ($versClose =~ "<verse") {
+                push (@outdata, $versClose); # close verse
+                $versClose = "";
+            }
+            if ($chapClose =~ "<chapter") {
+                push (@outdata, $chapClose); # close chapter
+                $chapClose = "";
+            }
+            push (@outdata, closeTag("<\/div type=\"book\">")); #close book
+            if ($book eq "") {
+                $book = "UnknownUSFMBook";
+            }
+            push (@outdata, "<div type=\"book\" osisID=\"$book\">\n"); # open current book
+            openTag("<\/div type=\"book\">");
+            $line = "";
+        }
+
+        # \h (running header--discard)
+        if ($line =~ /^\\h\b/) {
+            $line = "";
+        }
+
+        # \cc (consistent changes script--discard)
+        if ($line =~ /^\\cc\b/) {
+            $line = "";
+        }
+
+
+        ### Introduction
+
+        # \it title
+        if ($line =~ /^\\it\b\s*(.*)/) {
+            $line = "<div type=\"introduction\">\n<title>$1<\/title>";
+            openTag("<\/div>");
+        }
+
+        # \is introduction section title
+        if ($line =~ /^\\is(\d*)\b\s*(.*)/) {
+            $level = $1;
+            if ($level eq "") {
+                $level = "1";
+            }
+            $line = "<div type=\"section\"><title>$2<\/title>";
+            openTag("<\/div>");
+        }
+
+        # \iot introduction outline title
+        if ($line =~ /^\\iot\b\s*(.*)/) {
+            $line = "<div type=\"outline\">\n<title>$1<\/title>";
+        }
+
+        # \io\d+ introduction outline item
+        if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
+            if ($ollevel == $1) {
+                $line = "<item>$2<\/item>";
+            }
+            elsif ($ollevel > $1) {
+                $line = "";
+                while ($ollevel > $1) {
+                    $line .= "<\/list>";
+                    $ollevel--;
+                }
+                $line .= "<item>$2<\/item>";
+            }
+            elsif ($ollevel < $1) {
+                $line = "";
+                while ($ollevel < $1) {
+                    $line .= "<list>\n";
+                    $ollevel++;
+                }
+                $line .= "<item>$2<\/item>\n";
+            }
+
+            # Last outline item: unwind all open lists and close the
+            # outline div.
+            if ($filedata[$i+1] !~ /^\\io/) {
+                while ($ollevel > 0) {
+                    $line .= "\n<\/list>";
+                    $ollevel--;
+                }
+                if ($ollevel == 0) {
+                    $line .= "\n<\/div>";
+                }
+            }
+        }
+
+        # \ip introduction paragraph
+        if ($line =~ /^\\ip\b\s*(.*)/) {
+            $line = "<p>$1<\/p>";
+        }
+
+
+        ### Chapters and Verses
+
+        # \c chapter
+        if ($line =~ /^\\c\b\s*([^ ]*)/) {
+            if ($1 ne "") {
+                $chap = $1;
+            }
+            else {
+                $chap++;
+            }
+
+            push (@outdata, $versClose);
+            $versClose = "";
+            push (@outdata, closeTag("<\/p>"));
+            if ($chapClose =~ "<chapter") {
+                push (@outdata, $chapClose); # close previous chapter
+                $chapClose = "";
+            } else {
+                push (@outdata, closeTag("<\/div>")); # close introduction div
+            }
+
+            push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n");
+            $chapClose = "<chapter eID=\"$book.$chap\"\/>\n";
+            $line =~ s/\\c\b\s*([^ ]*)//;
+        }
+
+        # \d majorSection
+        if ($line =~ /^\\d\b\s*(.+)/) {
+            push (@outdata, closeTag("<\/p>"));
+            push (@outdata, closeTag("<\/div type=\"majorSection\">"));
+            push (@outdata, "<div type=\"majorSection\">\n");
+            openTag("<\/div type=\"majorSection\">");
+            $line =~ s/\\d\b\s*(.+)/<title>$1<\/title>/;
+        }
+
+        # \s section
+        if ($line =~ /^\\s\b\s*(.+)/) {
+            push (@outdata, closeTag("<\/p>"));
+            push (@outdata, closeTag("<\/div type=\"section\">"));
+            push (@outdata, "<div type=\"section\">\n");
+            openTag("<\/div type=\"section\">");
+            $line =~ s/\\s\b\s*(.+)/<title>$1<\/title>/;
+            if ($line =~ /HEBREW TITLE/) {
+                $line =~ s/<title>/<title type=\"psalm\">/;
+            }
+        }
+
+        # \ss subSection
+        if ($line =~ /^\\ss\b\s*(.+)/) {
+            $line =~ s/\\ss\b\s*(.+)/<title>$1<\/title>/;
+        }
+
+        # \sss x-subsubSection
+        if ($line =~ /^\\sss\b\s*(.+)/) {
+            push (@outdata, closeTag("<\/p>"));
+            # Must name the same pseudo tag that openTag is given below.
+            push (@outdata, closeTag("<\/div type=\"x-subSubSection\">"));
+            push (@outdata, "<div type=\"x-subSubSection\">\n");
+            openTag("<\/div type=\"x-subSubSection\">");
+            $line =~ s/\\sss\b\s*(.+)/<title>$1<\/title>/;
+        }
+
+        # \p paragraph
+        if ($line =~ /^\\p\b\s*/) {
+            push (@outdata, closeTag("<\/p>"));
+            push (@outdata, "<p>\n");
+            openTag("<\/p>");
+            $line =~ s/\\p\b\s*//;
+        }
+
+        # \v verse
+        if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) {
+            if ($1 ne "") {
+                $vers = $1;
+            }
+            else {
+                $vers++;
+            }
+
+            push (@outdata, $versClose);
+            $versClose = "";
+
+            # A verse range "F-T" gets an osisID listing every verse in
+            # the range.
+            if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) {
+                $vF = $1;
+                $vT = $2;
+                $vF =~ /^(\d+)/;
+                $vFn = scalar($1);
+                $vT =~ /^(\d+)/;
+                $vTn = scalar($1);
+                $osisID = "$book.$chap.$vF";
+                if ($vTn > $vFn && $vFn > 0) {
+                    for ($j = $vFn + 1; $j < $vTn; $j++) {
+                        $osisID .=" $book.$chap.$j";
+                    }
+                }
+                $osisID .= " $book.$chap.$vT";
+            }
+            else {
+                $osisID = "$book.$chap.$vers";
+            }
+            push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n");
+            $versClose = "<verse eID=\"$osisID\"\/>\n";
+            $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
+        }
+
+        ## Notes
+
+        # \f note (handled twice to cover up to two notes on a line)
+        for ($j = 2; $j > 0; $j--) {
+            if ($line =~ /\\f\b\s*([^\s]+)\s*\\rf\s*([^\\]+)\\rf\*\s*/) {
+                $nVal = $1;
+                $scopeVal = $2;
+
+                $scopeVal =~ s/://g;
+                $scopeVal = "$book.$scopeVal";
+                $scopeVal =~ s/(\d+)\.(\d[^\,]+)\,\s*(\d.+)/$1.$2 $book.$1.$3/;
+                $scopeVal =~ s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
+
+                $line =~ s/\\f\b\s*([^\s]+)\s*\\rf\s*([^\\]+)\\rf\*\s*/<note n=\"$nVal\" annotateRef=\"$scopeVal\">/;
+                $line =~ s/(<note [^>]+>)([A-Z][^a-z:]*?):/$1<catchWord>$2<\/catchWord>/g;
+            }
+        }
+        # \f hebrew title note
+        if ($line =~ /\\f\b\s*([^\s]+)\s*HEBREW TITLE:\s*/) {
+            $nVal = $1;
+            $line =~ s/\\f\b\s*([^\s]+)\s*HEBREW TITLE:\s*/<note n=\"$nVal\">/;
+        }
+        # \f spare notes
+        if ($line =~ /\\f\b\s*([^\s]+)\s*\*\s*/) {
+            $nVal = $1;
+            $line =~ s/\\f\b\s*([^\s]+)\s*\*\s*/<note n=\"$nVal\">/;
+            $line =~ s/(<note [^>]+>)([A-Z][^a-z:]*?):/$1<catchWord>$2<\/catchWord>/g;
+        }
+
+        # \x crossReference (handled twice, like \f above)
+        for ($j = 2; $j > 0; $j--) {
+            if ($line =~ /\\x\b\s*\\rf\s*([^\\]+)\\rf\*\s*/) {
+                $scopeVal = $1;
+
+                $scopeVal =~ s/://g;
+                $scopeVal = "$book.$scopeVal";
+                $scopeVal =~ s/(\d+)\.(\d[^\,]*)\,\s*(\d.*)/$1.$2 $book.$1.$3/;
+                $scopeVal =~ s/(\d+)\.(\d[^\-]*)\-+\s*(\d.*)/$1.$2\-$book.$1.$3/;
+
+                $line =~ s/\\x\b\s*\\rf\s*([^\\]+)\\rf\*\s*/<note type=\"crossReference\" annotateRef=\"$scopeVal\">/;
+                $line =~ s/\[2\]\s*([^\[]+?)(\s*\[1\]|\\x\*)/<seg type=\"x-dc\">$1<\/seg>$2/g;
+                $line =~ s/\[2\]\s*([^\[]+?)$/<seg type=\"x-dc\">$1<\/seg>/g;
+                $line =~ s/\s*\[1\]//g;
+                if ($line =~ /<note type=\"crossReference\" annotateRef=\"[^\"]+?\">\\bw/) {
+                    $line =~ s/(<note type=\"crossReference\" annotateRef=\"[^\"]+?\")>\\bw (.+?) \\bw\* /$1 n=\"$2\">/;
+                    $pa = $1;
+                    $line =~ s/\\bw (.+?) \\bw\* /<\/note>\n$pa n=\"$1\">/g;
+                }
+                if ($line =~ /<\/seg> \\bw/) {
+                    $line =~ /(<note type=\"crossReference\" annotateRef=\"[^\"]+?\")>/;
+                    $pa = $1;
+                    $line =~ s/\s*\\bw (.+?) \\bw\* /<\/note>\n$pa n=\"$1\">/g;
+                }
+            }
+
+        }
+        # \x hebrew title crossReference
+        if ($line =~ /\\x\b\s*Title:\s*/) {
+            $line =~ s/\\x\b\s*Title:\s*/<note type=\"crossReference\">/;
+        }
+
+
+        # \[fx]* note/crossReference closers
+        if ($line =~ /\\[fx]\*/) {
+            $line =~ s/\\[fx]\*/<\/note>/g;
+        }
+
+
+        ## Poetry
+
+        # \q line
+        if ($line =~ /^\\q/) {
+            # $l is 1 while a line group (<lg>) is open.
+            if ($l != 1) {
+                push (@outdata, "<lg>\n");
+                $l = 1;
+            }
+            if ($line =~ /\\q(c|\d*)$/) {
+                # Bare marker: the text of the poetic line is on the
+                # following input line, so the closers are appended there.
+                if ($1 eq "") {
+                    $line = "<l>\n";
+                }
+                elsif ($1 eq "c") {
+                    $line = "<l type=\"x-centered\">";
+                }
+                else {
+                    $line = "<l level=\"$1\">\n";
+                }
+                $filedata[$i+1] .= "<\/l>";
+                if ($filedata[$i+2] !~ /\\q/) {
+                    $filedata[$i+1] .= "\n<\/lg>";
+                    $l = 0;
+                }
+            }
+            else {
+                $line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/;
+                $line =~ s/\\q(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/;
+                $line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/;
+                if ($filedata[$i+1] !~ /\\q/) {
+                    $line .= "\n<\/lg>";
+                    $l = 0;
+                }
+            }
+        }
+
+
+        ## Tables
+
+        # \th table heading, \tb table body cell
+        if ($line =~ /^\\t/) {
+            if ($line =~ /^\\th1\b\s*(.*)/) {
+                if ($table != 1) {
+                    push (@outdata, "<table>\n");
+                    $table = 1;
+                }
+                $line = "<row><cell role=\"label\">$1<\/cell>\n";
+            }
+            elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
+                $line = "<cell role=\"label\">$1<\/cell>\n";
+            }
+            if ($line =~ /^\\tb1\b\s*(.*)/) {
+                if ($table != 1) {
+                    push (@outdata, "<table>\n");
+                    $table = 1;
+                }
+                else {
+                    push (@outdata, "<\/row>");
+                }
+                $line = "<row><cell>$1<\/cell>\n";
+                if ($filedata[$i+1] !~ /\\tb/) {
+                    $line .= "<\/row><\/table>\n";
+                    $table = 0;
+                }
+            }
+            elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
+                $line = "<cell>$1<\/cell>\n";
+                if ($filedata[$i+1] !~ /\\tb/) {
+                    $line .= "<\/row><\/table>\n";
+                    $table = 0;
+                }
+            }
+        }
+
+        ## Other
+
+        # \ls list
+        if ($line =~ /^\\ls\b\s*(.+)/) {
+            if ($ls != 1) {
+                push (@outdata, "<list>\n");
+                $ls = 1;
+            }
+            $line = "<item>$1<\/item>\n";
+            if ($filedata[$i+1] !~ /\\ls/) {
+                $line .= "<\/list>";
+                $ls = 0;
+            }
+        }
+
+        # \mt title
+        if ($line =~ /^\\mt\b\s*(.+)/) {
+            $line = "<title type=\"main\">$1<\/title>";
+        }
+
+        # \st,\st2 title
+        if ($line =~ /^\\st2?\b\s*(.+)/) {
+            $line = "<title type=\"continued\">$1<\/title>";
+        }
+
+        # \st3 title
+        if ($line =~ /^\\st3\b\s*(.+)/) {
+            $line = "<title type=\"sub\">$1<\/title>";
+        }
+
+        # \mr sub title
+        if ($line =~ /^\\mr\b\s*(.+)/) {
+            $line = "<title type=\"sub\">$1<\/title>";
+        }
+
+        # \r parallel title
+        if ($line =~ /^\\r\b\s*(.+)/) {
+            $line = "<title type=\"parallel\">$1<\/title>";
+        }
+
+        # \sp speaker
+        if ($line =~ /^\\sp\b\s*(.+)/) {
+            $line = "<speaker>$1<\/speaker>";
+        }
+
+        # \itw italic word
+        $line =~ s/\\itw\b\s*(.*?)\\itw\*/<hi type=\"italic\">$1<\/hi>/g;
+
+        # \n superscripted verse number
+        $line =~ s/\\n\b\s*(.*?)\\n\*\s*/<seg type=\"x-versenum\">$1<\/seg>/g;
+
+        # remove unnecessary tags
+        $line =~ s/\\b\b//;
+        $line =~ s/\\m\b//;
+
+
+        $line =~ s/\\bq\*/<\/p><\/q>/g;
+        $line =~ s/\\bq\b\s*/<q type=\"block\"><p>/g;
+        $line =~ s/\\pp/<\/p><p>/g;
+        $line =~ s/\\in\*/<\/p><\/inscription>/g;
+        $line =~ s/\\in\b\s*/<inscription><p>/g;
+
+
+        if ($line !~ /^\s*$/) {
+            push (@outdata, "$line\n");
+        }
+    }
+}
+
+# Flush the end milestones of the last verse and chapter, which are
+# otherwise only emitted when a following \v, \c or \id marker is seen.
+push (@outdata, $versClose);
+$versClose = "";
+push (@outdata, $chapClose);
+$chapClose = "";
+
+# Close every element that is still open, down to the document root.
+push (@outdata, closeTag("<\/osis>"));
+
+foreach $entry (@outdata) {
+    $entry =~ s/---/―/g;
+    $entry =~ s/--/—/g;
+    # Cut the first sID/eID value that contains a space (a verse range)
+    # down to its first reference.
+    $entry =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
+}
+
+# First write: every non-blank chunk, with line ends normalised and exactly
+# one trailing newline.
+foreach $entry (@outdata) {
+    if ($entry !~ /^\s*$/) {
+        $entry =~ s/[\r\n]+/\n/g;
+        $entry =~ s/\n?$/\n/;
+        print OUTF $entry;
+    }
+}
+
+close (OUTF) or die "Could not finish writing $outputFilename: $!";
+
+# Second pass: re-read the file line by line for whole-document fix-ups.
+open (INF, "<", $outputFilename) or die "Could not open file $outputFilename for reading: $!";
+@filedata = <INF>;
+close (INF);
+open (OUTF, ">", $outputFilename) or die "Could not open file $outputFilename for writing: $!";
+
+# Let closing tags bubble up past a chapter milestone that directly precedes
+# them. The document-level closers are excluded so that a milestone can
+# never end up after </osisText>. After a swap the index steps back one line
+# so the moved closing tag is compared with its new predecessor.
+for ($i = 0; $i < scalar(@filedata); $i++) {
+    if ($filedata[$i] =~ /^<\/(?!osis)/ && $filedata[$i-1] =~ /^<chapter.+\/>/) {
+        $temp = $filedata[$i];
+        $filedata[$i] = $filedata[$i-1];
+        $filedata[$i-1] = $temp;
+        $i -= 2;
+    }
+}
+$fullfile .= join("", @filedata);
+
+# Resolve the quotation placeholders left by the encoding pass:
+#   "$" ... "%"  second-level quotation (from "<" ... ">")
+#   "@" ... "#"  first-level quotation  (from "<<" ... ">>")
+# Matched pairs become <q sID/eID> milestone pairs numbered by $q; an opener
+# left without a closer becomes a cQuote milestone.
+$q = 1;
+
+$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
+
+$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
+
+$q = 1;
+
+while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
+    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+}
+while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
+    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+}
+
+$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
+$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
+
+$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
+
+
+
+print OUTF $fullfile;
+close (OUTF) or die "Could not finish writing $outputFilename: $!";
Added: trunk/modules/perlconverters/zef2osis.pl
===================================================================
--- trunk/modules/perlconverters/zef2osis.pl (rev 0)
+++ trunk/modules/perlconverters/zef2osis.pl 2007-04-23 10:27:36 UTC (rev 81)
@@ -0,0 +1,179 @@
+#!/usr/bin/perl
+
+## Zefania XML to OSIS (2.1.1) converter
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2007 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of the CrossWire Bible Society nor the names of
+# its contributors may be used to endorse or promote products
+# derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+
+# Converter release metadata, shown in the usage message.
+$version = "1.0";
+$date = "2007-04-21";
+# OSIS schema version named in the generated document header.
+$osisVersion = "2.1.1";
+
+# OSIS book abbreviations. A book is looked up as $OSISbook[bnumber - 1],
+# so the order of this list is significant.
+@OSISbook = (
+#OT
+ "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
+
+#NT
+"Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus", "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John", "Jude", "Rev",
+
+#Apocrypha
+"Tob", "Jdt", "AddEsth", "Wis", "Sir", "Bar", "EpJer", "PrAzar", "Sus", "Bel", "1Macc", "2Macc", "3Macc", "4Macc", "1Esd", "2Esd", "PrMan", "Ps151", "PssSol", "Odes"
+);
+
+# Command line: <osisWork> <input filename> [-o OSIS-file]
+if (scalar(@ARGV) < 2) {
+    print "zef2osis.pl -- Zefania XML to OSIS $osisVersion converter version $version ($date)\nSyntax: zef2osis.pl <osisWork> <input filename> [-o OSIS-file]\n";
+    exit (-1);
+}
+
+$osisWork = $ARGV[0];
+
+# An explicit "-o <file>" overrides the default "<osisWork>.osis.xml".
+if (defined($ARGV[2]) && $ARGV[2] eq "-o" && defined($ARGV[3])) {
+    $outputFilename = $ARGV[3];
+}
+else {
+    $outputFilename = "$osisWork.osis.xml";
+}
+
+# Read the whole input before touching the output file, so that a bad
+# input path cannot leave a truncated output file behind.
+open (INF, "<", $ARGV[1]) or die "Could not open file $ARGV[1] for reading: $!";
+@data = <INF>;
+close (INF);
+
+open (OUTF, ">", $outputFilename) or die "Could not open file $outputFilename for writing: $!";
+
+# From here on $date is today's date (YYYY-MM-DD), used in <revisionDesc>.
+($mday, $mon, $year) = (localtime(time))[3, 4, 5];
+$year += 1900;
+$mon++;
+$date = sprintf("%04d\-%02d\-%02d", $year, $mon, $mday);
+
+print OUTF "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<revisionDesc><date>$date<\/date><p>initial OSIS 2.1.1 version<\/p><\/revisionDesc>\n<work osisWork=\"$osisWork\">\n<title><\/title>\n<creator role=\"encoder\">zef2osis.pl by Chris Little www.crosswire.org<\/creator>\n<type type=\"OSIS\">Bible<\/type>\n<identifier type=\"OSIS\">$osisWork<\/identifier>\n<source>Zefania XML<\/source>\n<language type=\"IETF\"><\/language>\n<scope><\/scope>\n<refSystem>Bible<\/refSystem>\n<\/work>\n<\/header>\n";
+
+# Current position in the text while converting.
+$book = "";
+$chap = "";
+$vers = "";
+
+# Becomes 1 once the end of the <INFORMATION> block has been seen; only
+# lines after that point are converted.
+$pr = 0;
+
+# Remove every empty <TAG ...></TAG> element from a line.
+#
+# Arguments: the element name, then the line to clean.
+# Returns:   the line with all elements of that name that have no content
+#            (attributes are allowed) removed. Elements that only become
+#            empty through this removal are left for a later call.
+sub delempty {
+    my ($tag, $iline) = @_;
+    # \Q...\E keeps the name literal and stops "$tag[" from being read as an
+    # array subscript.
+    $iline =~ s/<\Q$tag\E[^>]*><\/\Q$tag\E>//g;
+    return $iline;
+}
+
+$q = 0;
+
+# Convert the input line by line. Nothing is written until the closing
+# </INFORMATION> tag has been seen; every later line is rewritten from
+# Zefania markup to OSIS markup and printed if anything is left of it.
+foreach $line (@data) {
+
+    if ($pr == 1) {
+
+        $line =~ s/[\r\n]+/\n/g;
+        $line =~ s/^\s+//;
+        $line =~ s/\s+$//;
+        $line =~ s/¶//g; #delete pilcrows from OLB modules--they're inserted programmatically, not based on the actual text
+        $line =~ s/<(\?|\!--)[^\>]+>//;
+        $line =~ s/<\/XMLBIBLE>//;
+
+        # Number of clean-up passes over the empty elements.
+        $i = 1;
+        while ($i > 0) {
+            $line = delempty("NOTE", $line);
+            $line = delempty("DIV", $line);
+            $line = delempty("STYLE", $line);
+            $line = delempty("BIBLEBOOK", $line);
+            $line = delempty("CHAPTER", $line);
+            $line = delempty("VERS", $line);
+            $i--;
+        }
+
+        $line =~ s/#FF0000/red/g;
+
+        # Red text is marked up as words of Jesus.
+        $line =~ s/<STYLE css=\"color:red\">([^<]+?)<\/STYLE>/<q who="Jesus">$1<\/q>/g;
+        $line =~ s/<STYLE css=\"font-style:italic\;color:red\">([^<]+?)<\/STYLE>/<q who="Jesus"><hi type="italic">$1<\/hi><\/q>/g;
+
+        # Any other colour is dropped.
+        $line =~ s/\;? ?color:\#[0-9a-fA-F]{6}\;?//g;
+
+        $line =~ s/<STYLE css=\"font\-weight:bold\">(.+?)<\/STYLE>/<hi type="bold">$1<\/hi>/g;
+        $line =~ s/<STYLE css=\"font\-style:italic\">(.+?)<\/STYLE>/<hi type="italic">$1<\/hi>/g;
+        $line =~ s/<STYLE css=\"font\-size: ?x-small\">(.+?)<\/STYLE>/<hi type="x-small">$1<\/hi>/g;
+
+        $line =~ s/<STYLE css=\"color:red\">(.+?)<\/STYLE>/<q who="Jesus">$1<\/q>/g;
+
+        # The current book, chapter and verse are only updated when the
+        # number was actually found, so a tag without one cannot overwrite
+        # them with a stale capture.
+        if ($line =~ s/<BIBLEBOOK .*?bnumber=\"(\d+)\".*?>/<div type="book" osisID="$OSISbook[$1-1]">/) {
+            $book = $OSISbook[$1-1];
+        }
+        if ($line =~ s/<CHAPTER .*?cnumber=\"(\d+)\".*?>/<chapter osisID="$book.$1">/) {
+            $chap = $1;
+        }
+
+        # "Verse 0" is turned into a plain paragraph.
+        $line =~ s/<VERS vnumber=\"0\">(.+?)<\/VERS>/<p>$1<\/p>/g;
+
+        if ($line =~ s/<VERS .*?vnumber=\"(\d+)\".*?>/<verse osisID="$book.$chap.$1">/) {
+            $vers = $1;
+        }
+
+        $line =~ s/<\/BIBLEBOOK>/<\/div>/g;
+        $line =~ s/<\/CHAPTER>/<\/chapter>/g;
+        $line =~ s/<\/VERS>/<\/verse>/g;
+        $line =~ s/<(\/?)CAPTION[^>]*>/<$1title>/g;
+        $line =~ s/<PROLOG[^>]*>/<div type="introduction">/g;
+        $line =~ s/<\/PROLOG>/<\/div>/g;
+
+        $line =~ s/n-studynote/x-studynote/g; #elberfelder hack
+        # This pattern has a single capture group, so the note text is $1.
+        $line =~ s/<DIV><NOTE type="x-studynote"><DIV><NOTE type="x-studynote">(.+?)<\/NOTE><\/DIV><\/NOTE><\/DIV>/<note>$1<\/note>/g; #elberfelder hack
+        $line =~ s/<DIV><NOTE type="(x-studynote|x-bold)">(.+?)<\/NOTE>([^<]+)<\/DIV>/<note>$2$3<\/note>/g; #elberfelder hack
+        $line =~ s/<DIV> <NOTE type="(x-studynote|x-bold)">(.+?)<\/NOTE><\/DIV>/ <note>$2<\/note>/g; #elberfelder hack
+
+        $line =~ s/<DIV><NOTE type="(x-studynote|x-bold)">(.+?)<\/NOTE><\/DIV>/<note>$2<\/note>/g;
+        $line =~ s/<DIV><NOTE type="(x-studynote|x-bold)">(.+?)<\/NOTE><\/DIV>/<note>$2<\/note>/g;
+
+
+        $line =~ s/ <\/hi>/<\/hi> /g;
+
+        if ($line !~ /^\s*$/) {
+            print OUTF "$line\n";
+        }
+    }
+    # Checked after the conversion above, so the line that holds
+    # </INFORMATION> is itself not converted.
+    if ($line =~ /<\/INFORMATION>/) {
+        $pr = 1;
+    }
+
+}
+
+print OUTF "<\/osisText>\n";
+print OUTF "<\/osis>\n";
+
+# Buffered write errors only surface when the handle is closed.
+close (OUTF) or die "Could not finish writing the OSIS output: $!";
More information about the sword-cvs
mailing list