[sword-svn] r56 - trunk/modules/mt-lxx-parallel
mgruner at crosswire.org
mgruner at crosswire.org
Sun Dec 4 14:24:23 MST 2005
Author: mgruner
Date: 2005-12-04 14:24:14 -0700 (Sun, 04 Dec 2005)
New Revision: 56
Modified:
trunk/modules/mt-lxx-parallel/convert.pl
Log:
Numerous fixes. The output file should be valid OSIS (I hope ;)
Modified: trunk/modules/mt-lxx-parallel/convert.pl
===================================================================
--- trunk/modules/mt-lxx-parallel/convert.pl 2005-12-02 11:15:52 UTC (rev 55)
+++ trunk/modules/mt-lxx-parallel/convert.pl 2005-12-04 21:24:14 UTC (rev 56)
@@ -93,33 +93,6 @@
"!" => "!", #occurs in the text!?
"|" => "|", #occurs in the text!?
-# "*A" =>chr(0x0391), #GREEK CAPITAL LETTER ALPHA
-# "*B" =>chr(0x0392), #GREEK CAPITAL LETTER BETA
-# "*G" =>chr(0x0393), #GREEK CAPITAL LETTER GAMMA
-# "*D" =>chr(0x0394), #GREEK CAPITAL LETTER DELTA
-# "*E" =>chr(0x0395), #GREEK CAPITAL LETTER EPSILON
-# "*V" =>chr(0x03DC), #GREEK LETTER DIGAMMA
-# "*Z" =>chr(0x0396), #GREEK CAPITAL LETTER ZETA
-# "*H" =>chr(0x0397), #GREEK CAPITAL LETTER ETA
-# "*Q" =>chr(0x0398), #GREEK CAPITAL LETTER THETA
-# "*I" =>chr(0x0399), #GREEK CAPITAL LETTER IOTA
-# "*K" =>chr(0x039A), #GREEK CAPITAL LETTER KAPPA
-# "*L" =>chr(0x039B), #GREEK CAPITAL LETTER LAMDA
-# "*M" =>chr(0x039C), #GREEK CAPITAL LETTER MU
-# "*N" =>chr(0x039D), #GREEK CAPITAL LETTER NU
-# "*C" =>chr(0x039E), #GREEK CAPITAL LETTER XI
-# "*O" =>chr(0x039F), #GREEK CAPITAL LETTER OMICRON
-# "*P" =>chr(0x03A0), #GREEK CAPITAL LETTER PI
-# "*R" =>chr(0x03A1), #GREEK CAPITAL LETTER RHO
-# "*S" =>chr(0x03A3), #GREEK CAPITAL LETTER SIGMA
-# "*J" =>chr(0x03A3), #GREEK CAPITAL LETTER SIGMA #at end of Word
-# "*T" =>chr(0x03A4), #GREEK CAPITAL LETTER TAU
-# "*U" =>chr(0x03A5), #GREEK CAPITAL LETTER UPSILON
-# "*F" =>chr(0x03A6), #GREEK CAPITAL LETTER PHI
-# "*X" =>chr(0x03A7), #GREEK CAPITAL LETTER CHI
-# "*Y" =>chr(0x03A8), #GREEK CAPITAL LETTER PSI
-# "*W" =>chr(0x03A9), #GREEK CAPITAL LETTER OMEGA
-
"A" =>chr(0x03B1), #GREEK SMALL LETTER ALPHA
"B" =>chr(0x03B2), #GREEK SM LETT BETA / SM LETTER BETA BEGINNING OF WORD
"G" =>chr(0x03B3), #GREEK SMALL LETTER GAMMA
@@ -251,6 +224,7 @@
"=\%p" => "Difference in preposition or particle.",
"=p\%" => "Difference in preposition or particle.",
"=\%p?" => "Difference in preposition or particle?",
+"=\%?p" => "Difference in preposition or particle?",
"=p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain?
"={d}\%p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? DOUBLET?
"=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check
@@ -260,6 +234,7 @@
"=\%p+?" => "Addition of preposition or particle?",
"=\%p-" => "Omission of preposition or particle.",
"=\%p-?" => "Omission of preposition or particle?",
+"=\%?p-" => "Omission of preposition or particle?",
"=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
"=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
"=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
@@ -311,25 +286,17 @@
);
-sub createNote(){
- my $noteText = shift;
- return("<note type=\"textual\">$noteText</note> ");
-}
-sub openNote(){
- my $noteText = shift;
- return("<note type=\"textual\">$noteText ");
-}
-sub closeNote(){
- my $noteText = shift;
- return("$noteText</note> ");
-}
+sub createNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText</note> "); }
+sub openNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText "); }
+sub closeNote(){ my $noteText = shift; return("$noteText</note> "); }
-
sub translateHebrewNote(){
my $origNote = shift;
# print("TranslateHebrewNote $origNote\n");
+ (not $origNote) and die("Hebrew note empty.");
+
($origNote eq "=") and return; #= only marks colB, no real note
($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
@@ -403,7 +370,7 @@
($origNote =~ m/^.+[.].+$/) and #Occurs e.g.: "<gen1.1 ex1.2 lev3.3"
return $origNote;
- ($origNote =~ m/^[?].*/) and
+ ($origNote =~ m/^[?](.*)/) and
return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
($origNote =~ m/^(.+),(.+)$/) and # 2 Notes / Words, split up, but only at the end
@@ -438,6 +405,8 @@
# print("TranslateGreekNote $origNote\n");
+ (not $origNote) and die("Greek note empty.");
+
($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
($origNote =~ m/^\[(.+)\]?/) and
@@ -540,6 +509,8 @@
sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation
my $hebrew = shift;
+ if (not $hebrew) { die("Hebrew string empty.") };
+
# print("TranslateHebrew of: $hebrew\n");
$hebrew =~ s/^mn$/.mn/; #Ezek 24:17, error?
@@ -568,6 +539,7 @@
my $greek = shift;
+ if (not $greek) { die("Greek string empty.") };
# printf("TranslateGreek of $greek\n");
( $notes {$greek} ) and return &translateGreekNote( $greek ); # exact match first
@@ -617,22 +589,24 @@
$origLine =~ s/=a\$\/DY/=A\$\/DY/;# TODO: UGLY HACK, Hebrew letter wrong
$origLine =~ s/{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA}/{..^EPIQEI\\S E)FI\/LHSA}/;# TODO: UGLY HACK, strange note
$origLine =~ s/E\t\)KPE\/SH\|/\tE)KPE\/SH|/; #occurs, tab misplaced
-
$origLine =~ s/^\(..r\(L\/YK}/{..r(L\/YK}/; # in EZEK
$origLine =~ s/^DANW {t}$/DANW\t{t}/; # in DAN
-
$origLine =~ s/AI\)W=NOS\[110\.10/AI)W=NOS [110.10/; # in PS
$origLine =~ s/W\/YD\(Y{\*\*}/W\/YD(Y {**}/; # in PS
- $origLine =~ s/{\.1\.dU\(PE\\R}/{..dU(PE\R}/; # in PS
+ $origLine =~ s/{\.1\.dU\(PE\\R}/{..dU(PE\\R}/; # in PS
-
$origLine =~ m/^W\(\/SPER/ and return; #ignore, probably an error
($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and
$origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing
- ($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and
- $origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing
+
+ ($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and # in JoshB: Tab misplaced
+ $origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\ OI( LI/QOI"; # TODO: hack, Tab missing
+
+ ($origLine eq "{...?AU)TOU=} MDBR =v\tLALOU=NTOS") and
+ $origLine = "MDBR =v\tLALOU=NTOS"; # In EZEK: TODO: error, greek in first col
+
($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and
$origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing
($origLine eq "W/YBW) {...EI)S}\tKAI\\ EI)SH=LQEN") and
@@ -640,7 +614,7 @@
($origLine eq "W/L) {..^OU)}\tDE\\") and
$origLine = "W/L)\t{..^OU)} DE\\"; # TODO: hack, TAB misplaced
-# printf("parsing %s\n", $origLine);
+# print("parsing %s\n", $origLine);
($origLine =~ m/^([^=\t]+)?([=][^\t]*)?\t(.+)$/) or die("No match in parseLine().\n");
($1 or $2) or die("Hebrew not found.\n");
@@ -655,25 +629,17 @@
$result .= "<row>\n <cell>";
foreach my $wordA (@hebrewWordsColA){
- $result .= &translateHebrewWordorNote( $wordA ) . " ";
+ ($wordA) and $result .= &translateHebrewWordorNote( $wordA ) . " ";
}
$result .= "</cell>\n <cell>";
foreach my $wordB (@hebrewWordsColB){
- $result .= &translateHebrewWordorNote( $wordB ) . " ";
+ ($wordB) and $result .= &translateHebrewWordorNote( $wordB ) . " ";
}
$result .= "</cell>\n <cell>";
foreach my $wordG (@greekWords){
-# if ( $greekWords[$index] eq "{x}" ){ #special case: note containing a space, has to be handled together
-# $result .= &translateGreekWordorNote( "$wordG $greekWords[$index+1]" );
-# $index += 2;
-# }
-# elsif ( $wordG eq "{x}" ){ #skip
-# ++$index;
-# }
-# else{
- $result .= &translateGreekWordorNote( $wordG ). " ";
+ ($wordG) and $result .= &translateGreekWordorNote( $wordG ). " ";
}
$result .= "</cell>\n</row>";
# printf("Result: %s\n", $result);
@@ -729,16 +695,26 @@
my @result;
+ push(@result, "<div type=\"book\" osisID=\"$osis_id\">");
+
CHAPTER: foreach my $chapter(1..1000){
+ my $chapter_header_written;
print("Processing $bookname_infile chapter $chapter.\n");
my $verse_found;
VERSE: foreach my $verse(1..1000){
my @verseContent = &grabVerseContent($bookname_infile, $chapter, $verse, @BUF);
if (@verseContent) {
if ($bookname_infile eq "Obad"){
+ if (not $chapter_header_written) {
+ $chapter_header_written = 1; #no chapters in Obadiah
+ }
push(@result, "<verse osisID=\"$osis_id.$verse\">"); #chapter will be ignored for >1 by grabVerseContent
}
else{
+ if (not $chapter_header_written) {
+ push(@result, "<chapter osisID=\"$osis_id.$chapter\">");
+ $chapter_header_written = 1;
+ }
push(@result, "<verse osisID=\"$osis_id.$chapter.$verse\">");
}
push(@result, @verseContent);
@@ -746,6 +722,9 @@
$verse_found = 1;
}
else{ #verse nonexistent, goto next chapter
+ if ($chapter_header_written and (not $bookname_infile eq "Obad") ) {
+ push(@result, "</chapter>");
+ }
last VERSE;
}
}
@@ -754,8 +733,11 @@
last CHAPTER;
}
}
+
+ push(@result, "</div>"); #book
+ print("done.\n");
+
return(@result);
- print("done.\n");
}
sub processBookVariant(){
@@ -776,17 +758,25 @@
my @result;
+ push(@result, "<div type=\"book\" osisID=\"$osis_id\">");
+
CHAPTER: foreach my $chapter(1..1000){
print("Processing $bookname_infile_A and $bookname_infile_B chapter $chapter.\n");
+ my $chapter_header_written;
my $verse_found;
VERSE: foreach my $verse(1..1000){
my @verseContentA = &grabVerseContent($bookname_infile_A, $chapter, $verse, @BUFA);
my @verseContentB = &grabVerseContent($bookname_infile_B, $chapter, $verse, @BUFB);
if (@verseContentA or @verseContentB) {
+ if (not $chapter_header_written) {
+ push(@result, "<chapter osisID=\"$osis_id.$chapter\">");
+ $chapter_header_written = 1;
+ }
push(@result, "<verse osisID=\"$osis_id.$chapter.$verse\">");
$verse_found = 1;
}
else{ #verse nonexistent, goto next chapter
+ if ($chapter_header_written) { push(@result, "</chapter>"); }
last VERSE;
}
if (@verseContentA){
@@ -805,9 +795,12 @@
last CHAPTER;
}
}
- return(@result);
+
+ push(@result, "</div>"); #book
print("done.\n");
+ return(@result);
+
}
sub loadFile(){ #$fileName loads the file into the buffer and makes small corrections
@@ -818,47 +811,47 @@
my @result;
my $index = 0;
- foreach my $currentItem (@buffer){
- if ($buffer[$index] =~ m/^DANIHL/){
+ LOOP: foreach my $currentItem (@buffer){
+ if ($currentItem =~ m/^DANIHL/){
$result[$#result] .= " " .$buffer[$index];
}
- elsif ($buffer[$index] =~ m/^NUMA/){
+ elsif ($currentItem =~ m/^NUMA/){
$result[$#result] .= $buffer[$index];
}
- elsif ($buffer[$index] =~ m/^DEUTERONO\/MION/){
+ elsif ($currentItem =~ m/^DEUTERONO\/MION/){
$result[$#result] .= " ".$buffer[$index];
}
- elsif ($buffer[$index] =~ m/^AU\)TOU=/){
+ elsif ($currentItem =~ m/^AU\)TOU=/){
$result[$#result] .= " ".$buffer[$index];
}
- elsif ($buffer[$index] =~ m/^E\(\/C/){
+ elsif ($currentItem =~ m/^E\(\/C/){
$result[$#result] .= " ". $buffer[$index];
}
- elsif ($buffer[$index] =~ m/^MOU/){
+ elsif ($currentItem =~ m/^MOU/){
$result[$#result] .= " " . $buffer[$index];
}
- elsif ($buffer[$index] =~ m/^NEHL$/){
+ elsif ($currentItem =~ m/^NEHL$/){
$result[$#result] .= $buffer[$index]; # no space, ANANEL
}
- elsif ($buffer[$index] =~ m/^ESTHKE\/NAI$/){
+ elsif ($currentItem =~ m/^ESTHKE\/NAI$/){
$result[$#result] .= $buffer[$index]; # no space
}
- elsif ($buffer[$index] =~ m/^ESTHKW\\S$/){
+ elsif ($currentItem =~ m/^ESTHKW\\S$/){
$result[$#result] .= $buffer[$index]; # no space
}
- elsif ($buffer[$index] =~ m/^ISA/){ # a few lines in ISAIAH have this in different styles
+ elsif ($currentItem =~ m/^ISA/){ # a few lines in ISAIAH have this in different styles
$result[$#result] .= $buffer[$index]; # no space
}
- elsif ($buffer[$index] =~ m/^LAMYAN/){ # in LAM
+ elsif ($currentItem =~ m/^LAMYAN/){ # in LAM
$result[$#result] .= $buffer[$index]; # no space
}
- elsif ($buffer[$index] =~ m/^EZEKIHL/){ # in LAM
+ elsif ($currentItem =~ m/^EZEKIHL/){ # in LAM
$result[$#result] .= $buffer[$index]; # no space
}
- elsif ($buffer[$index] =~ m/^\)$/){ # in PS
+ elsif ($currentItem =~ m/^\)$/){ # in PS
$result[$#result] .= $buffer[$index]; # no space
}
- elsif ($buffer[$index] =~ m/^PS[Y\s]/){ # in PS; breaks at PS or PSY
+ elsif ($currentItem =~ m/^PS[Y\s]/){ # in PS; breaks at PS or PSY
$result[$#result] .= $buffer[$index]; # no space
}
elsif (($buffer[$index+1] =~ m/^#/) && ($buffer[$index] =~ m/^(.*)#$/)){ # in Daniel, # is used as a "continue line on next line" marker
@@ -878,14 +871,30 @@
my @result;
+
+push(@result,"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
+
+push(@result,"<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.2.1.xsd\">\n");
+
+push(@result,"<osisText osisIDWork=\"MT-LXX-Parallel\" xml:lang=\"en\">\n");
+
+push(@result,"
+<header>\
+ <work osisWork=\"MT-LXX-Parallel\">\
+ <title>The Parallel Aligned Hebrew-Aramaic and Greek texts of Jewish Scripture</title>\
+ <identifier type=\"OSIS\">MT-LXX-Parallel</identifier>\
+ <refSystem>Bible.Tanach</refSystem>\
+ </work>\
+</header>\n");
+
# File File id ThML id OSIS id Short Book Title
-#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
-#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
-#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
-#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
-#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
-#push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
-#push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
+# push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+# push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
+# push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
+# push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
+# push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
+# push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
+# push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
@@ -899,7 +908,6 @@
# push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") );
# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
#
-# #This might need special handling
#push(@result, &processBook("20.Psalms.par", "Ps", "Ps", "Ps", "Psalms"));
#
# push(@result, &processBook("23.Prov.par", "Prov", "Prov", "Prov", "Proverbs") );
@@ -909,9 +917,9 @@
# push(@result, &processBook("41.Jer.par", "Jer", "Jer", "Jer", "Jeremiah") );
# push(@result, &processBook("43.Lam.par", "Lam", "Lam", "Lam", "Lamentations") );
# push(@result, &processBook("44.Ezekiel.par", "Ezek", "Ezek", "Ezek", "Ezekiel") );
-#
+
# push(@result, &processBookVariant("45.DanielOG.par", "Dan", "Old Greek:", "46.DanielTh.par", "DanTh", "Theodotion:", "Dan", "Dan", "Daniel"));
-#
+
# push(@result, &processBook("28.Hosea.par", "Hos", "Hos", "Hos", "Hosea") );
# push(@result, &processBook("31.Joel.par", "Joel", "Joel", "Joel", "Joel") );
# push(@result, &processBook("30.Amos.par", "Amos", "Amos", "Amos", "Amos") );
@@ -925,6 +933,8 @@
# push(@result, &processBook("38.Zech.par", "Zech", "Zech", "Zech", "Zechariah") );
# push(@result, &processBook("39.Malachi.par", "Mal", "Mal", "Mal", "Malachi") );
+push(@result, "</osisText>\n</osis>");
-print( join("\n", @result) );
+open( OUTPUT, ">mt-lxx-par.osis.xml" );
+print( OUTPUT join("\n", @result) );
\ No newline at end of file
More information about the sword-cvs
mailing list