[sword-svn] r47 - trunk/modules/mt-lxx-parallel
mgruner at crosswire.org
mgruner at crosswire.org
Sat Oct 15 02:13:29 MST 2005
Author: mgruner
Date: 2005-10-15 02:13:28 -0700 (Sat, 15 Oct 2005)
New Revision: 47
Modified:
trunk/modules/mt-lxx-parallel/convert.pl
Log:
some progress
Modified: trunk/modules/mt-lxx-parallel/convert.pl
===================================================================
--- trunk/modules/mt-lxx-parallel/convert.pl 2005-10-10 11:58:38 UTC (rev 46)
+++ trunk/modules/mt-lxx-parallel/convert.pl 2005-10-15 09:13:28 UTC (rev 47)
@@ -21,6 +21,9 @@
my $hebrewLetters="A-Z\(\)\+\#\$\*\&/"; #used in a character class of a regexp later
my %hebrew2utf8 = (
")" =>chr(0x05D0), #HEBREW LETTER ALEF
+
+"A" =>chr(0x05D0), #HEBREW LETTER ALEF # TODO: check, this is from an occurrence of ABRHM
+
"B" =>chr(0x05D1), #HEBREW LETTER BET
"G" =>chr(0x05D2), #HEBREW LETTER GIMEL
"D" =>chr(0x05D3), #HEBREW LETTER DALET
@@ -58,10 +61,12 @@
"," => ",", #separate words in colB
-"{" => "{", # TODO: CHECK IF NECCESSARY
-"}" => "}",
-"." => ".",
+"?" => "<note type=\"textual\">Uncertain.</note>" #HACK
+#"{" => "{", # TODO: CHECK IF NECCESSARY
+# "}" => "}",
+#"." => ".",
+
);
my %greek2utf8 = (
@@ -147,87 +152,77 @@
"*z" => "Qere wela ketib, ketib wela qere.",
"[ ]" => "Reference of number of verse in LXX, different from MT.", # TODO: MAKE USE OF IT
"[[ ]]" => "Reference number of verse in MT, different from the LXX.",
+"{x}" => "UNKNOWN", # TODO: FIX
"--- {x}" => "Apparent minus created by lack of equivalence between long stretches of text in the LXX and MT.",
"--+ {x}" => "Apparent plus created by lack of equivalence between long stretches of text in the LXX and MT.",
"{...}" => "Equivalent reflected elsewhere in the text, disregarded by indexing program.",
"~" => "Difference in sequence between MT and LXX, denoted after the first Hebrew word and before the second one, as well as between two Greek words.",
"~~~" => "Equivalent of the Hebrew or Greek word(s) occurring elsewhere in the verse or context (transposition).",
"{..~}" => "Stylistic or grammatical transposition.",
+"{..}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning
"---" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).",
+"--" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check, probably wrong
"--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).",
"---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
+"'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
"''" => "Long minus or plus (at least four lines).",
"{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
"{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?",
"{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.",
"{..r}" => "Notation in Hebrew column of elements repeated in the translation.",
"?" => "Questionable notation, equivalent, etc.",
+"??" => "Questionable notation, equivalent, etc.", # TODO: my addition
"{p}" => "Greek preverb representing Hebrew preposition.",
"{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.",
+
"{!}" => "Infinitive absolute.",
"{!}na" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}ad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}aj" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}nad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}nd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}nd+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}p" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}p+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}pd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}-" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}v" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+
"{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8
"{t}" => "Transliterated Hebrew word.",
"\#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.",
"{v}" => "The reading of the main text of the LXX seems to reflect a secondary text, while the \"original\" reading is reflected in a variant.",
-
+# Notes regarding ColB of the Hebrew
"=" => "Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).",
-"=?" => "? Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).", # TODO: my addition, check
"={d}" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: my addition, check
"={d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", # TODO: my addition, check
"=\%" => "Introducing categories of translation technique recorded in col. b.",
"=\%vap" => "Change from active to passive form in verbs.",
"=\%vpa" => "Change from passive to active form in verbs.",
"=\%p" => "Difference in preposition or particle.",
+"=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check
"=\%p+" => "Addition of preposition or particle.",
+"=\%p+?" => "Addition of preposition or particle?",
"=\%p-" => "Omission of preposition or particle.",
"=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
-"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
+#"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
"=+" => "Difference in numbers between MT and the LXX.",
"=\@" => "Etymological exegesis.",
+"=?\@" => "Etymological exegesis?", #my addition
+"=\@?" => "Etymological exegesis?", #my addition
"=\@...a" => "Etymological exegesis according to Aramaic.",
"=:" => "Introducing reconstructed proper noun.",
"=v" => "Difference in vocalization (reading).",
+"=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text
"=r" => "Incomplete retroversion.",
"{*}" => "Agreement of LXX with ketib.",
"{**}" => "Agreement of LXX with qere.",
-
"." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",
-".)(" => "Interchange of consonants (א/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".(q" => "Interchange of consonants (ע/ק) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".q(" => "Interchange of consonants (ק/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".)x" => "Interchange of consonants (א/ח) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".x)" => "Interchange of consonants (ח/א) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".(x" => "Interchange of consonants (ע/ח) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".x(" => "Interchange of consonants (ח/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".+d" => "Interchange of consonants (ט/ד) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".br" => "Interchange of consonants (ב/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rb" => "Interchange of consonants (ר/ב) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rd" => "Interchange of consonants (ר/ד) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".dr" => "Interchange of consonants (ד/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rg" => "Interchange of consonants (ר/ג) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rh" => "Interchange of consonants (ר/ה) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rl" => "Interchange of consonants (ר/ל) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\mb" => "Interchange of consonants (ק/מ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".mn" => "Interchange of consonants (מ/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".nm" => "Interchange of consonants (נ/מ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".nr" => "Interchange of consonants (נ/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".ny" => "Interchange of consonants (נ/י) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".yn" => "Interchange of consonants (י/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rn" => "Interchange of consonants (ר/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\$c" => "Interchange of consonants (שׁ/צ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\qb" => "Interchange of consonants (ק/ב) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\wy" => "Interchange of consonants (ו/י) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\yw" => "Interchange of consonants (י/ו) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-
+".a" => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", # TODO: occurs, unknown
".m" => "Metathesis of consonants between MT and the presumed Hebrew parent text of the LXX.",
".z" => "Possible abbreviation.",
".s" => "One word of MT separated into two or more words in the parent text of the LXX.",
@@ -235,6 +230,8 @@
".w" => "Different word-division reflected in the parent text of the LXX.",
"<sp" => "<sp", #TODO: FIX, occurs in text
+"<sp>" => "<sp>", #TODO: FIX, occurs in text
+"<sp^>" => "<sp^>", #TODO: FIX, occurs in text
"^" => "^", #Notsure what these are
"^^^" => "^^^",
@@ -257,41 +254,111 @@
sub translateHebrewNote(){
my $origNote = shift;
- #print("TranslateHebrewNote $origNote\n");
+# print("TranslateHebrewNote $origNote\n");
- ($origNote =~ m/{\.\.d(.+)}/) and
+ ($origNote =~ m/^=?\.([a-z()\$+-])([a-z()\$+-])$/) and
+ return( &createNote("Interchange of consonants (" .
+ &translateHebrewLetter( uc( $1 ) ) . "/" . &translateHebrewLetter( uc( $2 ) ) .
+ ") between MT and the presumed Hebrew parent text of the LXX.") );
+
+
+ ($origNote =~ m/^{\.\.d(.+)}/) and
return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );
- ($origNote =~ m/{\.\.r(.+)}/) and
+ ($origNote =~ m/^{\.\.r(.+)}/) and
return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) );
- ($origNote =~ m/{\.\.\.(.+)}/) and
+ ($origNote =~ m/^{\.\.\.(.+)}/) and
return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) );
- #Special cases: the note includes more than one hebrew word
+ ($origNote =~ m/^{\.\.(.+)}/) and
+ return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..}" }) );
+
+ #Special cases: the note includes more than one hebrew word, "cat" the results together
($origNote =~ m/^{\.\.\.([^}]+)$/) and
return( &openNote("(".&translateHebrewWordorNote($1).") ".$notes{"{...}"} ) );
($origNote =~ m/^([^{]+)}$/) and
return( &closeNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) );
- ($origNote =~ m/^=[^?$hebrewLetters]*([?$hebrewLetters]+)$/) and
- return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
+ ($origNote =~ m/^@([?$hebrewLetters]+)/) and
+ return( &createNote( $notes{ "=\@" } ) . &translateHebrewWordorNote( $1 ) );
- #special case: no note, but a crossref
- ($origNote =~ m/<(.+)>/) and
- return("<reference osisRef=\"$1\"><$1></reference> ");
+ ($origNote =~ m/^\^([?$hebrewLetters]+)/) and
+ return( "^" . &translateHebrewWordorNote( $1 ) ); # TODO: check, what is ^?
+ #
+ # Special handling for the = colB Notes
+ #
+ if (($origNote =~ m/^=/) and (not $notes{ $origNote } )) { #only split if the note does not exist, to avoid parsing problems
+ print("note reads $origNote\n");
+ if ($origNote =~ m/^=(<[0-9.a-z]+>)$/){
+ return( &translateHebrewWordorNote( $1 ) );
+ }
+ elsif ($origNote =~m/^=(.+)$/ and $notes{ $1 }){
+ return( &translateHebrewWordorNote( $1 ) );
+ }
+ elsif ($origNote =~ m/^=([?$hebrewLetters]+)/){
+ return( &translateHebrewWordorNote( $1 ) );
+ }
+ elsif ($origNote =~ m/^=([^?$hebrewLetters]+)([?$hebrewLetters]+)/){ #Note + Hebrew text, split up
+ if ($notes{ $1 }){
+ return( &translateHebrewNote( $1 ) . &translateHebrewWordorNote( $2 ) );
+ }
+ elsif( $notes{ "=$1" }){
+ return( &translateHebrewNote( "=$1" ) . &translateHebrewWordorNote( $2 ) );
+ }
+ else { die("Could not parse note.\n"); }
+ }
+ else { die("Could not parse note.\n"); }
+ }
+
+ #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed
+ ($origNote =~ m/^<|>$/) and
+# return("<reference osisRef=\"$1.$2\"/>");
+ return $origNote;
+
+# #Special cases: osisREf with bookID, split because of space char, so put them together again
+# ($origNote =~ m/^<\^?(\w+)$/) and
+# # return( "<reference osisRef=\"$1." ); # TODO: check if <reference/> exists
+# return $origNote;
+# ($origNote =~ m/^(\d+)[.:](\d+)>?/) and
+# # return( "$1.$2\"/> " );
+# return $origNote;
+
+
+
+ #special case: no note, but a crossref (with book ID)
+# ($origNote =~ m/^<\^?(\w+)\s?(\d+)[.:](\d+)>?/) and
+# return("<reference osisRef=\"$1.$2.$3\"><$1></reference> ");
+
($origNote =~ m/^[?].*/) and
return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
- ($notes{ $origNote }) or die("Note $origNote not found.\n");
- return( &createNote( $notes{$origNote} ) );
+ ($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
+
+ for my $i ( 1 .. (length($origNote)-1) ){ #last try, split up into chunks
+ if ( $notes{ substr($origNote,0,$i) } ){
+ return( &translateHebrewNote(substr($origNote,0,$i)) . &translateHebrewWordorNote(substr($origNote,$i, length($origNote) - $i) ) );
+ }
+ }
+
+ die("Note $origNote not found.\n");
}
+sub translateHebrewLetter(){ #will return unicode hebrew without morph separation
+ my $hebrew = shift;
+
+ my $result;
+ $result = $hebrew2utf8{ $hebrew } || die("Could not find Hebrew letter $hebrew\n");
+
+ return $result;
+}
+
+
sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation
my $hebrew = shift;
-# print("TranslateHebrew of: $hebrew");
+# print("TranslateHebrew of: $hebrew\n");
( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
@@ -311,6 +378,9 @@
}
sub translateGreekWordorNote(){
+
+ return; # TODO: remove
+
my $greek = shift;
foreach my $key (keys %notes){
@@ -342,9 +412,9 @@
my $origLine = shift;
my $result;
-# printf("parsing %s\n", $origLine);
+ printf("parsing %s\n", $origLine);
- $origLine =~ s/--=/--+/; # TODO: UGLY HACK, this appears in the text but not the notes
+ $origLine =~ s/--=/--+ =/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
($origLine =~ m/^([^=]+)?([=].+)?\t(.+)$/) or die("No match in parseLine().\n");
($1 or $2) or die("Hebrew not found.\n");
@@ -364,14 +434,14 @@
$result .= "</cell>\n <cell>";
foreach my $wordB (@hebrewWordsColB){
- if ( substr($wordB, 0, 1) eq "=" ){
- $wordB =~ m/(=[^$hebrewLetters]*)([$hebrewLetters].*)?/ or die("No match in ColB.\n");
- $1 and $result .= &translateHebrewWordorNote( $1 ); #This isolates the notes introducing colB (=*)
- $result .= &translateHebrewWordorNote( $2 );
- }
- else {
+# if ( $wordB =~ m/^=/ ){
+# $wordB =~ m/(=[^$hebrewLetters()]*)([$hebrewLetters].*)?/ or die("No match in ColB.\n"); #added ( and ) in the first expression, because they can occur in notes also
+# $1 and $result .= &translateHebrewWordorNote( $1 ); #This isolates the notes introducing colB (=*)
+# $result .= &translateHebrewWordorNote( $2 );
+# }
+# else {
$result .= &translateHebrewWordorNote( $wordB );
- }
+# }
}
$result .= "</cell>\n <cell>";
@@ -552,6 +622,8 @@
# File File id ThML id OSIS id Short Book Title
push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+die "Finished Genesis\n";
+
push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
More information about the sword-cvs
mailing list