[sword-svn] r80 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: . Parse Translate
mgruner at www.crosswire.org
mgruner at www.crosswire.org
Sat Sep 16 14:25:39 MST 2006
Author: mgruner
Date: 2006-09-16 14:25:27 -0700 (Sat, 16 Sep 2006)
New Revision: 80
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java
Log:
finalized
sent preview version to Kirk for first check
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -27,14 +27,14 @@
// Samek
public void samek(){ //parasah setumah, closed paragraph == small space in line
- A.writer.appendText(" " + H.samekh + " ") ;
+ WLC2OSIS.writer.appendText(" " + H.samekh + " ") ;
// P.MarkerWritten = true ;
}
//------------------------------------------------------------------------------
// Pe
public void pe(){ // parasah petuhah, open paragraph == new line
- A.writer.appendText(" " + H.pe + " " + "<p/>") ;
+ WLC2OSIS.writer.appendText(" " + H.pe + " " + "<p/>") ;
// P.MarkerWritten = true ;
}
//------------------------------------------------------------------------------
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -6,32 +6,26 @@
public class Parser{
-WLC2OSIS A ;
Translate T ;
-public final String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
-public final String MorphologicalSegmentEnd = "</seg>" ;
-public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+public static final String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
+public static final String MorphologicalSegmentEnd = "</seg>" ;
+public static final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+public static String currentLine;
+public static String nextLine;
//-----------------------------------------------------------------------------
-public Parser(WLC2OSIS A, boolean wlc_only) {
- this.A = A ;
-
- T = new Translate(A, this) ;
-
+public Parser() {
+ T = new Translate() ;
new MC() ;
Note.setNotes();
- }
+}
-
-public void parse(){
- String s ;
- System.out.println("\n");
-
+public void parse(String in_file, boolean wlc_only){
BufferedReader file;
try{
- file = new BufferedReader( new FileReader( A.InputFilename ));
+ file = new BufferedReader( new FileReader( in_file ));
}
catch (IOException e) {
file = null;
@@ -42,27 +36,47 @@
String oldBookCode = "";
String newBookCode = "";
+ String nextBookCode = "";
int oldChapter = 0;
int newChapter = 0;
+ int nextChapter = 0;
int oldVerse = 0;
int newVerse = 0;
+ int nextVerse = 0;
int oldWordNumber = 0;
int newWordNumber = 0;
+ int nextWordNumber = 0;
+
+ String newWord = "";
+ String oldWord = "";
+
+ currentLine = "";
+ try{
+ nextLine = file.readLine(); //skip first line, book intro
+ nextLine = file.readLine();
+ }
+ catch (IOException e) {
+ System.out.println("Read error: " + e) ;
+ System.exit(1);
+ }
// book chap vs word# subword# note word lemma homonym lang morph
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
while ( true ){
- s="";
+ currentLine = nextLine;
try{
- s = file.readLine();
- System.out.println(s);
+ nextLine = file.readLine();
+ if ( (nextLine != null) && nextLine.startsWith(">") ){ //skip this line, book intro
+ nextLine = file.readLine();
+ }
+// System.out.println("CurrentLine: "+currentLine);
+// System.out.println("NextLine: "+nextLine);
}
catch (IOException e) {
- System.out.println("Read error: " + e) ;
- break;
}
- if ( s == null ){
+
+ if ( currentLine == null ){
try{
file.close();
}
@@ -72,16 +86,34 @@
break;
}
- if ( s.startsWith(">") ){ //ignore this line
+ else if ( !currentLine.startsWith("gn") ){ //limit to Genesis for presentation purposes
continue;
}
- java.util.regex.Matcher match = pattern.matcher( s );
+
+ java.util.regex.Matcher match = pattern.matcher( currentLine );
if (!match.matches()){
- System.out.println("No match found: " +s);
+ System.out.println("No match found at line: " + currentLine);
System.exit(1);
}
-
+
+ java.util.regex.Matcher match_nextLine = null;
+ if (nextLine != null){
+ match_nextLine = pattern.matcher( nextLine );
+ if (match_nextLine.matches()){
+ nextBookCode = match_nextLine.group(1);
+ nextChapter = Integer.parseInt( match_nextLine.group(2) );
+ nextVerse = Integer.parseInt( match_nextLine.group(3) );
+ nextWordNumber = Integer.parseInt( match_nextLine.group(4) );
+ }
+ }
+ if ((match_nextLine == null) || !match_nextLine.matches()){
+ nextBookCode = "";
+ nextChapter = 0;
+ nextVerse = 0;
+ nextWordNumber = 0;
+ }
+
//remember old values
oldBookCode = newBookCode;
oldChapter = newChapter;
@@ -93,9 +125,12 @@
newChapter = Integer.parseInt( match.group(2) );
newVerse = Integer.parseInt( match.group(3) );
newWordNumber = Integer.parseInt( match.group(4) );
+
//newSubWordNumber = Integer.parseInt( match.group(5) ); not used
String note = match.group(6);
- String word = match.group(7);
+ //remember previous word value
+ oldWord = newWord;
+ newWord = match.group(7);
String lemma = match.group(8);
String homonym = match.group(9);
if (homonym != null){
@@ -104,97 +139,116 @@
String separator = match.group(10);
String morph = match.group(11);
+ if ((newBookCode.equals(nextBookCode)) && (newChapter == nextChapter) && (newVerse == nextVerse) && (newWordNumber == nextWordNumber))
+ Translate.do_setFinal = false;
+ else
+ Translate.do_setFinal = true;
+
// Verse changed, close old and open new
if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
if (oldVerse > 0) {
- A.writer.appendText("</w></verse>");
+ WLC2OSIS.writer.appendText("</w></verse>");
}
- A.writer.openTag(
+ WLC2OSIS.writer.openTag(
"verse osisID=\""+
BookName.getBookName(bookNames, newBookCode).abbrev+"."+
newChapter+"."+
newVerse+"\"", 2);
- if (separator.equals("@")){
- A.writer.appendText("<w xml:lang=\"he\">");
+ if (wlc_only){
+ WLC2OSIS.writer.appendText("<w>");
+ }
+ else if (separator.equals("@")){
+ WLC2OSIS.writer.appendText("<w xml:lang=\"he\">");
}
else if (separator.equals("%")){
- A.writer.appendText("<w xml:lang=\"ah+\">");
+ WLC2OSIS.writer.appendText("<w xml:lang=\"ah+\">");
}
else {
- System.out.println("unknown separator: "+s);
+ System.err.println("unknown separator: " + currentLine);
System.exit(1);
}
}
//same verse, another word, add space
if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
- if (separator.equals("@")){
- A.writer.appendText("</w> <w xml:lang=\"he\">");
+ //Last word had a maqqef, no space between words
+ if (oldWord.endsWith("-"))
+ WLC2OSIS.writer.appendText("</w>");
+ //normal case, space between words
+ else
+ WLC2OSIS.writer.appendText("</w> ");
+
+ if (wlc_only){
+ WLC2OSIS.writer.appendText("<w>");
+ }
+ else if (separator.equals("@")){
+ WLC2OSIS.writer.appendText("<w xml:lang=\"he\">");
}
else if (separator.equals("%")){
- A.writer.appendText("</w> <w xml:lang=\"ah+\">");
+ WLC2OSIS.writer.appendText("<w xml:lang=\"ah+\">");
}
else {
- System.out.println("unknown separator: "+s);
+ System.err.println("unknown separator: " + currentLine);
System.exit(1);
}
}
//special case: nonprinting article, leave out for now
// TODO: FIX
- if (word.equals("_") || word.equals("*_") || word.equals("**_")){
+ if (newWord.equals("_") || newWord.equals("*_") || newWord.equals("**_")){
continue;
}
//Qere / Ketiv only
- else if (word.equals("**qq")){
- A.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>");
+ else if (newWord.equals("**qq")){
+ WLC2OSIS.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>");
continue;
}
- else if (word.equals("*kk")){
- A.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>");
+ else if (newWord.equals("*kk")){
+ WLC2OSIS.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>");
continue;
}
-
// Paragraph marker found
- if (morph.compareTo("x") == 0){
- if (word.compareTo("P") == 0){ //
- A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>");
+ else if (morph.compareTo("x") == 0){
+ if (newWord.compareTo("P") == 0){ //
+ WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+"<p/>");
}
- else if (word.compareTo("S") == 0){ //
- A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
+ else if (newWord.compareTo("S") == 0){ //
+ WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+" ");
}
- else if (word.compareTo("N") == 0){ //inverted nun
- A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
+ else if (newWord.compareTo("N") == 0){ //inverted nun
+ WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+" ");
}
- else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
+ else {
+ System.err.println("Unknown paragraph marker: " + currentLine);
+ System.exit(1);
+ }
}
+ else{
+ //now the text itself
+ WLC2OSIS.writer.appendText( constructSegment(newWord, lemma, homonym, morph, wlc_only) );
+ }
-
-
- //now the text itself
- A.writer.appendText( constructWord(word, lemma, homonym, morph) );
-
//Note found
if (note != null && note.length() > 0){
//System.out.println("Note: "+s);
- A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
+ WLC2OSIS.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
}
}
- A.writer.closeTag("verse", 2);
-
- return ;
+ WLC2OSIS.writer.appendText("</w></verse>");
}
-//----------------------------------------------------------------------------
-public String constructWord(String word, String lemma, String homonym, String morph){
- String result = "<seg type=\"x-morph\" lemma=\""+T.convertCompoundWord(lemma) + "\" ";
- if (homonym != null) {
- result += "homonym=\""+homonym + "\" ";
+public String constructSegment(String word, String lemma, String homonym, String morph, boolean wlc_only){
+ String result = "<seg type=\"morph\"";
+ if (!wlc_only){
+ result += " lemma=\""+T.convertCompoundWord(lemma)+"\"";
+ if (homonym != null)
+ result += " homonym=\""+homonym +"\"";
+ result += " morph=\""+morph+"\"";
}
- result += "morph=\""+morph+"\">";
+ result += ">";
result += T.convertCompoundWord(word)+"</seg>";
return result;
}
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -94,6 +94,7 @@
public void print(){
System.out.print(Name + " + ") ;
}
+
//-----------------------------------------------------------------------------
public Object clone(){
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -9,7 +9,7 @@
//==============================================================================
public class Note{
-public static Hashtable Notes = new Hashtable();
+public static Hashtable<String, String> Notes = new Hashtable<String, String>();
public static void setNotes(){
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -1,360 +1,355 @@
-package WLC2OSIS.Translate ;
+package WLC2OSIS.Translate;
-import WLC2OSIS.* ;
-import WLC2OSIS.Parse.* ;
-import WLC2OSIS.Translate.Note ;
+import WLC2OSIS.Parse.*;
+import WLC2OSIS.Translate.Note;
-import java.util.Vector ;
+import java.util.Vector;
+
//==============================================================================
/**
* <b>Critical translation of KQ-free MC words into Unicode characters.</b><p>
*/
//==============================================================================
-public class Translate{
+public class Translate {
-private
- Parser P ;
- WLC2OSIS A ;
+private static MCO M;
+private static MCO Mark;
+private static char c, c1;
+private static int Type, I, k1, len;
+private static Vector<MCO> MCOs, OrderedMCOs;
+private static int[] ConsonantPositions = new int[100];
- MCO M;
- MCO Mark ;
- char c, c1 ;
- int Type, I, k1, len ;
- Vector MCOs, OrderedMCOs ;
- int[] ConsonantPositions = new int[100] ;
+//This controls if the last consonant may be changed into its final form
+//will be set to true by Parser only if we are at the end of one word
+public static boolean do_setFinal = true;
+
//-----------------------------------------------------------------------------
-public Translate(WLC2OSIS A, Parser P) {
- this.A = A ;
- this.P = P ;
- }
+public Translate() {
+}
+
//------------------------------------------------------------------------------
-public String convertCompoundWord(String W){
- if (W.contains("~")){ //compound word without maqqef
+public String convertCompoundWord(String W) {
+ if (W.contains("~")) { //compound word without maqqef
String[] tmp = W.split("~");
return convertWord(tmp[0]) + " " + convertWord(tmp[1]);
- }
- else if (W.contains("-") && !W.endsWith("-")){ //compound word with maqqef
+ } else if (W.contains("-") && !W.endsWith("-")) { //compound word with maqqef
String[] tmp = W.split("-");
return convertWord(tmp[0]) + convertWord("-") + convertWord(tmp[1]);
- }
- else{
+ } else {
return convertWord(W);
}
}
-public String convertWord(String W){
+public String convertWord(String W) {
if (W.startsWith("**"))
- return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"+convertChars(W.substring(2));
+ return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"
+ + convertChars(W.substring(2));
else if (W.startsWith("*"))
- return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"+convertChars(W.substring(1));
- else return convertChars(W);
+ return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"
+ + convertChars(W.substring(1));
+ else
+ return convertChars(W);
}
// Translates an MC word (not qere or ketib) to a Unicode String.
// Notes are included as <note type="textual">text of note</note>.
-public String convertChars(String W){
+public String convertChars(String W) {
- len = W.length() ;
-
-// Convert characters in String to MCO objects, expanding
-// ConsonantMarks, Numbers, and Notes as necessary.
-// Move PrepositiveMarks to after their consonants.
+ len = W.length();
- MCOs = new Vector() ;
- for (int k = 0; k < len; k++) {
- c = W.charAt(k) ;
-
- M = (MCO) (MC.getMCO(c)).clone() ;
- Type = M.Type ;
+ // Convert characters in String to MCO objects, expanding
+ // ConsonantMarks, Numbers, and Notes as necessary.
+ // Move PrepositiveMarks to after their consonants.
-// Unknown
- if(Type == MCO.Unknown){
- message("MC: MC Object is of type Unknown."
- + "\nWord: " + W
- + "\nCharacter: " + c
- + "\n " ) ;
- System.exit(1);
- }
- else if(Type <= 5){ // These types need no expansion.
- MCOs.add(M) ;
- }
- else if(Type == MCO.Sheva ){
-// Might be a Hatef Vowel
- k1 = k + 1 ;
- if(k1 < len){
- c1 = W.charAt(k1) ;
- Mark = (MCO) (MC.getMCO(c1)).clone() ;
- if (Mark.Type == MCO.Vowel){
-// It is a Hatef vowel
- MCO Hatef = new MCO("hataf"+Mark.Name, MCO.Vowel,
- Mark.FinalValue, Mark.FinalValue, Mark.Group) ;
- MCOs.add(Hatef) ;
- k++ ;
- }
-// Not a Hatef Vowel
- else{
- MCOs.add(M) ;
- }
- }
-// Could only be a Sheva
- else{
- MCOs.add(M) ;
- }
- }
- else if(Type == MCO.ConsonantMark){
- Mark = (MCO) (M.Object).clone() ;
- M.Type = MCO.Consonant ;
- MCOs.add(M) ;
- MCOs.add(Mark) ;
- }
-//-----------------------------------------------------------------------------
- else if(Type == MCO.Number){
- k++ ;
- String StringInt = "" ;
- StringInt = StringInt + c ;
- StringInt = StringInt + W.charAt(k) ;
-
- I = new Integer(StringInt).intValue() ;
- if ((I > 99) | (I < 0) ){
- message("MC: Reconstructed int is out of range."
- + "\nWord: " + W
- + "\nCharacters: " + c + W.charAt(k)
- + "\nint: " + I ) ;
- }
- else{
-// Found a Mark of some sort
- MCO Found = MC.getMCO(I) ;
-
- if(Found.Type == MCO.PrepositiveMark){
-// System.out.println("***** Found a prepositive mark. " + I) ;
- k++ ;
- c1 = W.charAt(k) ;
- M = (MCO) (MC.getMCO(c1)).clone() ;
- if( M.Type == MCO.Consonant){
-// System.out.println("Swapping prepositive mark and consonant.") ;
- MCOs.add(M) ;
- MCOs.add(Found) ;
- }
-// *** Special section for MCO.ConsonantMark added 17 June 2004 ***
- else if( M.Type == MCO.ConsonantMark ){
-// System.out.println("Swapping prepositive mark and consonant-mark.") ;
- MCOs.add(M) ;
- MCOs.add(M.Object) ;
- MCOs.add(Found) ;
- }
- else{
- System.out.println("MC: PrepositiveMark not followed by a Consonant."
- + "\nWord: " + W + "\nType: " + M.Type ) ;
- MCOs.add(M) ;
- System.exit(0) ;
- }
- }
- else{
- MCOs.add(MC.getMCO(I)) ;
- }
- }
- }
-//-----------------------------------------------------------------------------
-
- else if(Type == MCO.Note){
- k++ ;
- c1 = W.charAt(k) ;
-// Check for an inverted nun. Assumes the note immediately follows a nun.
- if (c1=='8'){
- MCO Mtemp = (MCO) MCOs.lastElement() ;
- if( (Mtemp.Name).compareTo("nun") == 0) {
- String in = H.Invertednun ;
- Mtemp.Name = "invertednun" ;
- Mtemp.Value = in ;
- Mtemp.FinalValue = in ;
-// The masoranumberdot fails to work in IE. Ezra SIL already provides a dot.
-// Providing no accent.
-// MCO ud = (MCO) (MC.getMCO(81)).clone() ; // revia 81, masora 96
-// MCOs.add(ud) ;
- }
- else{
- System.out.println("Prior MCO isn't a nun!") ;
- }
- }
-// Leave a note
- M.Value = Character.toString(c1) ;
- MCOs.add(M) ;
- }
- else{
- System.out.println("MC: Unknown type for an MCO Object.") ;
- }
- }
-//-----------------------------------------------------------------------------
-
-// At this point the Consonants are where they should be.
-// Locating the consonants and FinalConsonants.
+ MCOs = new Vector<MCO>();
+ for (int k = 0; k < len; k++) {
+ c = W.charAt(k);
-// An incoming word a final consonant before a maqaf
-// as well as at the end .
-
- int LastConsonant = -1 ;
- int ConsonantIndex = 0 ;
- int ConsonantCount = 0 ;
- for (int k = 0; k < MCOs.size(); k++){
- M = (MCO) MCOs.elementAt(k) ;
+ M = (MCO) (MC.getMCO(c)).clone();
+ Type = M.Type;
-// *** Test for MCO.ConsonantMark added 17 June 2004 ***
- if( M.Type == MCO.Consonant | M.Type == MCO.ConsonantMark ){
+ // Unknown
+ if (Type == MCO.Unknown) {
+ errorMessage("MC: MC Object is of type Unknown." + "\nWord: " + W
+ + "\nCharacter: " + c
+ + "\n ");
+ } else if (Type <= 5) { // These types need no expansion.
+ MCOs.addElement(M);
+ } else if (Type == MCO.Sheva) {
+ // Might be a Hatef Vowel
+ k1 = k + 1;
+ if (k1 < len) {
+ c1 = W.charAt(k1);
+ Mark = (MCO) (MC.getMCO(c1)).clone();
+ if (Mark.Type == MCO.Vowel) {
+ // It is a Hatef vowel
+ MCO Hatef = new MCO("hataf" + Mark.Name, MCO.Vowel,
+ Mark.FinalValue, Mark.FinalValue, Mark.Group);
+ MCOs.add(Hatef);
+ k++;
+ }
+ // Not a Hatef Vowel
+ else {
+ MCOs.add(M);
+ }
+ }
+ // Could only be a Sheva
+ else {
+ MCOs.add(M);
+ }
+ } else if (Type == MCO.ConsonantMark) {
+ Mark = (MCO) (M.Object).clone();
+ M.Type = MCO.Consonant;
+ MCOs.add(M);
+ MCOs.add(Mark);
+ }
+ //-----------------------------------------------------------------------------
+ else if (Type == MCO.Number) {
+ k++;
+ String StringInt = "";
+ StringInt = StringInt + c;
+ StringInt = StringInt + W.charAt(k);
- ConsonantPositions[ConsonantIndex] = k ;
- LastConsonant = k ;
- ConsonantIndex++ ;
- ConsonantCount++ ;
- }
-// Look for a Maqef, if found, set the LastConsonant final.
- if( (M.Name).compareTo("maqef") == 0){
- setFinal(LastConsonant) ;
- }
- }
-
- if (ConsonantCount > 0){
- setFinal(LastConsonant) ;
- OrderedMCOs = new Vector() ;
- int Limit = 0 ;
- for( int ConsonantNumber = 0; ConsonantNumber < ConsonantCount; ConsonantNumber++) {
- if (ConsonantNumber+1 == ConsonantCount){
- Limit = MCOs.size() ;
- }
- else{
- Limit = ConsonantPositions[ConsonantNumber+1] ;
- }
- Order(MCOs, ConsonantPositions[ConsonantNumber], Limit, OrderedMCOs ) ;
- }
- }
- else{
- OrderedMCOs = MCOs ;
- }
+ I = new Integer(StringInt).intValue();
+ if ((I > 99) | (I < 0)) {
+ errorMessage("MC: Reconstructed int is out of range."
+ + "\nWord: " + W + "\nCharacters: " + c + W.charAt(k)
+ + "\nint: " + I);
+ } else {
+ // Found a Mark of some sort
+ MCO Found = MC.getMCO(I);
-// Output the ordered Vector
+ if (Found.Type == MCO.PrepositiveMark) {
+ // System.out.println("***** Found a prepositive mark. " + I) ;
+ k++;
+ c1 = W.charAt(k);
+ M = (MCO) (MC.getMCO(c1)).clone();
+ if (M.Type == MCO.Consonant) {
+ // System.out.println("Swapping prepositive mark and consonant.") ;
+ MCOs.add(M);
+ MCOs.add(Found);
+ }
+ // *** Special section for MCO.ConsonantMark added 17 June 2004 ***
+ else if (M.Type == MCO.ConsonantMark) {
+ // System.out.println("Swapping prepositive mark and consonant-mark.") ;
+ MCOs.add(M);
+ MCOs.add(M.Object);
+ MCOs.add(Found);
+ } else {
+ System.out
+ .println("MC: PrepositiveMark not followed by a Consonant."
+ + "\nWord: " + W + "\nType: " + M.Type);
+ MCOs.add(M);
+ System.exit(0);
+ }
+ } else {
+ MCOs.add(MC.getMCO(I));
+ }
+ }
+ }
+ //-----------------------------------------------------------------------------
- String S = "" ;
- for (int k = 0; k < OrderedMCOs.size(); k++){
- M = (MCO) OrderedMCOs.elementAt(k) ;
- Type = M.Type ;
-// Only Notes require special treatment
- if (Type == MCO.Note){
- S = S + "<note type=\"textual\" xml:lang=\"en\">"+ Note.Notes.get( M.Value)+ "</note>";
- }
-// //Mark morph segments when a maqef is present
-// else if ( (M.Name).compareTo("maqef") == 0 ){
-// S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart;
-// }
-
- else if ((Type == MCO.MorphologicalDivision)){
- S = S + P.MorphologicalDivisionMarker ;
- }
- else{
- S = S + M.Value ;
- }
- }
-
- return S ;
- }
+ else if (Type == MCO.Note) {
+ k++;
+ c1 = W.charAt(k);
+ // Check for an inverted nun. Assumes the note immediately follows a nun.
+ if (c1 == '8') {
+ MCO Mtemp = (MCO) MCOs.lastElement();
+ if ((Mtemp.Name).compareTo("nun") == 0) {
+ String in = H.Invertednun;
+ Mtemp.Name = "invertednun";
+ Mtemp.Value = in;
+ Mtemp.FinalValue = in;
+ // The masoranumberdot fails to work in IE. Ezra SIL already provides a dot.
+ // Providing no accent.
+ // MCO ud = (MCO) (MC.getMCO(81)).clone() ; // revia 81, masora 96
+ // MCOs.add(ud) ;
+ } else {
+ System.out.println("Prior MCO isn't a nun!");
+ }
+ }
+ // Leave a note
+ M.Value = Character.toString(c1);
+ MCOs.add(M);
+ } else {
+ System.out.println("MC: Unknown type for an MCO Object.");
+ }
+ }
+ //-----------------------------------------------------------------------------
+
+ // At this point the Consonants are where they should be.
+ // Locating the consonants and FinalConsonants.
+
+ // An incoming word may have a final consonant before a maqaf
+ // as well as at the end.
+
+ int LastConsonant = -1;
+ int ConsonantIndex = 0;
+ int ConsonantCount = 0;
+ for (int k = 0; k < MCOs.size(); k++) {
+ M = (MCO) MCOs.elementAt(k);
+
+ if ((M.Type == MCO.Consonant) || (M.Type == MCO.ConsonantMark)) {
+ ConsonantPositions[ConsonantIndex] = k;
+ LastConsonant = k;
+ ConsonantIndex++;
+ ConsonantCount++;
+ }
+ // Look for a Maqef, if found, set the LastConsonant final.
+ if ((M.Name).compareTo("maqef") == 0) {
+ setFinal(LastConsonant);
+ }
+ }
+
+ if (ConsonantCount > 0) {
+ setFinal(LastConsonant);
+ OrderedMCOs = new Vector<MCO>();
+ int Limit = 0;
+ for (int ConsonantNumber = 0; ConsonantNumber < ConsonantCount; ConsonantNumber++) {
+ if (ConsonantNumber + 1 == ConsonantCount) {
+ Limit = MCOs.size();
+ } else {
+ Limit = ConsonantPositions[ConsonantNumber + 1];
+ }
+ Order(MCOs, ConsonantPositions[ConsonantNumber], Limit, OrderedMCOs);
+ }
+ } else {
+ OrderedMCOs = MCOs;
+ }
+
+ // Output the ordered Vector
+
+ String S = "";
+ for (int k = 0; k < OrderedMCOs.size(); k++) {
+ M = (MCO) OrderedMCOs.elementAt(k);
+ Type = M.Type;
+ // Only Notes require special treatment
+ if (Type == MCO.Note) {
+ S = S + "<note type=\"textual\" xml:lang=\"en\">"
+ + Note.Notes.get(M.Value) + "</note>";
+ }
+ // //Mark morph segments when a maqef is present
+ // else if ( (M.Name).compareTo("maqef") == 0 ){
+ // S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart;
+ // }
+
+ else if ((Type == MCO.MorphologicalDivision)) {
+ S = S + Parser.MorphologicalDivisionMarker;
+ } else {
+ S = S + M.Value;
+ }
+ }
+
+ return S;
+}
+
//-----------------------------------------------------------------------------
// Order the Marks following a Consonant.
-void Order( Vector MCOs, int StartIndex, int Limit, Vector OrderedMCOs) {
- MCO m ;
-
-// Check for no Marks
- if(StartIndex+1==Limit){
- m = (MCO) MCOs.elementAt(StartIndex) ;
- OrderedMCOs.add(m) ;
- return ;
- }
-// Check for one Mark
- if(StartIndex+2==Limit){
- m = (MCO) MCOs.elementAt(StartIndex) ;
- OrderedMCOs.add(m) ;
- m = (MCO) MCOs.elementAt(StartIndex+1) ;
- OrderedMCOs.add(m) ;
- return ;
- }
-
-// Two or more Marks
-
-// Save the Consonant
- m = (MCO) MCOs.elementAt(StartIndex) ;
- OrderedMCOs.add(m) ;
+void Order(Vector<MCO> MCOs, int StartIndex, int Limit, Vector<MCO> OrderedMCOs) {
+ MCO m;
-// Order the marks
- int MarkCount = (Limit-StartIndex)-1 ;
- boolean[] Written = new boolean[MarkCount] ;
- for (int k =0; k < MarkCount; k++){
- Written[k] = false ;
- }
-
- int WrittenCount = 0 ;
- do{
-// Find the MCO with the smallest possible Group value and write it.
- int GroupTest = 1000 ;
- int MCOMin = -1 ;
- for( int k = StartIndex+1; k < Limit; k++){
- if(!Written[ k -(StartIndex+1) ]){
- m = (MCO) MCOs.elementAt(k) ;
- int g = m.Group ;
- if ( g < GroupTest ){
- GroupTest = g ;
- MCOMin = k ;
- }
- }
- }
- m = (MCO) MCOs.elementAt(MCOMin) ;
- OrderedMCOs.add(m) ;
- Written[MCOMin -(StartIndex+1) ] = true ;
- WrittenCount++ ;
- }while(WrittenCount < MarkCount) ;
-
- }
+ // Check for no Marks
+ if (StartIndex + 1 == Limit) {
+ m = (MCO) MCOs.elementAt(StartIndex);
+ OrderedMCOs.add(m);
+ return;
+ }
+ // Check for one Mark
+ if (StartIndex + 2 == Limit) {
+ m = (MCO) MCOs.elementAt(StartIndex);
+ OrderedMCOs.add(m);
+ m = (MCO) MCOs.elementAt(StartIndex + 1);
+ OrderedMCOs.add(m);
+ return;
+ }
+
+ // Two or more Marks
+
+ // Save the Consonant
+ m = (MCO) MCOs.elementAt(StartIndex);
+ OrderedMCOs.add(m);
+
+ // Order the marks
+ int MarkCount = (Limit - StartIndex) - 1;
+ boolean[] Written = new boolean[MarkCount];
+ for (int k = 0; k < MarkCount; k++) {
+ Written[k] = false;
+ }
+
+ int WrittenCount = 0;
+ do {
+ // Find the MCO with the smallest possible Group value and write it.
+ int GroupTest = 1000;
+ int MCOMin = -1;
+ for (int k = StartIndex + 1; k < Limit; k++) {
+ if (!Written[k - (StartIndex + 1)]) {
+ m = (MCO) MCOs.elementAt(k);
+ int g = m.Group;
+ if (g < GroupTest) {
+ GroupTest = g;
+ MCOMin = k;
+ }
+ }
+ }
+ m = (MCO) MCOs.elementAt(MCOMin);
+ OrderedMCOs.add(m);
+ Written[MCOMin - (StartIndex + 1)] = true;
+ WrittenCount++;
+ } while (WrittenCount < MarkCount);
+
+}
+
//-----------------------------------------------------------------------------
// Output a message plus position.
-public void message(String m){
- System.out.print(m) ;
-// P.printPosition() ;
- }
+public void errorMessage(String m) {
+ System.err.println(m);
+ System.err.println("Current line: " + Parser.currentLine);
+ System.exit(1);
+}
+
//-----------------------------------------------------------------------------
// Output a message plus position.
-public void print(Vector v){
- System.out.println("\n") ;
- for (int k = 0; k < v.size(); k++){
- MCO m = (MCO) v.elementAt(k) ;
- m.print() ;
- }
- System.out.println("\n") ;
- }
+public void print(Vector v) {
+ System.out.println("\n");
+ for (int k = 0; k < v.size(); k++) {
+ MCO m = (MCO) v.elementAt(k);
+ m.print();
+ }
+ System.out.println("\n");
+}
+
//-----------------------------------------------------------------------------
// Set a consonant as final.
-public void setFinal(int Position){
-// Do nothing if there's no Position to set
- if (Position < 0) return ;
-
- M = (MCO) MCOs.elementAt(Position) ;
- if (M.Type == MCO.Consonant){
- MCO Final = new MCO("final" + M.Name, MCO.Consonant, M.FinalValue, M.FinalValue,
- M.Group) ;
- MCOs.setElementAt(Final, Position) ;
- }
- else{
- message("LastConsonant is not a consonant!") ;
- }
- return ;
- }
-//-----------------------------------------------------------------------------
+public void setFinal(int Position) {
+ // Do nothing if do_setFinal is not set; we are in the midst of a word, but not at its end
+ if (!do_setFinal)
+ return;
+ // Do nothing if there's no Position to set
+ if (Position < 0)
+ return;
+
+ M = (MCO) MCOs.elementAt(Position);
+ if ((M.Type == MCO.Consonant) || (M.Type == MCO.ConsonantMark)) {
+ MCO Final = new MCO("final" + M.Name, MCO.Consonant, M.FinalValue,
+ M.FinalValue, M.Group);
+ MCOs.setElementAt(Final, Position);
+ } else {
+ String err = "LastConsonant is not a consonant!\n";
+ for (int i = 0; i < MCOs.size(); i++)
+ err += "MCOs[" + i + "] == " + MCOs.elementAt(i).Name + "\n";
+ err += "Position " + Position + "\n";
+ errorMessage(err);
+ }
}
-//==============================================================================
-//==============================================================================
+}
\ No newline at end of file
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -13,12 +13,12 @@
public class WLC2OSIS{
// Define the title and descriptions.
-public String Title = "The Westminster Leningrad Codex (WLC)" ;
+private String Title = "The Westminster Leningrad Codex (WLC)" ;
-public String ShortDescription = "from the electronic version of the Leningrad Codex "
+private String ShortDescription = "from the electronic version of the Leningrad Codex "
+ "maintained by the Westminster Hebrew Institute." ;
-public String[] Description = {
+private String[] Description = {
"This text began as an electronic transcription by Whitaker and Parunak of the 1983 "
+ "printed edition of Biblia Hebraica Stuttgartensia (BHS). The "
+ "transcription is called the Michigan-Claremont electronic text and was archived "
@@ -51,25 +51,27 @@
"The book names in English and Hebrew of the Jewish Publication Society "
+ "(JPS) Tanach have been added."} ;
+public static XMLWriter writer;
+public static String OutputDirectory ;
-public Parser p ;
-public XMLWriter writer;
-public String InputFilename;
-public String OutputDirectory ;
-
-public WLC2OSIS( String file, String directory ){
+public WLC2OSIS( String in_file, String out_directory, String out_file, boolean wlc_only ){
- InputFilename = file ;
- OutputDirectory = directory ;
+ OutputDirectory = out_directory ;
- System.out.println("\nWLC2OSIS: " + Title + " " + ShortDescription ) ;
- System.out.println("\nInput file: " + InputFilename ) ;
- System.out.println("\nOutput directory: " + OutputDirectory ) ;
+ System.out.println("WLC2OSIS: " + Title + " " + ShortDescription );
+ if (wlc_only){
+ System.out.println("Not including MORPH data.");
+ }
+ else{
+ System.out.println("Including full MORPH data.");
+ }
+ System.out.println("Input file: " + in_file );
+ System.out.println("Output file: " + OutputDirectory + "/" + out_file );
// Read, parse, and write the book files.
- writer = new XMLWriter(OutputDirectory, "wlc_morph") ;
+ writer = new XMLWriter(OutputDirectory, out_file) ;
writer.openTag("osisText osisIDWork=\"writer\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ;
writer.openTag("header", 0) ;
@@ -81,20 +83,20 @@
writer.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible");
writer.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.writer.2004");
writer.writeAttributedString("rights", 2, "type=\"x-copyright\"",
- "The writer is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
+ "The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
writer.writeString("scope", 2, "Hebrew Bible, Old Testament");
writer.writeString("refSystem", 2, "MT");
writer.closeTag("work", 1);
writer.closeTag("header", 0);
- p = new Parser(this, false) ;
- p.parse() ;
+ Parser p = new Parser() ;
+ p.parse(in_file, wlc_only) ;
writer.closeTag("osisText", 0);
writer.close();
- System.out.println("\nWLC2OSIS: Normal end.") ;
+ System.out.println("Finished.") ;
}
}
\ No newline at end of file
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java 2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java 2006-09-16 21:25:27 UTC (rev 80)
@@ -1,17 +1,18 @@
package WLC2OSIS ;
public class WLC2OSISMain{
-
-static WLC2OSIS A ;
-
/**
- * Static main() method
- *
+ * Static main() method
* @param args String[]
*/
public static void main( String[] args) {
- A = new WLC2OSIS( args[0], args[1] ) ;
- System.exit(0) ;
+ {
+ WLC2OSIS A = new WLC2OSIS( args[0], args[1], "wlc_morph", false ) ;
+ }
+ {
+ WLC2OSIS A = new WLC2OSIS( args[0], args[1], "wlc", true ) ;
+ }
+ System.exit(0);
}
} //class
More information about the sword-cvs
mailing list