[sword-svn] r80 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: . Parse Translate

mgruner at www.crosswire.org mgruner at www.crosswire.org
Sat Sep 16 14:25:39 MST 2006


Author: mgruner
Date: 2006-09-16 14:25:27 -0700 (Sat, 16 Sep 2006)
New Revision: 80

Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java
Log:
finalized
sent preview version to Kirk for first check


Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -27,14 +27,14 @@
 
 // Samek
 public void samek(){ //parasah setumah, closed paragraph == small space in line
-    A.writer.appendText("   " + H.samekh + "   ") ;
+	WLC2OSIS.writer.appendText("   " + H.samekh + "   ") ;
 //    P.MarkerWritten = true ;
     }
 //------------------------------------------------------------------------------
 
 // Pe
 public void pe(){ // parasah petuhah, open paragraph == new line
-    A.writer.appendText(" " + H.pe + " " + "<p/>") ;
+	WLC2OSIS.writer.appendText(" " + H.pe + " " + "<p/>") ;
 //    P.MarkerWritten = true ;
     }
 //------------------------------------------------------------------------------

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -6,32 +6,26 @@
 
 public class Parser{
 
-WLC2OSIS A ;
 Translate T ;
 
-public final String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
-public final String MorphologicalSegmentEnd    = "</seg>" ;
-public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+public static final String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
+public static final String MorphologicalSegmentEnd    = "</seg>" ;
+public static final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+public static String currentLine;
+public static String nextLine;
 
 //-----------------------------------------------------------------------------
-public Parser(WLC2OSIS A, boolean wlc_only) {
-    this.A = A ;
-
-    T = new Translate(A, this) ;
-  
+public Parser() {
+    T = new Translate() ;
     new MC() ;
     Note.setNotes();
-    }    
+}    
 
-
-public void parse(){
-    String s ;
-    System.out.println("\n");
-
+public void parse(String in_file, boolean wlc_only){
     BufferedReader file;
 
    	try{
-	    file = new BufferedReader( new FileReader( A.InputFilename ));
+	    file = new BufferedReader( new FileReader( in_file ));
 	}
 	catch (IOException e) {
 		file = null;
@@ -42,27 +36,47 @@
 	
 	String oldBookCode = "";
 	String newBookCode = "";
+	String nextBookCode = "";
 	int oldChapter = 0;
 	int newChapter = 0;
+	int nextChapter = 0;
 	int oldVerse = 0;
 	int newVerse = 0;
+	int nextVerse = 0;
 	int oldWordNumber = 0;
 	int newWordNumber = 0;
+	int nextWordNumber = 0;
+	
+	String newWord = "";
+	String oldWord = "";
+	
+	currentLine = "";
+	try{
+		nextLine = file.readLine(); //skip first line, book intro
+		nextLine = file.readLine();
+	}
+  	catch (IOException e) {
+		System.out.println("Read error: " + e) ;
+		System.exit(1);
+    }
 
 //																		book	chap	vs	  word#	subword#  note		word	lemma	homonym lang  morph
     java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
      
     while ( true ){
-    	s="";
+    	currentLine = nextLine;
     	try{
-	    	s = file.readLine();
-	    	System.out.println(s);
+	    	nextLine = file.readLine();
+	        if ( (nextLine != null) && nextLine.startsWith(">") ){ //skip this line, book intro
+		    	nextLine = file.readLine();
+	        }
+//	        System.out.println("CurrentLine: "+currentLine);
+//	    	System.out.println("NextLine:    "+nextLine);
 	    }
 	  	catch (IOException e) {
-			System.out.println("Read error: " + e) ;
-			break;
         }
-        if ( s == null ){
+
+	  	if ( currentLine == null ){
         	try{
 	        	file.close();
 	        }
@@ -72,16 +86,34 @@
         	break;
         }
     
-        if ( s.startsWith(">") ){ //ignore this line
+        else if ( !currentLine.startsWith("gn") ){ //limit to Genesis for presentation purposes
         	continue;
         }
 
-		java.util.regex.Matcher match = pattern.matcher( s );
+	  	
+		java.util.regex.Matcher match = pattern.matcher( currentLine );
 		if (!match.matches()){
-			System.out.println("No match found: " +s);
+			System.out.println("No match found at line: " + currentLine);
 			System.exit(1);
 		}
-		
+
+		java.util.regex.Matcher match_nextLine = null;
+		if (nextLine != null){
+			match_nextLine = pattern.matcher( nextLine );
+			if (match_nextLine.matches()){
+				nextBookCode = match_nextLine.group(1);
+				nextChapter = Integer.parseInt( match_nextLine.group(2) );
+				nextVerse   = Integer.parseInt( match_nextLine.group(3) );
+				nextWordNumber	= Integer.parseInt( match_nextLine.group(4) );
+			}
+		}
+		if ((match_nextLine == null) || !match_nextLine.matches()){
+			nextBookCode = "";
+			nextChapter = 0;
+			nextVerse = 0;
+			nextWordNumber = 0;
+		}
+
 		//remember old values
 		oldBookCode = newBookCode;
 	    oldChapter = newChapter;
@@ -93,9 +125,12 @@
 		newChapter = Integer.parseInt( match.group(2) );
 		newVerse   = Integer.parseInt( match.group(3) );
 		newWordNumber	= Integer.parseInt( match.group(4) );
+		
 		//newSubWordNumber = Integer.parseInt( match.group(5) ); not used
 		String note = match.group(6);
-		String word = match.group(7);
+		//remember previous word value
+		oldWord = newWord; 
+		newWord = match.group(7);
 		String lemma	= match.group(8);
 		String homonym = match.group(9);
 		if (homonym != null){
@@ -104,97 +139,116 @@
 		String separator = match.group(10);
 		String morph  = match.group(11);
 		
+		if ((newBookCode.equals(nextBookCode)) && (newChapter == nextChapter) && (newVerse == nextVerse) && (newWordNumber == nextWordNumber))
+			Translate.do_setFinal = false;
+		else
+			Translate.do_setFinal = true;
+		
 		// Verse changed, close old and open new
 		if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
 		{
 			if (oldVerse > 0) {
-				A.writer.appendText("</w></verse>");
+				WLC2OSIS.writer.appendText("</w></verse>");
 			}
-		    A.writer.openTag(
+			WLC2OSIS.writer.openTag(
 		    		"verse osisID=\""+
 		    		BookName.getBookName(bookNames, newBookCode).abbrev+"."+
 		    		newChapter+"."+
 		    		newVerse+"\"", 2);
 
-		   	if (separator.equals("@")){
-		   		A.writer.appendText("<w xml:lang=\"he\">");
+		   	if (wlc_only){
+		   		WLC2OSIS.writer.appendText("<w>");
+		   	}
+		   	else if (separator.equals("@")){
+		   		WLC2OSIS.writer.appendText("<w xml:lang=\"he\">");
 		    }	
 		    else if (separator.equals("%")){
-		    	A.writer.appendText("<w xml:lang=\"ah+\">");
+		    	WLC2OSIS.writer.appendText("<w xml:lang=\"ah+\">");
 		    }
 		    else {
-		    	System.out.println("unknown separator: "+s);
+		    	System.err.println("unknown separator: " + currentLine);
 		    	System.exit(1);
 		    }
 		}
 		
 		//same verse, another word, add space
 		if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
-			if (separator.equals("@")){
-				A.writer.appendText("</w> <w xml:lang=\"he\">");
+			//Last word had a maqqef, no space between words
+			if (oldWord.endsWith("-"))
+		   		WLC2OSIS.writer.appendText("</w>");
+			//normal case, space between words
+			else
+		   		WLC2OSIS.writer.appendText("</w> ");
+			
+		   	if (wlc_only){
+		   		WLC2OSIS.writer.appendText("<w>");
+		   	}
+		   	else if (separator.equals("@")){
+		   		WLC2OSIS.writer.appendText("<w xml:lang=\"he\">");
 			}
 			else if (separator.equals("%")){
-				A.writer.appendText("</w> <w xml:lang=\"ah+\">");
+				WLC2OSIS.writer.appendText("<w xml:lang=\"ah+\">");
 			}
 			else {
-				System.out.println("unknown separator: "+s);
+				System.err.println("unknown separator: " + currentLine);
 				System.exit(1);
 			}
 		}
 		
 		//special case: nonprinting article, leave out for now
 		// TODO: FIX
-		if (word.equals("_") || word.equals("*_") || word.equals("**_")){
+		if (newWord.equals("_") || newWord.equals("*_") || newWord.equals("**_")){
 			continue;
 		}
 		//Qere / Ketiv only
-		else if (word.equals("**qq")){
-			A.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>");
+		else if (newWord.equals("**qq")){
+			WLC2OSIS.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>");
 			continue;
 		}
-		else if (word.equals("*kk")){
-			A.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>");
+		else if (newWord.equals("*kk")){
+			WLC2OSIS.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>");
 			continue;
 		}
-
 		// Paragraph marker found
-		if (morph.compareTo("x") == 0){
-			if (word.compareTo("P") == 0){ //
-				A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"<p/>");
+		else if (morph.compareTo("x") == 0){
+			if (newWord.compareTo("P") == 0){ //
+				WLC2OSIS.writer.appendText("  "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+"<p/>");
 			}
-			else if (word.compareTo("S") == 0){ //
-				A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"  ");
+			else if (newWord.compareTo("S") == 0){ //
+				WLC2OSIS.writer.appendText("  "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+"  ");
 			}
-			else if (word.compareTo("N") == 0){ //inverted nun
-				A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
+			else if (newWord.compareTo("N") == 0){ //inverted nun
+				WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+" ");
 			}
-			else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
+			else {
+				System.err.println("Unknown paragraph marker: " + currentLine); 
+				System.exit(1);
+			}
 		}
+		else{
+			//now the text itself
+			WLC2OSIS.writer.appendText( constructSegment(newWord, lemma, homonym, morph, wlc_only) );
+		}
 		
-
-		
-		//now the text itself
-		A.writer.appendText( constructWord(word, lemma, homonym, morph) );
-		
 		//Note found
 		if (note != null && note.length() > 0){
 			//System.out.println("Note: "+s);
-			A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
+			WLC2OSIS.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
 		}
     }
     
-    A.writer.closeTag("verse", 2);
-    
-    return ;
+    WLC2OSIS.writer.appendText("</w></verse>");
 }
-//----------------------------------------------------------------------------
 
-public String constructWord(String word, String lemma, String homonym, String morph){
-	String result = "<seg type=\"x-morph\" lemma=\""+T.convertCompoundWord(lemma) + "\" ";
-	if (homonym != null) {
-		result += "homonym=\""+homonym + "\" ";
+public String constructSegment(String word, String lemma, String homonym, String morph, boolean wlc_only){
+	String result = "<seg type=\"morph\"";
+	if (!wlc_only){
+		result += " lemma=\""+T.convertCompoundWord(lemma)+"\"";
+		if (homonym != null)
+			result += " homonym=\""+homonym +"\"";
+		result += " morph=\""+morph+"\"";
 	}
-	result += "morph=\""+morph+"\">";
+	result += ">";
 	result += T.convertCompoundWord(word)+"</seg>";
 	return result;
 }

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -94,6 +94,7 @@
 public void print(){
    System.out.print(Name + " + ") ; 
    }
+
 //-----------------------------------------------------------------------------
 
 public Object clone(){

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -9,7 +9,7 @@
 //==============================================================================
 public class Note{
 
-public static Hashtable Notes = new Hashtable();
+public static Hashtable<String, String> Notes = new Hashtable<String, String>();
 
 public static void setNotes(){
 

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -1,360 +1,355 @@
-package WLC2OSIS.Translate ;
+package WLC2OSIS.Translate;
 
-import WLC2OSIS.* ;
-import WLC2OSIS.Parse.* ;
-import WLC2OSIS.Translate.Note ;
+import WLC2OSIS.Parse.*;
+import WLC2OSIS.Translate.Note;
 
-import java.util.Vector ;
+import java.util.Vector;
+
 //==============================================================================
 /**
  *  <b>Critical translation of KQ-free MC words into Unicode characters.</b><p>
  */
 //==============================================================================
-public class Translate{
+public class Translate {
 
-private
-	Parser P ;
-	WLC2OSIS A ;
+private static MCO M;
+private static MCO Mark;
+private static char c, c1;
+private static int Type, I, k1, len;
+private static Vector<MCO> MCOs, OrderedMCOs;
+private static int[] ConsonantPositions = new int[100];
 
-	MCO M;
-	MCO Mark ;
-	char c, c1 ;
-	int Type, I, k1, len ;
-	Vector MCOs, OrderedMCOs ;
-	int[] ConsonantPositions = new int[100] ;
+//This controls if the last consonant may be changed into its final form
+//will be set to true by Parser only if we are at the end of one word
+public static boolean do_setFinal = true;
+
 //-----------------------------------------------------------------------------
 
-public Translate(WLC2OSIS A, Parser P) {
-    this.A = A ;
-    this.P = P ;
-    }    
+public Translate() {
+}
+
 //------------------------------------------------------------------------------
 
-public String convertCompoundWord(String W){
-	if (W.contains("~")){ //compound word without maqqef
+public String convertCompoundWord(String W) {
+	if (W.contains("~")) { //compound word without maqqef
 		String[] tmp = W.split("~");
 		return convertWord(tmp[0]) + " " + convertWord(tmp[1]);
-	}
-	else if (W.contains("-") && !W.endsWith("-")){ //compound word with maqqef
+	} else if (W.contains("-") && !W.endsWith("-")) { //compound word with maqqef
 		String[] tmp = W.split("-");
 		return convertWord(tmp[0]) + convertWord("-") + convertWord(tmp[1]);
-	}
-	else{
+	} else {
 		return convertWord(W);
 	}
 }
 
-public String convertWord(String W){
+public String convertWord(String W) {
 	if (W.startsWith("**"))
-		return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"+convertChars(W.substring(2));
+		return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"
+				+ convertChars(W.substring(2));
 	else if (W.startsWith("*"))
-		return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"+convertChars(W.substring(1));
-	else return convertChars(W);
+		return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"
+				+ convertChars(W.substring(1));
+	else
+		return convertChars(W);
 }
 
 // Translates an MC word (not qere or ketib) to a Unicode String.
 // Notes are included as <note type="textual">text of note</note>.
 
-public String convertChars(String W){
+public String convertChars(String W) {
 
-	len = W.length() ;
-        
-// Convert characters in String to MCO objects, expanding
-// ConsonantMarks, Numbers, and Notes as necessary.
-// Move PrepositiveMarks to after their consonants.
+	len = W.length();
 
-    MCOs = new Vector() ;
-    for (int k = 0; k < len; k++) {
-        c = W.charAt(k) ;        
-        
-        M = (MCO) (MC.getMCO(c)).clone() ;
-        Type = M.Type ;
+	// Convert characters in String to MCO objects, expanding
+	// ConsonantMarks, Numbers, and Notes as necessary.
+	// Move PrepositiveMarks to after their consonants.
 
-// Unknown 
-        if(Type == MCO.Unknown){
-            message("MC: MC Object is of type Unknown."
-                + "\nWord: " + W 
-                + "\nCharacter: " + c
-                + "\n                                     " ) ;
-            System.exit(1);
-             }
-        else if(Type <= 5){  // These types need no expansion.
-            MCOs.add(M) ;
-            }
-        else if(Type == MCO.Sheva ){
-// Might be a Hatef Vowel
-            k1 = k + 1 ;
-            if(k1 < len){
-                c1 = W.charAt(k1) ;
-                Mark = (MCO) (MC.getMCO(c1)).clone() ;
-                if (Mark.Type == MCO.Vowel){
-// It is a Hatef vowel
-                    MCO Hatef = new MCO("hataf"+Mark.Name, MCO.Vowel,
-                           Mark.FinalValue, Mark.FinalValue, Mark.Group) ;
-                    MCOs.add(Hatef) ;
-                    k++ ;
-                    }
-// Not a Hatef Vowel
-                else{
-                    MCOs.add(M) ;
-                    }
-                }
-// Could only be a Sheva
-            else{
-                MCOs.add(M) ;
-                }        
-            }
-      else if(Type == MCO.ConsonantMark){
-            Mark = (MCO) (M.Object).clone() ;
-            M.Type = MCO.Consonant ;
-            MCOs.add(M) ;
-            MCOs.add(Mark) ;
-            }
-//-----------------------------------------------------------------------------
-      else if(Type == MCO.Number){
-            k++ ;
-            String StringInt = "" ;
-            StringInt = StringInt + c ;
-            StringInt = StringInt + W.charAt(k) ;
-            
-            I = new Integer(StringInt).intValue() ;
-            if ((I > 99) | (I < 0) ){
-                 message("MC: Reconstructed int is out of range."
-                    + "\nWord: " + W 
-                    + "\nCharacters: " + c + W.charAt(k)
-                    + "\nint: " + I ) ;
-                 }
-            else{
-// Found a Mark of some sort
-                MCO Found = MC.getMCO(I) ;
-                
-                if(Found.Type == MCO.PrepositiveMark){
-//                  System.out.println("*****  Found a prepositive mark. " + I) ;
-                    k++ ;
-                    c1 = W.charAt(k) ;
-                    M = (MCO) (MC.getMCO(c1)).clone() ;
-                    if( M.Type == MCO.Consonant){
-//                      System.out.println("Swapping prepositive mark and consonant.") ;
-                        MCOs.add(M) ;
-                        MCOs.add(Found) ;
-                        } 
-// *** Special section for MCO.ConsonantMark added 17 June 2004 ***
-                    else if( M.Type == MCO.ConsonantMark ){
-//                      System.out.println("Swapping prepositive mark and consonant-mark.") ;
-                        MCOs.add(M) ;
-                        MCOs.add(M.Object) ;
-                        MCOs.add(Found) ;
-                        } 
-                    else{
-                        System.out.println("MC: PrepositiveMark not followed by a Consonant."
-                            + "\nWord: " + W + "\nType: " + M.Type ) ;
-                        MCOs.add(M) ;
-                        System.exit(0) ;
-                        }
-                    }
-                else{
-                    MCOs.add(MC.getMCO(I)) ;
-                    }
-                }
-            }
-//-----------------------------------------------------------------------------
-            
-        else if(Type == MCO.Note){
-            k++ ;
-            c1 = W.charAt(k) ;
-// Check for an inverted nun.   Assumes the note immediately follows a nun.
-            if (c1=='8'){
-                MCO Mtemp = (MCO) MCOs.lastElement() ;
-                if( (Mtemp.Name).compareTo("nun") == 0) {
-                    String in = H.Invertednun ;
-                    Mtemp.Name = "invertednun" ;   
-                    Mtemp.Value = in ;   
-                    Mtemp.FinalValue = in ;  
-// The masoranumberdot fails to work in IE. Ezra SIL already provides a dot.
-// Providing no accent.
-//                  MCO ud = (MCO) (MC.getMCO(81)).clone() ; // revia 81, masora 96
-//                  MCOs.add(ud) ;
-                    }
-                else{
-                    System.out.println("Prior MCO isn't a nun!") ;
-                    }
-                }
-// Leave a note
-            M.Value = Character.toString(c1) ;
-            MCOs.add(M) ;
-            }
-        else{
-            System.out.println("MC: Unknown type for an MCO Object.") ;
-            }
-        }
-//-----------------------------------------------------------------------------
-               
-// At this point the Consonants are where they should be.
-// Locating the consonants and FinalConsonants.
+	MCOs = new Vector<MCO>();
+	for (int k = 0; k < len; k++) {
+		c = W.charAt(k);
 
-// An incoming word a final consonant before a maqaf 
-// as well as at the end .
- 
-    int LastConsonant = -1 ;
-    int ConsonantIndex = 0 ;
-    int ConsonantCount = 0 ;
-    for (int k = 0; k < MCOs.size(); k++){
-        M = (MCO) MCOs.elementAt(k) ;
+		M = (MCO) (MC.getMCO(c)).clone();
+		Type = M.Type;
 
-// *** Test for MCO.ConsonantMark added 17 June 2004 ***
-        if( M.Type == MCO.Consonant | M.Type == MCO.ConsonantMark ){
+		// Unknown 
+		if (Type == MCO.Unknown) {
+			errorMessage("MC: MC Object is of type Unknown." + "\nWord: " + W
+					+ "\nCharacter: " + c
+					+ "\n                                     ");
+		} else if (Type <= 5) { // These types need no expansion.
+			MCOs.addElement(M);
+		} else if (Type == MCO.Sheva) {
+			// Might be a Hatef Vowel
+			k1 = k + 1;
+			if (k1 < len) {
+				c1 = W.charAt(k1);
+				Mark = (MCO) (MC.getMCO(c1)).clone();
+				if (Mark.Type == MCO.Vowel) {
+					// It is a Hatef vowel
+					MCO Hatef = new MCO("hataf" + Mark.Name, MCO.Vowel,
+							Mark.FinalValue, Mark.FinalValue, Mark.Group);
+					MCOs.add(Hatef);
+					k++;
+				}
+				// Not a Hatef Vowel
+				else {
+					MCOs.add(M);
+				}
+			}
+			// Could only be a Sheva
+			else {
+				MCOs.add(M);
+			}
+		} else if (Type == MCO.ConsonantMark) {
+			Mark = (MCO) (M.Object).clone();
+			M.Type = MCO.Consonant;
+			MCOs.add(M);
+			MCOs.add(Mark);
+		}
+		//-----------------------------------------------------------------------------
+		else if (Type == MCO.Number) {
+			k++;
+			String StringInt = "";
+			StringInt = StringInt + c;
+			StringInt = StringInt + W.charAt(k);
 
-            ConsonantPositions[ConsonantIndex] = k ;
-            LastConsonant = k ;
-            ConsonantIndex++ ;
-            ConsonantCount++ ;
-            }
-// Look for a Maqef, if found, set the LastConsonant final.
-        if( (M.Name).compareTo("maqef") == 0){
-            setFinal(LastConsonant) ;
-            }
-        }
-        
-    if (ConsonantCount > 0){
-        setFinal(LastConsonant) ;
-	    OrderedMCOs = new Vector() ;    
-    	int Limit = 0 ;
-	    for( int ConsonantNumber = 0; ConsonantNumber < ConsonantCount; ConsonantNumber++) {
-    	   if (ConsonantNumber+1 == ConsonantCount){
-        	   Limit = MCOs.size() ;
-	           }
-    	   else{
-        	   Limit = ConsonantPositions[ConsonantNumber+1] ;
-	           }
-    	   Order(MCOs, ConsonantPositions[ConsonantNumber], Limit, OrderedMCOs ) ;
-	   	   }
-       }
-    else{
-       OrderedMCOs = MCOs ;
-       }      
+			I = new Integer(StringInt).intValue();
+			if ((I > 99) | (I < 0)) {
+				errorMessage("MC: Reconstructed int is out of range."
+						+ "\nWord: " + W + "\nCharacters: " + c + W.charAt(k)
+						+ "\nint: " + I);
+			} else {
+				// Found a Mark of some sort
+				MCO Found = MC.getMCO(I);
 
-// Output the ordered Vector
+				if (Found.Type == MCO.PrepositiveMark) {
+					//                  System.out.println("*****  Found a prepositive mark. " + I) ;
+					k++;
+					c1 = W.charAt(k);
+					M = (MCO) (MC.getMCO(c1)).clone();
+					if (M.Type == MCO.Consonant) {
+						//                      System.out.println("Swapping prepositive mark and consonant.") ;
+						MCOs.add(M);
+						MCOs.add(Found);
+					}
+					// *** Special section for MCO.ConsonantMark added 17 June 2004 ***
+					else if (M.Type == MCO.ConsonantMark) {
+						//                      System.out.println("Swapping prepositive mark and consonant-mark.") ;
+						MCOs.add(M);
+						MCOs.add(M.Object);
+						MCOs.add(Found);
+					} else {
+						System.out
+								.println("MC: PrepositiveMark not followed by a Consonant."
+										+ "\nWord: " + W + "\nType: " + M.Type);
+						MCOs.add(M);
+						System.exit(0);
+					}
+				} else {
+					MCOs.add(MC.getMCO(I));
+				}
+			}
+		}
+		//-----------------------------------------------------------------------------
 
-    String S = "" ;
-    for (int k = 0; k < OrderedMCOs.size(); k++){
-        M = (MCO) OrderedMCOs.elementAt(k) ;
-        Type = M.Type ;
-//  Only Notes require special treatment
-        if (Type == MCO.Note){
-            S = S + "<note type=\"textual\" xml:lang=\"en\">"+ Note.Notes.get( M.Value)+ "</note>";
-            }
-//		//Mark morph segments when a maqef is present
-//		else if ( (M.Name).compareTo("maqef") == 0 ){
-//			S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart; 
-//		}
-	    
-        else if ((Type == MCO.MorphologicalDivision)){
-            S = S + P.MorphologicalDivisionMarker ;
-            }
-        else{
-            S = S + M.Value ;
-            } 
-        }      
-        
-    return S ;
-    }
+		else if (Type == MCO.Note) {
+			k++;
+			c1 = W.charAt(k);
+			// Check for an inverted nun.   Assumes the note immediately follows a nun.
+			if (c1 == '8') {
+				MCO Mtemp = (MCO) MCOs.lastElement();
+				if ((Mtemp.Name).compareTo("nun") == 0) {
+					String in = H.Invertednun;
+					Mtemp.Name = "invertednun";
+					Mtemp.Value = in;
+					Mtemp.FinalValue = in;
+					// The masoranumberdot fails to work in IE. Ezra SIL already provides a dot.
+					// Providing no accent.
+					//                  MCO ud = (MCO) (MC.getMCO(81)).clone() ; // revia 81, masora 96
+					//                  MCOs.add(ud) ;
+				} else {
+					System.out.println("Prior MCO isn't a nun!");
+				}
+			}
+			// Leave a note
+			M.Value = Character.toString(c1);
+			MCOs.add(M);
+		} else {
+			System.out.println("MC: Unknown type for an MCO Object.");
+		}
+	}
+	//-----------------------------------------------------------------------------
+
+	// At this point the Consonants are where they should be.
+	// Locating the consonants and FinalConsonants.
+
+	// An incoming word can have a final consonant before a maqef
+	// as well as at the end.
+
+	int LastConsonant = -1;
+	int ConsonantIndex = 0;
+	int ConsonantCount = 0;
+	for (int k = 0; k < MCOs.size(); k++) {
+		M = (MCO) MCOs.elementAt(k);
+
+		if ((M.Type == MCO.Consonant) || (M.Type == MCO.ConsonantMark)) {
+			ConsonantPositions[ConsonantIndex] = k;
+			LastConsonant = k;
+			ConsonantIndex++;
+			ConsonantCount++;
+		}
+		// Look for a Maqef, if found, set the LastConsonant final.
+		if ((M.Name).compareTo("maqef") == 0) {
+			setFinal(LastConsonant);
+		}
+	}
+
+	if (ConsonantCount > 0) {
+		setFinal(LastConsonant);
+		OrderedMCOs = new Vector<MCO>();
+		int Limit = 0;
+		for (int ConsonantNumber = 0; ConsonantNumber < ConsonantCount; ConsonantNumber++) {
+			if (ConsonantNumber + 1 == ConsonantCount) {
+				Limit = MCOs.size();
+			} else {
+				Limit = ConsonantPositions[ConsonantNumber + 1];
+			}
+			Order(MCOs, ConsonantPositions[ConsonantNumber], Limit, OrderedMCOs);
+		}
+	} else {
+		OrderedMCOs = MCOs;
+	}
+
+	// Output the ordered Vector
+
+	String S = "";
+	for (int k = 0; k < OrderedMCOs.size(); k++) {
+		M = (MCO) OrderedMCOs.elementAt(k);
+		Type = M.Type;
+		//  Only Notes require special treatment
+		if (Type == MCO.Note) {
+			S = S + "<note type=\"textual\" xml:lang=\"en\">"
+					+ Note.Notes.get(M.Value) + "</note>";
+		}
+		//		//Mark morph segments when a maqef is present
+		//		else if ( (M.Name).compareTo("maqef") == 0 ){
+		//			S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart; 
+		//		}
+
+		else if ((Type == MCO.MorphologicalDivision)) {
+			S = S + Parser.MorphologicalDivisionMarker;
+		} else {
+			S = S + M.Value;
+		}
+	}
+
+	return S;
+}
+
 //-----------------------------------------------------------------------------
 
 // Order the Marks following a Consonant.
 
-void Order( Vector MCOs, int StartIndex, int Limit, Vector OrderedMCOs) {
-     MCO m ;
-     
-//  Check for no Marks
-     if(StartIndex+1==Limit){
-         m = (MCO) MCOs.elementAt(StartIndex) ;
-         OrderedMCOs.add(m) ;
-         return ;
-         }
-//  Check for one Mark
-     if(StartIndex+2==Limit){
-         m = (MCO) MCOs.elementAt(StartIndex) ;
-         OrderedMCOs.add(m) ;
-         m = (MCO) MCOs.elementAt(StartIndex+1) ;
-         OrderedMCOs.add(m) ;
-         return ;
-         }
-         
-// Two or more Marks
-         
-// Save the Consonant
-     m = (MCO) MCOs.elementAt(StartIndex) ;
-     OrderedMCOs.add(m) ;
+void Order(Vector<MCO> MCOs, int StartIndex, int Limit, Vector<MCO> OrderedMCOs) {
+	MCO m;
 
-// Order the marks
-     int MarkCount = (Limit-StartIndex)-1 ; 
-     boolean[] Written = new boolean[MarkCount] ;
-     for (int k =0; k < MarkCount; k++){
-         Written[k] = false ;
-         }
-         
-     int WrittenCount = 0 ;
-     do{        
-// Find the MCO with the smallest possible Group value and write it.
-         int GroupTest = 1000 ;
-         int MCOMin = -1 ;
-         for( int k = StartIndex+1; k < Limit; k++){
-             if(!Written[ k -(StartIndex+1) ]){
-                 m = (MCO) MCOs.elementAt(k)  ;
-                 int g = m.Group ;
-                 if ( g < GroupTest ){
-                     GroupTest = g ;
-                     MCOMin = k ;
-                     }
-                 }
-             }
-         m = (MCO) MCOs.elementAt(MCOMin) ;
-         OrderedMCOs.add(m) ;
-         Written[MCOMin -(StartIndex+1) ] = true ;
-         WrittenCount++ ;
-         }while(WrittenCount < MarkCount) ;
-            
-     }
+	//  Check for no Marks
+	if (StartIndex + 1 == Limit) {
+		m = (MCO) MCOs.elementAt(StartIndex);
+		OrderedMCOs.add(m);
+		return;
+	}
+	//  Check for one Mark
+	if (StartIndex + 2 == Limit) {
+		m = (MCO) MCOs.elementAt(StartIndex);
+		OrderedMCOs.add(m);
+		m = (MCO) MCOs.elementAt(StartIndex + 1);
+		OrderedMCOs.add(m);
+		return;
+	}
+
+	// Two or more Marks
+
+	// Save the Consonant
+	m = (MCO) MCOs.elementAt(StartIndex);
+	OrderedMCOs.add(m);
+
+	// Order the marks
+	int MarkCount = (Limit - StartIndex) - 1;
+	boolean[] Written = new boolean[MarkCount];
+	for (int k = 0; k < MarkCount; k++) {
+		Written[k] = false;
+	}
+
+	int WrittenCount = 0;
+	do {
+		// Find the MCO with the smallest possible Group value and write it.
+		int GroupTest = 1000;
+		int MCOMin = -1;
+		for (int k = StartIndex + 1; k < Limit; k++) {
+			if (!Written[k - (StartIndex + 1)]) {
+				m = (MCO) MCOs.elementAt(k);
+				int g = m.Group;
+				if (g < GroupTest) {
+					GroupTest = g;
+					MCOMin = k;
+				}
+			}
+		}
+		m = (MCO) MCOs.elementAt(MCOMin);
+		OrderedMCOs.add(m);
+		Written[MCOMin - (StartIndex + 1)] = true;
+		WrittenCount++;
+	} while (WrittenCount < MarkCount);
+
+}
+
 //-----------------------------------------------------------------------------
 
 // Output a message plus position.
 
-public void message(String m){
-    System.out.print(m) ;
-//    P.printPosition() ;
-    }
+public void errorMessage(String m) {
+	System.err.println(m);
+	System.err.println("Current line: " + Parser.currentLine);
+	System.exit(1);
+}
+
 //-----------------------------------------------------------------------------
 
 // Output a message plus position.
 
-public void print(Vector v){
-    System.out.println("\n") ;
-    for (int k = 0; k < v.size(); k++){
-        MCO m = (MCO) v.elementAt(k) ;
-        m.print() ;
-        }
-    System.out.println("\n") ;
-    }
+public void print(Vector v) {
+	System.out.println("\n");
+	for (int k = 0; k < v.size(); k++) {
+		MCO m = (MCO) v.elementAt(k);
+		m.print();
+	}
+	System.out.println("\n");
+}
+
 //-----------------------------------------------------------------------------
 
 // Set a consonant as final.
 
-public void setFinal(int Position){
-// Do nothing if there's no Position to set
-        if (Position < 0) return ;
-        
-        M = (MCO) MCOs.elementAt(Position) ;
-        if (M.Type == MCO.Consonant){
-            MCO Final = new MCO("final" + M.Name, MCO.Consonant, M.FinalValue, M.FinalValue,
-                 M.Group) ;
-            MCOs.setElementAt(Final, Position) ;
-            }
-        else{
-            message("LastConsonant is not a consonant!") ;
-            }
-        return ;
-        }
-//-----------------------------------------------------------------------------
+public void setFinal(int Position) {
+	// Do nothing if do_setFinal is not set; we are in the midst of a word, but not at its end
+	if (!do_setFinal)
+		return;
+	// Do nothing if there's no Position to set
+	if (Position < 0)
+		return;
+
+	M = (MCO) MCOs.elementAt(Position);
+	if ((M.Type == MCO.Consonant) || (M.Type == MCO.ConsonantMark)) {
+		MCO Final = new MCO("final" + M.Name, MCO.Consonant, M.FinalValue,
+				M.FinalValue, M.Group);
+		MCOs.setElementAt(Final, Position);
+	} else {
+		String err = "LastConsonant is not a consonant!\n";
+		for (int i = 0; i < MCOs.size(); i++)
+			err += "MCOs[" + i + "] == " + MCOs.elementAt(i).Name + "\n";
+		err += "Position " + Position + "\n";
+		errorMessage(err);
+	}
 }
-//==============================================================================
-//==============================================================================
+}
\ No newline at end of file

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -13,12 +13,12 @@
 public class WLC2OSIS{
 
 // Define the title and descriptions.
-public String Title = "The Westminster Leningrad Codex (WLC)" ;
+private String Title = "The Westminster Leningrad Codex (WLC)" ;
 
-public String ShortDescription = "from the electronic version of the Leningrad Codex "
+private String ShortDescription = "from the electronic version of the Leningrad Codex "
     + "maintained by the Westminster Hebrew Institute." ;
     
-public String[] Description = {
+private String[] Description = {
    "This text began as an electronic transcription by Whitaker and Parunak of the 1983 "
  + "printed edition of Biblia Hebraica Stuttgartensia (BHS). The " 
  + "transcription is called the Michigan-Claremont electronic text and was archived "
@@ -51,25 +51,27 @@
    "The book names in English and Hebrew of the Jewish Publication Society "
  + "(JPS) Tanach have been added."} ;  
 
+public static XMLWriter writer;
+public static String OutputDirectory ;
 
-public Parser p ;
-public XMLWriter writer;
-public String InputFilename;
-public String OutputDirectory ;
 
-
-public WLC2OSIS( String file, String directory ){
+public WLC2OSIS( String in_file, String out_directory, String out_file, boolean wlc_only ){
     
-    InputFilename = file ;
-    OutputDirectory = directory ;    
+    OutputDirectory = out_directory ;    
 
-    System.out.println("\nWLC2OSIS: " + Title + " " + ShortDescription ) ;
-    System.out.println("\nInput file:       " + InputFilename ) ;
-    System.out.println("\nOutput directory: " + OutputDirectory ) ;
+    System.out.println("WLC2OSIS: " + Title + " " + ShortDescription );
+    if (wlc_only){
+        System.out.println("Not including MORPH data.");
+    }
+    else{
+        System.out.println("Including full MORPH data.");
+    }
+    System.out.println("Input file:  " + in_file );
+    System.out.println("Output file: " + OutputDirectory + "/" + out_file );
         
 // Read, parse, and write the book files.
 
-    writer = new XMLWriter(OutputDirectory, "wlc_morph") ;
+    writer = new XMLWriter(OutputDirectory, out_file) ;
 
     writer.openTag("osisText osisIDWork=\"writer\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ;
     writer.openTag("header", 0) ;
@@ -81,20 +83,20 @@
     writer.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible");
     writer.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.writer.2004");
     writer.writeAttributedString("rights", 2, "type=\"x-copyright\"", 
-      "The writer is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
+      "The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
     writer.writeString("scope", 2, "Hebrew Bible, Old Testament");
     writer.writeString("refSystem", 2, "MT");
        
     writer.closeTag("work", 1);
     writer.closeTag("header", 0);
 
-    p = new Parser(this, false) ;
-    p.parse() ;    
+    Parser p = new Parser() ;
+    p.parse(in_file, wlc_only) ;    
 
     writer.closeTag("osisText", 0);
 	writer.close();
 
-    System.out.println("\nWLC2OSIS: Normal end.") ;
+    System.out.println("Finished.") ;
 }
 
 }
\ No newline at end of file

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java	2006-09-15 16:40:20 UTC (rev 79)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java	2006-09-16 21:25:27 UTC (rev 80)
@@ -1,17 +1,18 @@
 package WLC2OSIS ;
 
 public class WLC2OSISMain{
-
-static WLC2OSIS A ;
-
 /**
- *    Static main() method
- *
+ * Static main() method
  * @param args String[] 
  */
 public static void main( String[] args) {
-        A = new WLC2OSIS( args[0], args[1] ) ;
-        System.exit(0) ;
+	{
+		WLC2OSIS A = new WLC2OSIS( args[0], args[1], "wlc_morph", false ) ;
+	}
+	{
+		WLC2OSIS A = new WLC2OSIS( args[0], args[1], "wlc", true ) ;
+	}
+	System.exit(0);
 }
 
 } //class




More information about the sword-cvs mailing list