[sword-svn] r76 - trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse

mgruner at crosswire.org mgruner at crosswire.org
Sun Nov 12 16:30:20 MST 2006


Author: mgruner
Date: 2006-08-03 12:24:27 -0700 (Thu, 03 Aug 2006)
New Revision: 76

Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
Log:
pretty cool now

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-31 20:20:27 UTC (rev 75)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-08-03 19:24:27 UTC (rev 76)
@@ -52,7 +52,7 @@
 	int newSubWordNumber = 0;
 
 
-    java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
+    java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
      
     while ( true ){
     	s="";
@@ -83,6 +83,13 @@
 			System.exit(1);
 		}
 		
+		//remember old values
+		oldBookCode = newBookCode;
+	    oldChapter = newChapter;
+	    oldVerse = newVerse;
+	    oldWordNumber = newWordNumber;
+	    oldSubWordNumber = newSubWordNumber;
+		
 		// Parse the identifier
 		newBookCode = match.group(1);
 		newChapter = Integer.parseInt( match.group(2) );
@@ -92,53 +99,81 @@
 		String note = match.group(6);
 		String word = match.group(7);
 		String lemma	= match.group(8);
-		String separator = match.group(9);
-		String morph  = match.group(10);
+		String homonym = match.group(9);
+		if (homonym != null){
+			homonym = homonym.substring(1); //"_1" to "1"
+		}
+		String separator = match.group(10);
+		String morph  = match.group(11);
 		
 		// Verse changed, close old and open new
-		if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
+		if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
 		{
-			if (oldVerse >= 1) A.writer.closeTag("verse", 2);
+			if (oldVerse > 0) {
+				A.writer.appendText("</w></verse>");
+			}
 		    A.writer.openTag(
 		    		"verse osisID=\""+
 		    		BookName.getBookName(bookNames, newBookCode).abbrev+"."+
 		    		newChapter+"."+
 		    		newVerse+"\"", 2);
+
+		   	if (separator.equals("@")){
+		   		A.writer.appendText("<w xml:lang=\"he\">");
+		    }	
+		    else if (separator.equals("%")){
+		    	A.writer.appendText("<w xml:lang=\"ah+\">");
+		    }
+		    else {
+		    	System.out.println("unknown separator: "+s);
+		    	System.exit(1);
+		    }
 		}
 		
 		//same verse, another word, add space
 		if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
-			A.writer.appendText(" ");
+			if (separator.equals("@")){
+				A.writer.appendText("</w> <w xml:lang=\"he\">");
+			}
+			else if (separator.equals("%")){
+				A.writer.appendText("</w> <w xml:lang=\"ah+\">");
+			}
+			else {
+				System.out.println("unknown separator: "+s);
+				System.exit(1);
+			}
 		}
 		
-		//System.out.println("Expression: " + word);
+		//special case: nonprinting article, leave out for now
+		// TODO: FIX
+		if (word.equals("_")){
+			continue;
+		}
 		
+//		System.out.println("s: " + s);
+		
 		// Paragraph marker found
 		if (morph.compareTo("x") == 0){
 			if (word.compareTo("P") == 0){ //
-				A.writer.appendText("  "+constructWord(word, lemma, morph)+"<p/>");
+				A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"<p/>");
 			}
 			else if (word.compareTo("S") == 0){ //
-				A.writer.appendText("  "+constructWord(word, lemma, morph)+"  ");
+				A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"  ");
 			}
 			else if (word.compareTo("N") == 0){ //inverted nun
-				A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+				A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
 			}
 			else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
 		}
 		
+		//now the text itself
+		A.writer.appendText( constructWord(word, lemma, homonym, morph) );
+		
 		//Note found
 		if (note != null && note.length() > 0){
-			System.out.println("Note: "+s);
+			//System.out.println("Note: "+s);
 			A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
 		}
-		
-		//remember
-		oldBookCode = newBookCode;
-	    oldChapter = newChapter;
-	    oldVerse = newVerse;
-	    oldWordNumber = newWordNumber;
-	    oldSubWordNumber = newSubWordNumber;
     }
     
     A.writer.closeTag("verse", 2);
@@ -147,8 +182,14 @@
 }
 //----------------------------------------------------------------------------
 
-public String constructWord(String word, String lemma, String morph){
-	return "<seg type=\"x-morph\" lemma=\""+T.translate(lemma)+"\" morph=\""+morph+"\">"+T.translate(word)+"</seg>";
+public String constructWord(String word, String lemma, String homonym, String morph){
+	String result = "<seg type=\"x-morph\" lemma=\""+T.translate(lemma) + "\" ";
+	if (homonym != null) {
+		result += "homonym=\""+homonym + "\" ";
+	}
+	result += "morph=\""+morph+"\">";
+	result += T.translate(word)+"</seg>";
+	return result;
 }
 
 }



More information about the sword-cvs mailing list