[sword-svn] r76 - trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse
mgruner at crosswire.org
mgruner at crosswire.org
Sun Nov 12 16:30:20 MST 2006
Author: mgruner
Date: 2006-08-03 12:24:27 -0700 (Thu, 03 Aug 2006)
New Revision: 76
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
Log:
pretty cool now
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-31 20:20:27 UTC (rev 75)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-08-03 19:24:27 UTC (rev 76)
@@ -52,7 +52,7 @@
int newSubWordNumber = 0;
- java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
+ java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
while ( true ){
s="";
@@ -83,6 +83,13 @@
System.exit(1);
}
+ //remember old values
+ oldBookCode = newBookCode;
+ oldChapter = newChapter;
+ oldVerse = newVerse;
+ oldWordNumber = newWordNumber;
+ oldSubWordNumber = newSubWordNumber;
+
// Parse the identifier
newBookCode = match.group(1);
newChapter = Integer.parseInt( match.group(2) );
@@ -92,53 +99,81 @@
String note = match.group(6);
String word = match.group(7);
String lemma = match.group(8);
- String separator = match.group(9);
- String morph = match.group(10);
+ String homonym = match.group(9);
+ if (homonym != null){
+ homonym = homonym.substring(1); //"_1" to "1"
+ }
+ String separator = match.group(10);
+ String morph = match.group(11);
// Verse changed, close old and open new
- if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
+ if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
- if (oldVerse >= 1) A.writer.closeTag("verse", 2);
+ if (oldVerse > 0) {
+ A.writer.appendText("</w></verse>");
+ }
A.writer.openTag(
"verse osisID=\""+
BookName.getBookName(bookNames, newBookCode).abbrev+"."+
newChapter+"."+
newVerse+"\"", 2);
+
+ if (separator.equals("@")){
+ A.writer.appendText("<w xml:lang=\"he\">");
+ }
+ else if (separator.equals("%")){
+ A.writer.appendText("<w xml:lang=\"ah+\">");
+ }
+ else {
+ System.out.println("unknown separator: "+s);
+ System.exit(1);
+ }
}
//same verse, another word, add space
if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
- A.writer.appendText(" ");
+ if (separator.equals("@")){
+ A.writer.appendText("</w> <w xml:lang=\"he\">");
+ }
+ else if (separator.equals("%")){
+ A.writer.appendText("</w> <w xml:lang=\"ah+\">");
+ }
+ else {
+ System.out.println("unknown separator: "+s);
+ System.exit(1);
+ }
}
- //System.out.println("Expression: " + word);
+ //special case: nonprinting article, leave out for now
+ // TODO: FIX
+ if (word.equals("_")){
+ continue;
+ }
+// System.out.println("s: " + s);
+
// Paragraph marker found
if (morph.compareTo("x") == 0){
if (word.compareTo("P") == 0){ //
- A.writer.appendText(" "+constructWord(word, lemma, morph)+"<p/>");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>");
}
else if (word.compareTo("S") == 0){ //
- A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
}
else if (word.compareTo("N") == 0){ //inverted nun
- A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
}
else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
}
+ //now the text itself
+ A.writer.appendText( constructWord(word, lemma, homonym, morph) );
+
//Note found
if (note != null && note.length() > 0){
- System.out.println("Note: "+s);
+ //System.out.println("Note: "+s);
A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
}
-
- //remember
- oldBookCode = newBookCode;
- oldChapter = newChapter;
- oldVerse = newVerse;
- oldWordNumber = newWordNumber;
- oldSubWordNumber = newSubWordNumber;
}
A.writer.closeTag("verse", 2);
@@ -147,8 +182,14 @@
}
//----------------------------------------------------------------------------
-public String constructWord(String word, String lemma, String morph){
- return "<seg type=\"x-morph\" lemma=\""+T.translate(lemma)+"\" morph=\""+morph+"\">"+T.translate(word)+"</seg>";
+public String constructWord(String word, String lemma, String homonym, String morph){
+ String result = "<seg type=\"x-morph\" lemma=\""+T.translate(lemma) + "\" ";
+ if (homonym != null) {
+ result += "homonym=\""+homonym + "\" ";
+ }
+ result += "morph=\""+morph+"\">";
+ result += T.translate(word)+"</seg>";
+ return result;
}
}
More information about the sword-cvs mailing list