[sword-svn] r79 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: Parse Translate
mgruner at www.crosswire.org
mgruner at www.crosswire.org
Fri Sep 15 09:40:26 MST 2006
Author: mgruner
Date: 2006-09-15 09:40:20 -0700 (Fri, 15 Sep 2006)
New Revision: 79
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
Log:
The WLC2OSIS converter is about ready.
Will soon send Kirk a demo OSIS file.
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-09-14 19:58:01 UTC (rev 78)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-09-15 16:40:20 UTC (rev 79)
@@ -49,15 +49,14 @@
int oldWordNumber = 0;
int newWordNumber = 0;
-// book chap vs word# subword# note ketivquere word homonym lang lemma
- java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s([*]+)?([^* ]+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
+// book chap vs word# subword# note word lemma homonym lang morph
+ java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
while ( true ){
s="";
try{
- s= file.readLine();
- system.out.println(s);
- continue;
+ s = file.readLine();
+ System.out.println(s);
}
catch (IOException e) {
System.out.println("Read error: " + e) ;
@@ -96,18 +95,15 @@
newWordNumber = Integer.parseInt( match.group(4) );
//newSubWordNumber = Integer.parseInt( match.group(5) ); not used
String note = match.group(6);
- String ketivqere = match.group(7);
- String word = match.group(8);
- String lemma = match.group(9);
- String homonym = match.group(10);
+ String word = match.group(7);
+ String lemma = match.group(8);
+ String homonym = match.group(9);
if (homonym != null){
homonym = homonym.substring(1); //"_1" to "1"
}
- String separator = match.group(11);
- String morph = match.group(12);
+ String separator = match.group(10);
+ String morph = match.group(11);
- System.out.println(s);
-
// Verse changed, close old and open new
if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
@@ -148,28 +144,37 @@
//special case: nonprinting article, leave out for now
// TODO: FIX
- if (word.equals("_")){
+ if (word.equals("_") || word.equals("*_") || word.equals("**_")){
continue;
}
-
-// System.out.println("s: " + s);
-
+ //Qere / Ketiv only
+ else if (word.equals("**qq")){
+ A.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>");
+ continue;
+ }
+ else if (word.equals("*kk")){
+ A.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>");
+ continue;
+ }
+
// Paragraph marker found
if (morph.compareTo("x") == 0){
if (word.compareTo("P") == 0){ //
- //A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>");
}
else if (word.compareTo("S") == 0){ //
- //A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
}
else if (word.compareTo("N") == 0){ //inverted nun
- //A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
}
else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
}
+
+
//now the text itself
- //A.writer.appendText( constructWord(word, lemma, homonym, morph) );
+ A.writer.appendText( constructWord(word, lemma, homonym, morph) );
//Note found
if (note != null && note.length() > 0){
@@ -185,12 +190,12 @@
//----------------------------------------------------------------------------
public String constructWord(String word, String lemma, String homonym, String morph){
- String result = "<seg type=\"x-morph\" lemma=\""+T.translate(lemma) + "\" ";
+ String result = "<seg type=\"x-morph\" lemma=\""+T.convertCompoundWord(lemma) + "\" ";
if (homonym != null) {
result += "homonym=\""+homonym + "\" ";
}
result += "morph=\""+morph+"\">";
- result += T.translate(word)+"</seg>";
+ result += T.convertCompoundWord(word)+"</seg>";
return result;
}
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-09-14 19:58:01 UTC (rev 78)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-09-15 16:40:20 UTC (rev 79)
@@ -12,15 +12,16 @@
//==============================================================================
public class Translate{
-Parser P ;
-WLC2OSIS A ;
+private
+ Parser P ;
+ WLC2OSIS A ;
-MCO M ;
-MCO Mark ;
-Vector MCOs, OrderedMCOs ;
-int Type, I, k1, len ;
-int[] ConsonantPositions = new int[100] ;
-char c, c1 ;
+ MCO M;
+ MCO Mark ;
+ char c, c1 ;
+ int Type, I, k1, len ;
+ Vector MCOs, OrderedMCOs ;
+ int[] ConsonantPositions = new int[100] ;
//-----------------------------------------------------------------------------
public Translate(WLC2OSIS A, Parser P) {
@@ -29,11 +30,34 @@
}
//------------------------------------------------------------------------------
+public String convertCompoundWord(String W){
+ if (W.contains("~")){ //compound word without maqqef
+ String[] tmp = W.split("~");
+ return convertWord(tmp[0]) + " " + convertWord(tmp[1]);
+ }
+ else if (W.contains("-") && !W.endsWith("-")){ //compound word with maqqef
+ String[] tmp = W.split("-");
+ return convertWord(tmp[0]) + convertWord("-") + convertWord(tmp[1]);
+ }
+ else{
+ return convertWord(W);
+ }
+}
+
+public String convertWord(String W){
+ if (W.startsWith("**"))
+ return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"+convertChars(W.substring(2));
+ else if (W.startsWith("*"))
+ return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"+convertChars(W.substring(1));
+ else return convertChars(W);
+}
+
// Translates an MC word (not qere or ketib) to a Unicode String.
// Notes are included as <note type="textual">text of note</note>.
-public String translate(String W){
- len = W.length() ;
+public String convertChars(String W){
+
+ len = W.length() ;
// Convert characters in String to MCO objects, expanding
// ConsonantMarks, Numbers, and Notes as necessary.
@@ -41,9 +65,8 @@
MCOs = new Vector() ;
for (int k = 0; k < len; k++) {
- c = W.charAt(k) ;
+ c = W.charAt(k) ;
-
M = (MCO) (MC.getMCO(c)).clone() ;
Type = M.Type ;
@@ -53,6 +76,7 @@
+ "\nWord: " + W
+ "\nCharacter: " + c
+ "\n " ) ;
+ System.exit(1);
}
else if(Type <= 5){ // These types need no expansion.
MCOs.add(M) ;
More information about the sword-cvs mailing list