[sword-svn] r72 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: . Parse Translate
mgruner at crosswire.org
mgruner at crosswire.org
Mon Jul 10 12:20:17 MST 2006
Author: mgruner
Date: 2006-07-10 12:20:08 -0700 (Mon, 10 Jul 2006)
New Revision: 72
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
Log:
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-10 19:20:08 UTC (rev 72)
@@ -19,7 +19,6 @@
public Words w ;
Translate T ;
-Markers m ;
public WKQ wkq ;
// Current state
@@ -32,8 +31,11 @@
public int BookVerseCount ;
public int BookChapterCount ;
-int WordNumber ;
+public String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
+public String MorphologicalSegmentEnd = "</seg>" ;
+public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+
BufferedReader file;
@@ -44,17 +46,12 @@
T = new Translate(A, this) ;
w = new Words(A, this) ;
- m = new Markers(A, this) ;
wkq = new WKQ(this) ;
new MC() ;
Note.setNotes();
}
-//------------------------------------------------------------------------------
-// Gets the next token as a String.
-// EOF is indicated by a return of EOF.
-
public void parse(){
String s ;
System.out.println("\n");
@@ -75,7 +72,7 @@
int oldSubWordNumber = 0;
- java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(?:@|%)(\\S+)");
+ java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
while ( true ){
s="";
@@ -115,7 +112,8 @@
String note = match.group(6);
String expression = match.group(7);
String lemma = match.group(8);
- String grammar = match.group(9);
+ String separator = match.group(9);
+ String grammar = match.group(10);
if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
@@ -125,10 +123,12 @@
BookName.getBookName(bookNames, newBookCode).abbrev+"."+
newChapter+"."+
newVerse+"\"", 2);
- oldBookCode = newBookCode;
- oldChapter = newChapter;
- oldVerse = newVerse;
}
+
+ //same verse, another word, add space
+ if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
+ A.writer.appendText(" ");
+ }
// Process a word.
@@ -143,19 +143,22 @@
else if( s.compareTo("S")==0){
m.samek() ;
}
- else if( s.compareTo("?")==0){
- m.line() ;
- }
else{ // It's a word
w.process(s) ;
}
}*/
- }
+ oldBookCode = newBookCode;
+ oldChapter = newChapter;
+ oldVerse = newVerse;
+ oldWordNumber = newWordNumber;
+ oldSubWordNumber = newSubWordNumber;
+
+ }
A.writer.closeTag("verse", 2);
return ;
- }
+}
//----------------------------------------------------------------------------
// Counts the number of occurrences of a character in a String.
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 2006-07-10 19:20:08 UTC (rev 72)
@@ -16,19 +16,31 @@
boolean TrailingMaqaf ;
String MaqafWord ;
String MaqafWordType ;
+Markers m ;
+
//-----------------------------------------------------------------------------
public Words(WLC2OSIS A, Parser P ) {
this.A = A ;
this.P = P ;
+ m = new Markers(A, P) ;
+
}
//------------------------------------------------------------------------------
// Processes a word,
public void process(String W){
- P.wkq.process(W) ;
- }
+ if( W.compareTo("P")==0){
+ m.pe();
+ }
+ else if( W.compareTo("S")==0){
+ m.samek() ;
+ }
+ else{
+ P.wkq.process(W);
+ }
+}
// Translates and writes a word (simple, ketib, qere) with exception markers.
@@ -43,7 +55,6 @@
if (asteriskcount > 0){
System.out.print("Words.write: Unexpected KQ character * ") ;
-// P.printPosition() ;
System.out.println("Word: " + W) ;
}
@@ -55,9 +66,6 @@
Word = MaqafWord+Word ; // Combine them.
}
else{
-// System.out.print("Words: Mismatched types for combining "
-// + MaqafWordType + ", " + Type + " at " ) ;
-// P.printPosition() ;
writeWord(MaqafWord, MaqafWordType) ;
TrailingMaqaf = false ;
}
@@ -98,17 +106,18 @@
}
if (Type.charAt(0) == 'w') {
- A.writer.appendText(A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " ") ;
+ A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ;
}
else if (Type.charAt(0) == 'k') {
- A.writer.appendText("[" + A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.kaf + "] ") ;
+ A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ;
}
else if (Type.charAt(0) == 'q') {
- A.writer.appendText("("+A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.qof+ ") ") ;
+ A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ;
}
else {
System.out.println("Warning: unknown word type!");
// P.printPosition();
+ System.exit(0);
}
P.MarkerWritten = false ;
}
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-07-10 19:20:08 UTC (rev 72)
@@ -221,11 +221,11 @@
}
//Mark morph segments when a maqef is present
else if ( (M.Name).compareTo("maqef") == 0 ){
- S = S + A.MorphologicalSegmentEnd + M.Value + A.MorphologicalSegmentStart;
+ S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart;
}
else if ((Type == MCO.MorphologicalDivision)){
- S = S + A.MorphologicalDivisionMarker ;
+ S = S + P.MorphologicalDivisionMarker ;
}
else{
S = S + M.Value ;
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2006-07-10 19:20:08 UTC (rev 72)
@@ -10,13 +10,8 @@
*/
//=================================================================================================
-public class WLC2OSIS /*implements Stoppable*/ {
+public class WLC2OSIS{
-// Definitions of input and output to be set by user.
-
-// public String ProgramDate = "30 May 2004" ;
-public String InputFilename;
-public String OutputDirectory ;
// Define the title and descriptions.
public String Title = "The Westminster Leningrad Codex (WLC)" ;
@@ -56,15 +51,13 @@
"The book names in English and Hebrew of the Jewish Publication Society "
+ "(JPS) Tanach have been added."} ;
-public String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
-public String MorphologicalSegmentEnd = "</seg>" ;
-public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
-//-----------------------------------------------------------------------------
-
public Parser p ;
public XMLWriter writer;
+public String InputFilename;
+public String OutputDirectory ;
+
public WLC2OSIS( String file, String directory ){
InputFilename = file ;
@@ -101,15 +94,7 @@
writer.closeTag("osisText", 0);
writer.close();
- done() ;
- }
-
-//------------------------------------------------------------------------------
-/**
- * Universal exit.
- */
-public void done(){
- System.out.println("\nwriter2OSIS: Normal end.") ;
- System.exit(0) ;
- }
+ System.out.println("\nWLC2OSIS: Normal end.") ;
}
+
+}
\ No newline at end of file
More information about the sword-cvs mailing list