[sword-svn] r74 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: Parse Translate
mgruner at crosswire.org
mgruner at crosswire.org
Thu Jul 27 12:53:25 MST 2006
Author: mgruner
Date: 2006-07-27 12:53:15 -0700 (Thu, 27 Jul 2006)
New Revision: 74
Removed:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
Log:
some work, still not functional
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java 2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java 2006-07-27 19:53:15 UTC (rev 74)
@@ -27,44 +27,18 @@
// Samek
public void samek(){ //parasah setumah, closed paragraph == small space in line
- testMaqafWord() ;
-// A.wlc.writeMarker("samekh", 4) ;
A.writer.appendText(" " + H.samekh + " ") ;
- P.MarkerWritten = true ;
+// P.MarkerWritten = true ;
}
//------------------------------------------------------------------------------
// Pe
public void pe(){ // parasah petuhah, open paragraph == new line
- testMaqafWord() ;
-// A.writer.writeMarker("pe", 4) ;
A.writer.appendText(" " + H.pe + " " + "<p/>") ;
- P.MarkerWritten = true ;
+// P.MarkerWritten = true ;
}
//------------------------------------------------------------------------------
-// Line
-public void line(){
- System.out.println("Markers: End-of-line encountered!") ;
- }
-//----------------------------------------------------------------------------
-/**
- * Tests for a preceding trailing maqaf word and writes it.
- *
- * Before any marker is written, the TrailingMaqaf flag must be
- * tested. If a trailing maqaf word precedes the mark, it must be
- * written before the marker.
- *
- * Apparently ONLY EOLs cause this test to be activated.
- */
-void testMaqafWord(){
- if (P.w.TrailingMaqaf){
-// System.out.print("Markers: Marker follows trailing maqaf at ") ;
-// P.printPosition() ;
- P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType) ;
- P.w.TrailingMaqaf = false ;
- }
- }
}
//==============================================================================
//==============================================================================
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-27 19:53:15 UTC (rev 74)
@@ -3,50 +3,22 @@
import java.io.*;
import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
-//==============================================================================
-/**
- * <b>Parser dispatches tokens to Books, Chapters, Markers, Tanach,
- * Verses, and Words start/end methods, special to WLC. </b>
- *
- * Extensively modified for WLC.
- */
-//==============================================================================
+
public class Parser{
WLC2OSIS A ;
-
-// Working classes
-
-public Words w ;
Translate T ;
-public WKQ wkq ;
-// Current state
+public final String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
+public final String MorphologicalSegmentEnd = "</seg>" ;
+public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
-public boolean MarkerWritten ; // Indicates a marker has been written
- // between two words.
-
-// Assorted counts
-public int ChapterVerseCount ;
-public int BookVerseCount ;
-public int BookChapterCount ;
-
-public String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
-public String MorphologicalSegmentEnd = "</seg>" ;
-public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
-
-
-BufferedReader file;
-
-
//-----------------------------------------------------------------------------
public Parser(WLC2OSIS A, boolean wlc_only) {
this.A = A ;
T = new Translate(A, this) ;
- w = new Words(A, this) ;
- wkq = new WKQ(this) ;
new MC() ;
Note.setNotes();
}
@@ -55,21 +27,29 @@
public void parse(){
String s ;
System.out.println("\n");
-
+
+ BufferedReader file;
+
try{
file = new BufferedReader( new FileReader( A.InputFilename ));
}
catch (IOException e) {
+ file = null;
System.out.println("File not found: " + e) ;
}
BookName[] bookNames = BookName.setBookNames();
String oldBookCode = "";
+ String newBookCode = "";
int oldChapter = 0;
+ int newChapter = 0;
int oldVerse = 0;
+ int newVerse = 0;
int oldWordNumber = 0;
+ int newWordNumber = 0;
int oldSubWordNumber = 0;
+ int newSubWordNumber = 0;
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
@@ -103,18 +83,19 @@
System.exit(1);
}
-// Parse the identifier
- String newBookCode = match.group(1);
- int newChapter = Integer.parseInt( match.group(2) );
- int newVerse = Integer.parseInt( match.group(3) );
- int newWordNumber = Integer.parseInt( match.group(4) );
- int newSubWordNumber = Integer.parseInt( match.group(5) );
+ // Parse the identifier
+ newBookCode = match.group(1);
+ newChapter = Integer.parseInt( match.group(2) );
+ newVerse = Integer.parseInt( match.group(3) );
+ newWordNumber = Integer.parseInt( match.group(4) );
+ newSubWordNumber = Integer.parseInt( match.group(5) );
String note = match.group(6);
- String expression = match.group(7);
+ String word = match.group(7);
String lemma = match.group(8);
String separator = match.group(9);
- String grammar = match.group(10);
+ String morph = match.group(10);
+ // Verse changed, close old and open new
if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
if (oldVerse >= 1) A.writer.closeTag("verse", 2);
@@ -130,16 +111,26 @@
A.writer.appendText(" ");
}
- System.out.println("Expression: " + expression);
+ //System.out.println("Expression: " + word);
- w.process(expression);
-
- oldBookCode = newBookCode;
+ // Paragraph marker found
+ if (morph == "x"){
+ System.out.println("paragraph marker found!");
+ if (word == "P"){
+ A.writer.appendText(" "+constructWord(word, lemma, morph)+"<p/>");
+ }
+ else if (word == "S"){
+ A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+ }
+ else {System.out.println("Unknown marker."); System.exit(1);}
+ }
+
+ // Remember the values just parsed so the next line can be compared against them
+ oldBookCode = newBookCode;
oldChapter = newChapter;
oldVerse = newVerse;
oldWordNumber = newWordNumber;
oldSubWordNumber = newSubWordNumber;
-
}
A.writer.closeTag("verse", 2);
@@ -148,18 +139,8 @@
}
//----------------------------------------------------------------------------
-// Counts the number of occurrences of a character in a String.
+public String constructWord(String word, String lemma, String morph){
+ return "<seg type=\"x-morph\" lemma=\""+lemma+"\" morph=\""+morph+"\">"+word+"</seg>";
+}
-public int countChar(String W, char c){
- int Count = 0 ;
- for(int k=0; k < W.length(); k++){
- if(W.charAt(k)==c){
- Count++ ;
- }
- }
- return Count ;
- }
-//----------------------------------------------------------------------------
-
-
}
Deleted: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 2006-07-27 19:53:15 UTC (rev 74)
@@ -1,127 +0,0 @@
-package WLC2OSIS.Parse ;
-
-import WLC2OSIS.* ;
-import WLC2OSIS.Translate.H ;
-//==============================================================================
-/**
- * <b>Processes words, sending them to the Translate class
- * after their word, qere, ketiv properties have been determined.</b>
- */
-//==============================================================================
-public class Words{
-
-WLC2OSIS A ;
-Parser P ;
-
-boolean TrailingMaqaf ;
-String MaqafWord ;
-String MaqafWordType ;
-Markers m ;
-
-
-//-----------------------------------------------------------------------------
-
-public Words(WLC2OSIS A, Parser P ) {
- this.A = A ;
- this.P = P ;
- m = new Markers(A, P) ;
-
- }
-//------------------------------------------------------------------------------
-
-// Processes a word,
-public void process(String W){
- if( W.compareTo("P")==0){
- m.pe();
- }
- else if( W.compareTo("S")==0){
- m.samek() ;
- }
- else{
- P.wkq.process(W);
- }
-}
-
-
-// Translates and writes a word (simple, ketib, qere) with exception markers.
-// All returns leave P.MarkerWritten = false ;
-public void write(String W, String Type) {
-
- String Word = P.T.translate(W) ;
-
-// Check for any KQ markers which should NOT be here!
-
- int asteriskcount = P.countChar(W, '*') ;
-
- if (asteriskcount > 0){
- System.out.print("Words.write: Unexpected KQ character * ") ;
- System.out.println("Word: " + W) ;
- }
-
-// Look for a case in which there's been trailing maqaf
-// without an intervening marker.
-
- if(TrailingMaqaf & !P.MarkerWritten ){
- if(MaqafWordType.charAt(0)== Type.charAt(0) ){
- Word = MaqafWord+Word ; // Combine them.
- }
- else{
- writeWord(MaqafWord, MaqafWordType) ;
- TrailingMaqaf = false ;
- }
- }
-
-// Check for a trailing maqaf.
-// Don't write the word here.
-
- TrailingMaqaf = false ;
- if( Word.charAt(Word.length()-1) == H.maqaf){
- TrailingMaqaf = true ;
- MaqafWord = Word ;
- MaqafWordType = Type ;
- P.MarkerWritten = false ;
- return ;
- }
-
- writeWord(Word, Type) ;
- }
-//----------------------------------------------------------------------------------
-
-public void writeWord(String Word, String Type) {
-
-// Check for any exception markers ]x
-
- String Out = "" ;
- for (int k=0 ; k < Word.length() ; k++){
- char c = Word.charAt(k) ;
- if(c == ']'){
- k++ ;
- char ExceptionValue = Word.charAt(k) ;
- Out = Out + "<x>" + ExceptionValue +"</x>" ;
- System.out.println("Exception occured");
- }
- else{
- Out = Out + c ;
- }
- }
-
- if (Type.charAt(0) == 'w') {
- A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ;
- }
- else if (Type.charAt(0) == 'k') {
- A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ;
- }
- else if (Type.charAt(0) == 'q') {
- A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ;
- }
- else {
- System.out.println("Warning: unknown word type!");
-// P.printPosition();
- System.exit(0);
- }
- P.MarkerWritten = false ;
- }
-}
-
-//==============================================================================
-//==============================================================================
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2006-07-27 19:53:15 UTC (rev 74)
@@ -30,7 +30,7 @@
//------------------------------------------------------------------------------
// Translates an MC word (not qere or ketib) to a Unicode String.
-// Notes are included as <note type="textual">text of note</x>.
+// Notes are included as <note type="textual">text of note</note>.
public String translate(String W){
len = W.length() ;
More information about the sword-cvs
mailing list