[sword-svn] r68 - in trunk/modules/hebrew-wlc/WLC2OSIS: . WLC2OSIS WLC2OSIS/Parse WLC2OSIS/Translate WLC2OSIS/Utilities
mgruner at crosswire.org
mgruner at crosswire.org
Fri Jul 7 09:50:42 MST 2006
Author: mgruner
Date: 2006-07-07 09:50:30 -0700 (Fri, 07 Jul 2006)
New Revision: 68
Removed:
trunk/modules/hebrew-wlc/WLC2OSIS/Utilities/
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
Log:
update; unusable at the moment
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 2006-07-07 16:50:30 UTC (rev 68)
@@ -1,5 +1,6 @@
package WLC2OSIS.Parse ;
+import java.io.*;
import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
import WLC2OSIS.Utilities.* ;
@@ -56,6 +57,8 @@
int VerseNumber ;
int WordNumber ;
+BufferedReader file;
+
//-----------------------------------------------------------------------------
public Parser(WLC2OSIS A) {
this.A = A ;
@@ -86,37 +89,52 @@
boolean PreviousEOL = true ;
System.out.println("\n") ;
- A.w = new XMLWriter(A.OutputDirectory, "WLC_OSIS") ;
-
// Write the header
- Header.writeHeader(A, A.w) ;
+ Header.writeHeader(A, A.wlc) ;
+ Header.writeHeader(A, A.morph) ;
+
+ try{
+ file = new BufferedReader( new FileReader( A.InputFilename ));
+ }
+ catch (IOException e) {
+ System.out.println("File not found: " + e) ;
+ }
- for (int k = 0; k < A.InputChars.length ; k++){
+ while ( true ){
+ s="";
+ try{
+ s= file.readLine();
+ }
+ catch (IOException e) {
+ System.out.println("Read error: " + e) ;
+ break;
+ }
- s = t.nextToken() ;
- System.out.println("processing: " + s);
-
- if(s.compareTo(t.EOF) == 0){
- break ;
- }
-
-//-----------------------------------------------------------------------------
+// System.out.println("processing: " + s);
-// Process a line identifier
-
- if (PreviousEOL){
- int ColonIndex = s.indexOf(':') ;
- if(ColonIndex <=0 ){
- System.out.println("Parser: Incorrect line identifier: " + s + " !") ;
- break ;
- }
- PreviousEOL= false ;
+ if ( s.startsWith(">") ){ //ignore this line
+ continue;
+ }
+
+ java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)\\S*\\s(\\S+)\\s(\\S+)(?:@|%)(\\S+)");
+ java.util.regex.Matcher m = p.matcher( s );
+ if (!m.matches()){
+ System.out.println("No match!");
+ System.exit(1);
+ }
+
+// Parse the identifier
+ String BookCode = m.group(1);
+ int Chapter = Integer.parseInt( m.group(2) );
+ int Verse = Integer.parseInt( m.group(3) );
+ int wordNumber = Integer.parseInt( m.group(4) );
+ int subWordNumber = Integer.parseInt( m.group(5) );
+ String expression = m.group(6);
+ String lemma = m.group(7);
+ String grammar = m.group(8);
-// Parse the identifier
- String BookCode = s.substring(0,2) ;
- int Chapter = Integer.parseInt( s.substring(2, ColonIndex) ) ;
- int Verse = Integer.parseInt( s.substring(ColonIndex+1) ) ;
-
+ System.out.println(BookCode + " " + Chapter + " " + Verse + " " + wordNumber + " " + subWordNumber + " " +expression+" "+lemma+" "+grammar);
+/*
// Change in Book, start a book.
if(BookCode.compareTo(LastBookCode) != 0){
v.end() ;
@@ -145,18 +163,12 @@
v.end() ;
v.start() ;
LastVerse = Verse ;
- }
- }
+ }*/
//-----------------------------------------------------------------------------
// Process a word.
- else{
- if(s.compareTo(t.EOL) == 0){
- PreviousEOL = true ;
- }
- else{
- if (s.length() > 1){
+/* if (s.length() > 1){
w.process(s) ;
}
else{
@@ -173,11 +185,8 @@
else{ // It's a word
w.process(s) ;
}
- }
- }
- }
-
- }
+ }*/
+ }
v.end() ;
c.end() ;
b.end() ;
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java 2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java 2006-07-07 16:50:30 UTC (rev 68)
@@ -17,7 +17,7 @@
public final String EOF = "***EOF***" ;
public final String EOL = "***EOL***" ;
WLC2OSIS A ;
-CharArrayReader car ;
+//CharArrayReader car ;
StreamTokenizer st ;
//-----------------------------------------------------------------------------
@@ -27,8 +27,13 @@
// Set up the tokenizer
- car = new CharArrayReader(A.InputChars) ;
- st = new StreamTokenizer( car) ;
+// car = new CharArrayReader(A.InputChars) ;
+ try{
+ st = new StreamTokenizer( new FileInputStream( A.InputFilename ) ) ;
+ }
+ catch (IOException e) {
+ System.exit(0);
+ }
st.resetSyntax() ;
st.wordChars(33, 126 ) ; // All printables are word characters
st.ordinaryChar(63) ; // ? is a special symbol, the EOL marker.
Deleted: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java 2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java 2006-07-07 16:50:30 UTC (rev 68)
@@ -1,69 +0,0 @@
-package WLC2OSIS.Translate ;
-
-import WLC2OSIS.* ;
-import WLC2OSIS.Translate.* ;
-import WLC2OSIS.Utilities.* ;
-
-// import java.util.Date ;
-// import java.text.SimpleDateFormat ;
-//==============================================================================
-/**
- * <b>Header information for Tanach.</b><p>
- */
-//==============================================================================
-public class Header{
-
-// static SimpleDateFormat DateFormat = new SimpleDateFormat("dd MMM yyyy") ;
-// static String DateTime ;
-
-public Header(){
- }
-//-----------------------------------------------------------------------------
-
-// Writes the Notes to the XML file.
-
-public static void writeHeader(WLC2OSIS A, XMLWriter w) {
-// Date DT = new Date() ;
-// DateTime = DateFormat.format(DT) ;
- A.w.openTag("osisText osisIDWork=\"WLC\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ;
- A.w.openTag("header", 0) ;
-
- A.w.openTag("work osisWork=\"WLC\"", 1) ;
-
- A.w.writeString("title", 2, "Westminster Leningrad Codex");
- A.w.writeAttributedString("contributor", 2, "role=\"encoder\"", "Martin Gruner");
- A.w.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible");
- A.w.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.WLC.2004");
- A.w.writeAttributedString("rights", 2, "type=\"x-copyright\"",
- "The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
- A.w.writeString("scope", 2, "Hebrew Bible, Old Testament");
- A.w.writeString("refSystem", 2, "MT");
-
- A.w.closeTag("work", 1);
-
- A.w.closeTag("header", 0);
-
-// A.w.writeString("hebrewname", 1, H.Tnk) ;
-// A.w.writeString("title", 1, A.Title) ;
-// A.w.writeString("shortdescription", 1, A.ShortDescription) ;
-// for (int i =0; i < A.Description.length; i++){
-// A.w.writeString("description", 1, A.Description[i]) ;
-// }
-// //A.w.writeString("date", 1, A.Date) ;
-// A.w.writeString("transcriptiondate", 1, DateTime) ;
-// A.w.writeString("copyright", 1, "\u00A9 C. V. Kimball 2004") ;
-// A.w.writeString("filename", 1, A.InputFilename) ;
-//
-// A.w.closeTag("header", 0) ;
- }
-
-//==============================================================================
-
-public static void writeFooter(WLC2OSIS A, XMLWriter w) {
-
- A.w.closeTag("osisText", 0);
-
- }
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-}//==============================================================================
Deleted: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java 2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java 2006-07-07 16:50:30 UTC (rev 68)
@@ -1,106 +0,0 @@
-package WLC2OSIS.Utilities ;
-
-import WLC2OSIS.* ;
-import Utilities.FileChooser ;
-//import Utilities.Message ;
-
-import java.io.* ;
-import javax.swing.* ;
-import java.awt.* ;
-//==============================================================================
-/**
- * <b>Reads the input file.</b>
- */
-//==============================================================================
-public class FileRead{
-
-
-WLC2OSIS A ;
-int InputLength ;
-public byte[] InputBuffer ;
-File F ;
-FileInputStream FIS ;
-boolean Error ;
-//-----------------------------------------------------------------------------
-
-public FileRead(WLC2OSIS A ) {
- this.A = A ;
- InputBuffer = new byte[A.InputBufferSize] ;
- Error = false ;
- }
-//------------------------------------------------------------------------------
-
-// Reads the specified file, forming the char[] InputChars and StringBuffer Input.
-
-public void read(String Filename){
- Error = true ;
-
-// Open the file
-
- F = new File(Filename) ;
-
- try{
- FIS = new FileInputStream(F);
- if (FIS.available() > InputBuffer.length){
- System.out.println(
- "The input file length, " + FIS.available()
- + " bytes,\nis too long for the internal buffer of "
- + InputBuffer.length + " bytes.") ;
- return ;
- }
- }
- catch(IOException e){
- System.out.println(
- "FileRead.read: Error in opening FileInputStream.\n\n"
- + F.getPath() + "\n\n"
- + e.toString() + "\nNo further action taken.") ;
- return ;
- }
-//------------------------------------------------------------------------------
-
-// Read the file
-
- try{
- InputLength = FIS.read(InputBuffer) ;
- }
- catch(IOException e){
- System.out.println(
- "FileRead.read: Error on read of input file.\n\n"
- + F.getPath() + "\n\n"
- + e.toString() + "\nNo further action taken.") ;
- return;
- }
-
-// Close the file.
-
- try{
- FIS.close() ;
- }
- catch(IOException e){
- System.out.println(
- "FileRead.read: Error on close of input file.\n\n"
- + F.getPath() + "\n\n"
- + e.toString() + "\nNo further action taken.") ;
- }
-
-// Convert bytes to char[] array.
-
- A.InputChars = new char[InputLength] ;
- for (int k = 0; k < InputLength; k++){
- short shrt = (short) InputBuffer[k] ;
- A.InputChars[k] = (char) shrt ;
- }
-
- Error = false ;
- }
-//------------------------------------------------------------------------------
-
-// Gets the error condition.
-
-public boolean getError(){
- return Error ;
- }
-//-----------------------------------------------------------------------------
-}
-//==============================================================================
-//==============================================================================
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2006-07-07 16:50:30 UTC (rev 68)
@@ -70,10 +70,12 @@
//-----------------------------------------------------------------------------
-public final int InputBufferSize = 40000000 ; // Length of input in bytes.
+//public final int InputBufferSize = 40000000 ; // Length of input in bytes.
public char[] InputChars ; // Input char[] array set by FileRead.
public Parser p ;
-public XMLWriter w ;
+public XMLWriter wlc;
+public XMLWriter morph;
+
public Fmt F = new Fmt() ;
// public boolean StandAlone = true ; // If used by another app,
@@ -82,21 +84,43 @@
InputFilename = file ;
OutputDirectory = directory ;
-// Identifying printouts
-
F.bar("=", 80) ;
System.out.println("\nWLC2OSIS: " + Title + " " + ShortDescription ) ;
System.out.println("\nInput file: " + InputFilename ) ;
-// System.out.println( "Input file date: " + Date ) ;
System.out.println("\nOutput directory: " + OutputDirectory ) ;
- FileRead FR = new FileRead(this) ;
- FR.read(InputFilename) ;
+// Read, parse, and write the book files.
+
+ wlc = new XMLWriter(A.OutputDirectory, "wlc_osis") ;
+ morph = new XMLWriter(A.OutputDirectory, "morph_osis") ;
+
+
+ wlc.openTag("osisText osisIDWork=\"WLC\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ;
+ wlc.openTag("header", 0) ;
-// Read, parse, and write the book files.
+ wlc.openTag("work osisWork=\"WLC\"", 1) ;
+
+ wlc.writeString("title", 2, "Westminster Leningrad Codex");
+ wlc.writeAttributedString("contributor", 2, "role=\"encoder\"", "Martin Gruner");
+ wlc.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible");
+ wlc.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.WLC.2004");
+ wlc.writeAttributedString("rights", 2, "type=\"x-copyright\"",
+ "The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
+ wlc.writeString("scope", 2, "Hebrew Bible, Old Testament");
+ wlc.writeString("refSystem", 2, "MT");
+
+ wlc.closeTag("work", 1);
+ wlc.closeTag("header", 0);
+
+
+
+
+
p = new Parser(this) ;
p.parse() ;
+ wlc.closeTag("osisText", 0);
+
done() ;
}
More information about the sword-cvs mailing list