[sword-svn] r39 - in trunk/modules: hebrew-wlc/WLC2OSIS/WLC2OSIS
hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse
hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate mt-lxx-parallel
mgruner at crosswire.org
mgruner at crosswire.org
Fri Jun 3 02:41:17 MST 2005
Author: mgruner
Date: 2005-06-03 02:41:16 -0700 (Fri, 03 Jun 2005)
New Revision: 39
Modified:
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
trunk/modules/mt-lxx-parallel/prepare_files.cpp
trunk/modules/mt-lxx-parallel/run.sh
Log:
Further work on WLC update. Morphological segmentation works in BibleTime already!
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 2005-06-03 09:41:16 UTC (rev 39)
@@ -114,13 +114,13 @@
}
if (Type.charAt(0) == 'w') {
- A.w.appendText("<seg>" + Out + "</seg> ") ;
+ A.w.appendText(A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " ") ;
}
else if (Type.charAt(0) == 'k') {
- A.w.appendText("[<seg>" + Out + "</seg> " + H.kaf + "] ") ;
+ A.w.appendText("[" + A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.kaf + "] ") ;
}
else if (Type.charAt(0) == 'q') {
- A.w.appendText("(<seg>" + Out + "</seg> " + H.qof+ ") ") ;
+ A.w.appendText("("+A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.qof+ ") ") ;
}
else {
System.out.println("Warning: unknown word type!");
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2005-06-03 09:41:16 UTC (rev 39)
@@ -247,11 +247,13 @@
if (Type == MCO.Note){
S = S + "<note type=\"textual\" xml:lang=\"en\">"+ Note.Notes.get( M.Value)+ "</note>";
}
+ //Mark morph segments when a maqef is present
+ else if ( (M.Name).compareTo("maqef") == 0 ){
+ S = S + A.MorphologicalSegmentEnd + M.Value + A.MorphologicalSegmentStart;
+ }
- // MG DISABLE MORPH DIVISION!!!!!!!!!!!!!!!!
-
else if ((Type == MCO.MorphologicalDivision)){
-// S = S + A.MorphologicalDivisionMarker ;
+ S = S + A.MorphologicalDivisionMarker ;
}
else{
S = S + M.Value ;
Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java 2005-06-03 09:41:16 UTC (rev 39)
@@ -67,7 +67,10 @@
"The book names in English and Hebrew of the Jewish Publication Society "
+ "(JPS) Tanach have been added."} ;
-//public char MorphologicalDivisionMarker = '/' ;
+public String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
+public String MorphologicalSegmentEnd = "</seg>" ;
+public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+
//-----------------------------------------------------------------------------
public final int InputBufferSize = 4000000 ; // Length of input in bytes.
Modified: trunk/modules/mt-lxx-parallel/prepare_files.cpp
===================================================================
--- trunk/modules/mt-lxx-parallel/prepare_files.cpp 2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/mt-lxx-parallel/prepare_files.cpp 2005-06-03 09:41:16 UTC (rev 39)
@@ -20,16 +20,21 @@
void processDaniel();
bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
+void processJudges();
+bool processverseJudges(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
+
+
void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse);
int strfind(const char * source, const char * str, int pos);
void strcatrange(const char * source, char * destination, int start, int size);
-char bufa[500000], bufb[500000], dest[100000];
+char bufa[500000], bufb[500000], dest[200000];
int main(int argc, char * argv[], char * envp[])
{
processJoshua();
processDaniel();
+ processJudges();
return 1;
}
@@ -60,7 +65,7 @@
output = fopen("Joshua_processed.par", "w+");
if (!output){
- printf("Unable to open/create Joshua.par\n");
+ printf("Unable to open/create Joshua_processed.par\n");
return ;
}
@@ -113,7 +118,7 @@
output = fopen("Daniel_processed.par", "w+");
if (!output){
- printf("Unable to open/create Daniel.par\n");
+ printf("Unable to open/create Daniel_processed.par\n");
return;
}
@@ -139,6 +144,60 @@
printf("\nFinished Daniel\n");
}
+void processJudges(){
+ FILE * input, * output;
+ int chapter, verse;
+
+ input = fopen("08.JudgesB.par", "r");
+
+ if (!input){
+ printf("Unable to open 08.JudgesB.par\n");
+ return;
+ }
+ readfile(input, bufa, false);
+ fclose(input);
+
+ input = fopen("09.JudgesA.par", "r");
+ if (!input){
+ printf("Unable to open 09.JudgesA.par\n");
+ return;
+ }
+ readfile(input, bufb, false);
+ fclose(input);
+
+ output = fopen("Judges_processed.par", "w+");
+
+ if (!output){
+ printf("Unable to open/create Judges_processed.par\n");
+ return;
+ }
+
+ chapter = 1;
+ verse = 1;
+
+ // Break only when no entries for the current chapter can be found in either file.
+ while (checkforchapter(bufa, "JudgB", chapter) == 1 || checkforchapter(bufb, "JudgA", chapter)){
+
+ while (verse < 200){
+ // It is possible that both files may fail to include the current verse,
+ // to avoid premature termination of the process try to find 1-200.
+ dest[0] = '\0';
+
+ if (processverseJudges(bufb, bufa, dest, chapter, verse)){
+ fputs(dest, output);
+ printf("%i:%i\n", chapter, verse);
+ }
+ verse ++;
+ }
+ chapter ++;
+ verse = 1;
+ }
+
+ fclose(output);
+ printf("\nFinished Judges\n");
+}
+
+
void readfile(FILE * fs, char * destination, bool bfix)
{
// Read a source file completely into memory.
@@ -264,6 +323,48 @@
return true;
}
+bool processverseJudges(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse)
+{
+ char title[32], *posa, *posb;
+
+ // Check each buffer to see if it contains the current verse.
+ sprintf(title, "JudgA %i:%i\n", chapter, verse);
+ posa = strstr(sourcea, title);
+
+ sprintf(title, "JudgB %i:%i\n", chapter, verse);
+ posb = strstr(sourceb, title);
+
+ if (!posa && !posb)
+ return false;
+
+ sprintf(destination, "Judg %i:%i", chapter, verse);
+
+ if (posa){
+ // If both files contain the verse identify which
+ // file it came from.
+ if (posb)
+ strcat(destination, "\nCodex Alexandrinus:");
+
+ sprintf(title, "JudgA %i:%i\n", chapter, verse);
+
+ // The file may contain multiple entries for the verse,
+ // collectverses will grab all entries.
+ collectverses(sourcea, destination, "JudgA", title, chapter, verse);
+ }
+
+ if (posb){
+ // As above.
+ if (posa)
+ strcat(destination, "\nCodex Vaticanus:");
+
+ sprintf(title, "JudgB %i:%i\n", chapter, verse);
+ collectverses(sourceb, destination, "JudgB", title, chapter, verse);
+ }
+
+ strcat(destination, "\n");
+ return true;
+}
+
void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse)
{
// Find all verses that have the specified id in source,
@@ -291,9 +392,9 @@
hits ++;
}
-
}
+
int strfind(const char * source, const char * str, int pos)
{
// Get the index position of from strstr instead of a memory pointer.
Modified: trunk/modules/mt-lxx-parallel/run.sh
===================================================================
--- trunk/modules/mt-lxx-parallel/run.sh 2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/mt-lxx-parallel/run.sh 2005-06-03 09:41:16 UTC (rev 39)
@@ -60,5 +60,5 @@
cd $TEMP_DIR;
prepare_files;
#These are not needed in TEMP_DIR any more
-rm "06.JoshB.par" "07.JoshA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files"
+rm "06.JoshB.par" "07.JoshA.par" "08.JudgesB.par" "09.JudgesA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files"
More information about the sword-cvs
mailing list