[sword-svn] r43 - trunk/modules/mt-lxx-parallel
mgruner at crosswire.org
mgruner at crosswire.org
Thu Oct 6 02:23:27 MST 2005
Author: mgruner
Date: 2005-10-06 02:23:26 -0700 (Thu, 06 Oct 2005)
New Revision: 43
Added:
trunk/modules/mt-lxx-parallel/convert.pl
Removed:
trunk/modules/mt-lxx-parallel/prepare_files.cpp
trunk/modules/mt-lxx-parallel/run.sh
Log:
updates to conversion software
Added: trunk/modules/mt-lxx-parallel/convert.pl
===================================================================
--- trunk/modules/mt-lxx-parallel/convert.pl 2005-06-19 13:10:52 UTC (rev 42)
+++ trunk/modules/mt-lxx-parallel/convert.pl 2005-10-06 09:23:26 UTC (rev 43)
@@ -0,0 +1,149 @@
+#!/usr/bin/perl -w
+
+#
+# This tool is supposed to convert the ccat Parallel MT/LXX
+# to a valid OSIS file.
+#
+# @author Martin Gruner
+# @copyright GPL
+#
+
+use strict;
+
+# Directory holding the *.par input files; file names are appended to it
+# as "$prefix/$filename" when a book is opened.
+my $prefix = 'parallel/';
+
+#
+# grabVerseContent - return the content lines of one verse, or nothing.
+#
+# Arguments: bookname, chapter, verse, followed by the (chomped) lines to
+# search. A verse header is a line starting with "<bookname> <chapter>:<verse>";
+# the content is every line after the header up to, but not including, the
+# next empty or whitespace-only line (or the end of the list).
+#
+# Returns the content lines as a list. Returns the empty list if the header
+# is not found, or if it is directly followed by a blank line.
+#
+sub grabVerseContent { #Bookname, chapter, verse, @list
+    my ($bookname, $chapter, $verse, @buffer) = @_;
+
+    # \Q..\E keeps regex metacharacters in the book name literal; (?!\d) stops
+    # the header of verse 1 from also matching verses 10, 11, 100, ...
+    my $header = qr/^\Q$bookname\E ${chapter}:${verse}(?!\d)/;
+
+    foreach my $index (0 .. $#buffer) {
+        next unless $buffer[$index] =~ $header;
+
+        my @result;
+        my $next = $index;
+        # The bounds check matters for a verse at the very end of the input,
+        # which has no terminating blank line.
+        while ( ++$next <= $#buffer and $buffer[$next] !~ m/^\s*$/ ) {
+            push(@result, $buffer[$next]);
+        }
+        return @result;
+    }
+    return; #Nothing found, don't return a value.
+}
+
+#
+# processBook - read one book file and collect all of its verses.
+#
+# Arguments: bookname (as it appears in the verse headers of the file) and
+# filename (looked up below $prefix).
+#
+# Chapters and verses are probed upwards from 1:1 with grabVerseContent. A
+# chapter ends at the first verse that yields no content; the book ends at the
+# first chapter in which no verse was found.
+#
+# Returns a flat list: for every verse the header "<bookname> <chapter>:<verse>"
+# followed by its content lines. Dies if the file cannot be opened.
+#
+sub processBook { #bookname, filename
+    my ($bookname, $filename) = @_;
+
+    # Three-argument open with a lexical handle: the file name can never be
+    # mistaken for an open mode and the handle is private to this call.
+    my $path = "$prefix/$filename";
+    open( my $fh, '<', $path ) or die("Could not open file $path: $!");
+    my @BUF = <$fh>;
+    close( $fh );
+    chomp(@BUF);
+
+    my @result;
+
+    CHAPTER: foreach my $chapter (1..1000){
+        # Progress goes to STDERR so that it does not end up inside the
+        # converted text, which the script prints on STDOUT.
+        print STDERR "Processing $bookname chapter $chapter.\n";
+        my $verse_found;
+        VERSE: foreach my $verse (1..1000){
+            my @verseContent = &grabVerseContent($bookname, $chapter, $verse, @BUF);
+            last VERSE unless @verseContent; #verse nonexistent, goto next chapter
+
+            push(@result, "$bookname $chapter:$verse", @verseContent);
+            $verse_found = 1;
+        }
+        last CHAPTER unless $verse_found; #chapter empty, stop here
+    }
+    # Reported before returning; placed after the return it could never run.
+    print STDERR "done.\n";
+    return(@result);
+}
+
+#
+# processBookVariant - merge two textual variants of one book.
+#
+# Arguments: booknameA, filenameA, variantNameA,
+#            booknameB, filenameB, variantNameB,
+#            neutralBookname.
+# Both files are looked up below $prefix and searched verse by verse with
+# grabVerseContent, each under its own book name.
+#
+# Returns a flat list. Every verse present in at least one file starts with
+# the header "<neutralBookname> <chapter>:<verse>". If only one file has the
+# verse, its content follows directly. If both have it, the output is
+# variantNameA, content A, an empty line, variantNameB, content B.
+# A chapter ends at the first verse missing from both files; the book ends at
+# the first chapter without any verse. Dies if a file cannot be opened.
+#
+sub processBookVariant {
+    my ($booknameA, $filenameA, $variantNameA,
+        $booknameB, $filenameB, $variantNameB,
+        $neutralBookname) = @_;
+
+    # Read both files the same way: three-argument open, lexical handle,
+    # system error text included in the failure message.
+    my @buffers;
+    foreach my $filename ($filenameA, $filenameB) {
+        my $path = "$prefix/$filename";
+        open( my $fh, '<', $path ) or die("Could not open file $path: $!");
+        my @lines = <$fh>;
+        close( $fh );
+        chomp(@lines);
+        push(@buffers, \@lines);
+    }
+    my ($bufA, $bufB) = @buffers;
+
+    my @result;
+
+    CHAPTER: foreach my $chapter (1..1000){
+        # Progress goes to STDERR so that it does not end up inside the
+        # converted text, which the script prints on STDOUT.
+        print STDERR "Processing $booknameA and $booknameB chapter $chapter.\n";
+        my $verse_found;
+        VERSE: foreach my $verse (1..1000){
+            my @verseContentA = &grabVerseContent($booknameA, $chapter, $verse, @$bufA);
+            my @verseContentB = &grabVerseContent($booknameB, $chapter, $verse, @$bufB);
+
+            #verse nonexistent in both files, goto next chapter
+            last VERSE unless (@verseContentA or @verseContentB);
+
+            push(@result, "$neutralBookname $chapter:$verse");
+            $verse_found = 1;
+
+            if (@verseContentA and @verseContentB){
+                # Both variants exist: label each one and separate them by an
+                # empty line.
+                push(@result, $variantNameA, @verseContentA, "",
+                              $variantNameB, @verseContentB);
+            }
+            else{
+                # Exactly one of the two lists is non-empty here.
+                push(@result, @verseContentA, @verseContentB);
+            }
+        }
+        last CHAPTER unless $verse_found; #chapter empty, stop here
+    }
+    # Reported before returning; placed after the return it could never run.
+    print STDERR "done.\n";
+    return(@result);
+}
+
+#
+# fixDaniel - repair lines that were wrongly broken before the word DANIHL.
+#
+# Argument: the list of lines to repair.
+# A line that starts with "DANIHL" is never emitted on its own. A line that
+# does not start with "DANIHL" but is directly followed by one that does is
+# emitted with that following line appended (no separator). All other lines
+# are passed through unchanged.
+# Returns the repaired list.
+#
+sub fixDaniel { #@buffer
+    my @buffer = @_;
+    my @result;
+
+    foreach my $index (0 .. $#buffer){
+        my $line = $buffer[$index];
+        next if $line =~ m/^DANIHL/; #Do not add this line to the result
+
+        # The last line has no successor; checking the bound avoids matching
+        # against an undefined value.
+        if ($index < $#buffer and $buffer[$index + 1] =~ m/^DANIHL/){
+            push(@result, $line . $buffer[$index + 1] ); #Push both lines on one
+        }
+        else{
+            push(@result, $line ); #The normal case
+        }
+    }
+    return @result;
+}
+
+# Output lines of all converted books, in the order the books are processed.
+my @result;
+
+# Joshua and Judges exist in two variants each; their conversion is
+# commented out.
+#push(@result, &processBookVariant("JoshA", "07.JoshA.par", "Codex Alexandrinus:", "JoshB", "06.JoshB.par", "Codex Vaticanus:", "Josh") );
+#push(@result, &processBookVariant("JudgA", "09.JudgesA.par", "Codex Alexandrinus:", "JudgB", "08.JudgesB.par", "Codex Vaticanus:", "Judges") );
+
+my @isaiah = &processBook("Isa", "40.Isaiah.par");
+push(@result, @isaiah);
+
+# Daniel would additionally be passed through fixDaniel; also commented out.
+#my @danielTmp = &processBookVariant("Dan", "45.DanielOG.par", "Old Greek:", "DanTh", "46.DanielTh.par", "Theodotion:", "Daniel");
+#push(@result, &fixDaniel( @danielTmp ) );
+
+# One output line per list element; no newline after the last one.
+print( join("\n", @result) );
Deleted: trunk/modules/mt-lxx-parallel/prepare_files.cpp
===================================================================
--- trunk/modules/mt-lxx-parallel/prepare_files.cpp 2005-06-19 13:10:52 UTC (rev 42)
+++ trunk/modules/mt-lxx-parallel/prepare_files.cpp 2005-10-06 09:23:26 UTC (rev 43)
@@ -1,415 +0,0 @@
-/*
- April/2005
-
- Creates the composite files needed to process the parallel MT/LXX text.
- JoshA.par & JoshB.par >> Joshua_processed.par
- DanielOG.par & DanielTh.par >> Daniel_processed.par
-*/
-
-#include <stdio.h>
-#include <string.h>
-
-// The file paths are hardwired; change them to match the paths on your system.
-
-void readfile(FILE * fs, char * destination, bool bfix);
-bool checkforchapter(const char * source, const char * name, int chapter);
-
-void processJoshua();
-bool processverseJoshua(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse);
-
-void processDaniel();
-bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
-
-void processJudges();
-bool processverseJudges(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
-
-
-void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse);
-int strfind(const char * source, const char * str, int pos);
-void strcatrange(const char * source, char * destination, int start, int size);
-
-char bufa[500000], bufb[500000], dest[200000];
-
-// Entry point: builds the composite files for Joshua, Daniel and Judges, in
-// that order. Command-line arguments and the environment are not used.
-int main(int argc, char * argv[], char * envp[])
-{
-    (void)argc;
-    (void)argv;
-    (void)envp;
-
-    processJoshua();
-    processDaniel();
-    processJudges();
-
-    // 0 is the conventional "success" exit status; a non-zero value would
-    // make the calling shell treat every run as a failure.
-    return 0;
-}
-
-// Merges 06.JoshB.par and 07.JoshA.par (read from the current directory) into
-// Joshua_processed.par, one entry per chapter:verse. Prints each written
-// chapter:verse on stdout. Reports an error on stdout and returns early if
-// any of the three files cannot be opened.
-void processJoshua(){
-    FILE * input, * output;
-
-    // Start from empty buffers so that nothing loaded for another book is
-    // still in them when the Joshua files are read.
-    bufa[0] = '\0';
-    bufb[0] = '\0';
-
-    // Note that JoshB is stored in bufa, and JoshA is stored in bufb.
-    // JoshB is the more complete file.
-    input = fopen("06.JoshB.par", "r");
-    if (!input) {
-        printf("Unable to open 06.JoshB.par\n");
-        return;
-    }
-    readfile(input, bufa, false);
-    fclose(input);
-
-    input = fopen("07.JoshA.par", "r");
-    if (!input) {
-        // Name the file that was actually requested.
-        printf("Unable to open 07.JoshA.par\n");
-        return;
-    }
-    readfile(input, bufb, false);
-    fclose(input);
-
-    output = fopen("Joshua_processed.par", "w+");
-    if (!output){
-        printf("Unable to open/create Joshua_processed.par\n");
-        return;
-    }
-
-    // Continue while at least one file still has entries for the chapter.
-    for (int chapter = 1;
-         checkforchapter(bufa, "JoshB", chapter) || checkforchapter(bufb, "JoshA", chapter);
-         chapter ++){
-
-        // Both files may lack a given verse, so every verse number 1..199 is
-        // probed instead of stopping at the first gap.
-        for (int verse = 1; verse < 200; verse ++){
-            dest[0] = '\0';
-
-            // JoshA (bufb) is passed first, JoshB (bufa) second.
-            if ( processverseJoshua(bufb, bufa, dest, chapter, verse) ){
-                fputs(dest, output);
-                printf("%i:%i\n", chapter, verse);
-            }
-        }
-    }
-
-    fclose(output);
-    printf("\nFinished Joshua\n");
-}
-
-// Merges 45.DanielOG.par and 46.DanielTh.par (read from the current directory)
-// into Daniel_processed.par, one entry per chapter:verse. Prints each written
-// chapter:verse on stdout. Reports an error on stdout and returns early if
-// any of the three files cannot be opened.
-void processDaniel(){
-    FILE * input, * output;
-
-    // Start from empty buffers so that nothing loaded for another book is
-    // still in them (and counted against their fixed size) when the Daniel
-    // files are read.
-    bufa[0] = '\0';
-    bufb[0] = '\0';
-
-    input = fopen("45.DanielOG.par", "r");
-    if (!input) {
-        printf("Unable to open 45.DanielOG.par\n");
-        return;
-    }
-    // true: ask readfile to apply its Dan 3:56 correction to this file.
-    readfile(input, bufa, true);
-    fclose(input);
-
-    input = fopen("46.DanielTh.par", "r");
-    if (!input){
-        printf("Unable to open 46.DanielTh.par\n");
-        return;
-    }
-    readfile(input, bufb, false);
-    fclose(input);
-
-    output = fopen("Daniel_processed.par", "w+");
-    if (!output){
-        printf("Unable to open/create Daniel_processed.par\n");
-        return;
-    }
-
-    // Continue while at least one file still has entries for the chapter.
-    for (int chapter = 1;
-         checkforchapter(bufa, "Dan", chapter) || checkforchapter(bufb, "DanTh", chapter);
-         chapter ++){
-
-        // Every verse number 1..199 is probed; gaps do not end the chapter.
-        for (int verse = 1; verse < 200; verse ++){
-            dest[0] = '\0';
-
-            if ( processverseDaniel(bufa, bufb, dest, chapter, verse) ){
-                fputs(dest, output);
-                printf("%i:%i\n", chapter, verse);
-            }
-        }
-    }
-
-    fclose(output);
-    printf("\nFinished Daniel\n");
-}
-
-// Merges 08.JudgesB.par and 09.JudgesA.par (read from the current directory)
-// into Judges_processed.par, one entry per chapter:verse. Prints each written
-// chapter:verse on stdout. Reports an error on stdout and returns early if
-// any of the three files cannot be opened.
-void processJudges(){
-    FILE * input, * output;
-
-    // Start from empty buffers so that nothing loaded for another book is
-    // still in them (and counted against their fixed size) when the Judges
-    // files are read.
-    bufa[0] = '\0';
-    bufb[0] = '\0';
-
-    // JudgesB is stored in bufa, JudgesA in bufb.
-    input = fopen("08.JudgesB.par", "r");
-    if (!input){
-        printf("Unable to open 08.JudgesB.par\n");
-        return;
-    }
-    readfile(input, bufa, false);
-    fclose(input);
-
-    input = fopen("09.JudgesA.par", "r");
-    if (!input){
-        printf("Unable to open 09.JudgesA.par\n");
-        return;
-    }
-    readfile(input, bufb, false);
-    fclose(input);
-
-    output = fopen("Judges_processed.par", "w+");
-    if (!output){
-        printf("Unable to open/create Judges_processed.par\n");
-        return;
-    }
-
-    // Continue while at least one file still has entries for the chapter.
-    for (int chapter = 1;
-         checkforchapter(bufa, "JudgB", chapter) || checkforchapter(bufb, "JudgA", chapter);
-         chapter ++){
-
-        // Both files may lack a given verse, so every verse number 1..199 is
-        // probed instead of stopping at the first gap.
-        for (int verse = 1; verse < 200; verse ++){
-            dest[0] = '\0';
-
-            // JudgesA (bufb) is passed first, JudgesB (bufa) second.
-            if (processverseJudges(bufb, bufa, dest, chapter, verse)){
-                fputs(dest, output);
-                printf("%i:%i\n", chapter, verse);
-            }
-        }
-    }
-
-    fclose(output);
-    printf("\nFinished Judges\n");
-}
-
-
-// Appends the whole of fs to the NUL-terminated string already held in
-// destination. The caller must provide a buffer that is large enough; no
-// bounds are checked here.
-//
-// While copying, three corrections are applied line by line:
-//  - if bfix is set, the first "Dan 3:56" found is rewritten to "Dan 3:23"
-//    (fix for DanielOG.par); later occurrences are left alone;
-//  - a line that starts with "DANIHL" is joined onto the preceding line by
-//    dropping the last character copied so far;
-//  - a '\r' is replaced by '\n' and ends the line, so Windows line endings
-//    come out as plain '\n'.
-void readfile(FILE * fs, char * destination, bool bfix)
-{
-    char * pos, buf[1024];
-
-    // Track the end of destination ourselves so that every line is appended
-    // in constant time instead of rescanning the whole buffer.
-    size_t len = strlen(destination);
-
-    while (fgets(buf, sizeof buf, fs)){
-        // Fix for verse 3:56 in DanielOG.par.
-        if (bfix){
-            pos = strstr(buf, "Dan 3:56");
-            if (pos){
-                pos[6] = '2';
-                pos[7] = '3';
-                bfix = false;
-            }
-        }
-
-        // Fix for cases of DANIHL which should have been on the preceding
-        // line. The length check keeps a DANIHL line at the very start from
-        // writing in front of the buffer.
-        if (len > 0 && strncmp(buf, "DANIHL", 6) == 0)
-            destination[--len] = '\0';
-
-        // Fix for Linux: the input files have Windows "\r\n" line endings.
-        pos = strchr(buf, '\r');
-        if (pos){
-            pos[0] = '\n';
-            pos[1] = '\0';
-        }
-
-        size_t n = strlen(buf);
-        memcpy(destination + len, buf, n + 1);   // n + 1 copies the terminator too
-        len += n;
-    }
-}
-
-// Returns true if source contains the chapter prefix "<name> <chapter>:"
-// anywhere, false otherwise. Also returns false if the prefix does not fit
-// into the local title buffer, rather than writing past its end.
-bool checkforchapter(const char * source, const char * name, int chapter)
-{
-    char title[32];
-    int needed = snprintf(title, sizeof title, "%s %i:", name, chapter);
-
-    // snprintf reports the length it wanted to write; anything that does not
-    // fit would have been truncated and could match the wrong text.
-    if (needed < 0 || needed >= (int)sizeof title)
-        return false;
-
-    return strstr(source, title) != NULL;
-}
-
-// Builds the merged text of one Joshua verse in destination.
-//
-// sourcea is searched for the header "JoshA <chapter>:<verse>\n", sourceb for
-// "JoshB <chapter>:<verse>\n". If neither is present, destination is left
-// untouched and false is returned. Otherwise destination is overwritten with
-// "Josh <chapter>:<verse>", followed by the text collected from each source
-// that has the verse, and a final newline; true is returned. When both
-// sources have the verse, each part is preceded by a line naming its codex.
-bool processverseJoshua(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse)
-{
-    char titlea[32], titleb[32];
-
-    // Check each buffer to see if it contains the current verse. Only the
-    // presence matters, so no pointer into the const sources is kept.
-    sprintf(titlea, "JoshA %i:%i\n", chapter, verse);
-    sprintf(titleb, "JoshB %i:%i\n", chapter, verse);
-    const bool ina = strstr(sourcea, titlea) != NULL;
-    const bool inb = strstr(sourceb, titleb) != NULL;
-
-    if (!ina && !inb)
-        return false;
-
-    sprintf(destination, "Josh %i:%i", chapter, verse);
-
-    if (ina)
-    {
-        // If both files contain the verse identify which file it came from.
-        if (inb)
-            strcat(destination, "\nCodex Alexandrinus:");
-
-        // The file may contain multiple entries for the verse,
-        // collectverses will grab all entries.
-        collectverses(sourcea, destination, "JoshA", titlea, chapter, verse);
-    }
-
-    if (inb)
-    {
-        if (ina)
-            strcat(destination, "\nCodex Vaticanus:");
-
-        collectverses(sourceb, destination, "JoshB", titleb, chapter, verse);
-    }
-
-    strcat(destination, "\n");
-    return true;
-}
-
-// Builds the merged text of one Daniel verse in destination.
-//
-// sourcea is searched for the header "Dan <chapter>:<verse>\n", sourceb for
-// "DanTh <chapter>:<verse>\n". If neither is present, destination is left
-// untouched and false is returned. Otherwise destination is overwritten with
-// "Dan <chapter>:<verse>", followed by the text collected from each source
-// that has the verse, and a final newline; true is returned. When both
-// sources have the verse, the parts are labelled "Septuagint:" and
-// "Theodotion:".
-bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse)
-{
-    char titlea[32], titleb[32];
-
-    // Only the presence of each header matters, so no pointer into the const
-    // sources is kept.
-    sprintf(titlea, "Dan %i:%i\n", chapter, verse);
-    sprintf(titleb, "DanTh %i:%i\n", chapter, verse);
-    const bool ina = strstr(sourcea, titlea) != NULL;
-    const bool inb = strstr(sourceb, titleb) != NULL;
-
-    if (!ina && !inb)
-        return false;
-
-    sprintf(destination, "Dan %i:%i", chapter, verse);
-
-    if (ina)
-    {
-        if (inb)
-            strcat(destination, "\nSeptuagint:");
-
-        // The book markers carry a trailing space here ("Dan ", "DanTh ").
-        collectverses(sourcea, destination, "Dan ", titlea, chapter, verse);
-    }
-
-    if (inb)
-    {
-        if (ina)
-            strcat(destination, "\nTheodotion:");
-
-        collectverses(sourceb, destination, "DanTh ", titleb, chapter, verse);
-    }
-
-    strcat(destination, "\n");
-    return true;
-}
-
-// Builds the merged text of one Judges verse in destination.
-//
-// sourcea is searched for the header "JudgA <chapter>:<verse>\n", sourceb for
-// "JudgB <chapter>:<verse>\n". If neither is present, destination is left
-// untouched and false is returned. Otherwise destination is overwritten with
-// "Judg <chapter>:<verse>", followed by the text collected from each source
-// that has the verse, and a final newline; true is returned. When both
-// sources have the verse, each part is preceded by a line naming its codex.
-bool processverseJudges(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse)
-{
-    char titlea[32], titleb[32];
-
-    // Check each buffer to see if it contains the current verse. Only the
-    // presence matters, so no pointer into the const sources is kept.
-    sprintf(titlea, "JudgA %i:%i\n", chapter, verse);
-    sprintf(titleb, "JudgB %i:%i\n", chapter, verse);
-    const bool ina = strstr(sourcea, titlea) != NULL;
-    const bool inb = strstr(sourceb, titleb) != NULL;
-
-    if (!ina && !inb)
-        return false;
-
-    sprintf(destination, "Judg %i:%i", chapter, verse);
-
-    if (ina){
-        // If both files contain the verse identify which file it came from.
-        if (inb)
-            strcat(destination, "\nCodex Alexandrinus:");
-
-        // The file may contain multiple entries for the verse,
-        // collectverses will grab all entries.
-        collectverses(sourcea, destination, "JudgA", titlea, chapter, verse);
-    }
-
-    if (inb){
-        if (ina)
-            strcat(destination, "\nCodex Vaticanus:");
-
-        collectverses(sourceb, destination, "JudgB", titleb, chapter, verse);
-    }
-
-    strcat(destination, "\n");
-    return true;
-}
-
-// Appends to destination the text of every entry in source that is introduced
-// by title. An entry's text runs from just after the title to the next
-// occurrence of book, or to the end of source if there is none. A single
-// newline is inserted before the first entry only, and one trailing newline
-// is removed from destination after each entry. chapter and verse are not
-// used.
-void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse)
-{
-    bool first = true;
-
-    for (const char * entry = strstr(source, title); entry; entry = strstr(entry, title)){
-        // Step over the title itself; the entry text starts right behind it.
-        entry += strlen(title);
-
-        int length = strfind(entry, book, 0);
-        if (length == -1)
-            length = strlen(entry);
-
-        // Insert a new line only if it is the first entry found.
-        if (first)
-            strcat(destination, "\n");
-
-        strcatrange(entry, destination, 0, length);
-
-        int total = strlen(destination);
-        if (destination[total - 1] == '\n')
-            destination[total - 1] = '\0';
-
-        first = false;
-    }
-}
-
-
-// Searches source for str, starting at offset pos. Returns the offset of the
-// first match measured from the start of source, or -1 if there is no match.
-int strfind(const char * source, const char * str, int pos)
-{
-    const char * hit = strstr(source + pos, str);
-
-    return hit ? (int)(hit - source) : -1;
-}
-
-// Appends size bytes of source, beginning at offset start, to the
-// NUL-terminated string in destination and terminates the result.
-void strcatrange(const char * source, char * destination, int start, int size)
-{
-    char * tail = destination + strlen(destination);
-
-    memcpy(tail, source + start, size);
-    tail[size] = '\0';
-}
Deleted: trunk/modules/mt-lxx-parallel/run.sh
===================================================================
--- trunk/modules/mt-lxx-parallel/run.sh 2005-06-19 13:10:52 UTC (rev 42)
+++ trunk/modules/mt-lxx-parallel/run.sh 2005-10-06 09:23:26 UTC (rev 43)
@@ -1,64 +0,0 @@
-#!/bin/bash
-#
-# Copies the original *.par files into a scratch directory, builds
-# prepare_files there and runs it to create the composite files.
-
-ORIG_FILES="\
-01.Genesis.par \
-02.Exodus.par \
-03.Lev.par \
-04.Num.par \
-05.Deut.par \
-06.JoshB.par \
-07.JoshA.par \
-08.JudgesB.par \
-09.JudgesA.par \
-10.Ruth.par \
-11.1Sam.par \
-12.2Sam.par \
-13.1Kings.par \
-14.2Kings.par \
-15.1Chron.par \
-16.2Chron.par \
-17.1Esdras.par \
-18.Esther.par \
-18.Ezra.par \
-19.Neh.par \
-20.Psalms.par \
-22.Ps151.par \
-23.Prov.par \
-24.Qoh.par \
-25.Cant.par \
-26.Job.par \
-27.Sirach.par \
-28.Hosea.par \
-29.Micah.par \
-30.Amos.par \
-31.Joel.par \
-32.Jonah.par \
-33.Obadiah.par \
-34.Nahum.par \
-35.Hab.par \
-36.Zeph.par \
-37.Haggai.par \
-38.Zech.par \
-39.Malachi.par \
-40.Isaiah.par \
-41.Jer.par \
-42.Baruch.par \
-43.Lam.par \
-44.Ezekiel.par \
-45.DanielOG.par \
-46.DanielTh.par"
-
-TEMP_DIR="tmp/"
-
-# -p / -f: do not complain when the directory already exists or is empty.
-mkdir -p "$TEMP_DIR" || exit 1
-rm -f "$TEMP_DIR"/*
-# ORIG_FILES is deliberately left unquoted: it is a whitespace-separated list.
-cp $ORIG_FILES "$TEMP_DIR" || exit 1
-
-g++ prepare_files.cpp -o "$TEMP_DIR/prepare_files" || exit 1
-
-#THIS WILL CREATE Joshua_processed.par, Daniel_processed.par and Judges_processed.par
-# Stop if the directory cannot be entered: the rm below must never run
-# against the original files in the source directory.
-cd "$TEMP_DIR" || exit 1
-# Run the freshly built binary explicitly; "." need not be in PATH.
-# Its exit status is not checked.
-./prepare_files
-#These are not needed in TEMP_DIR any more
-rm "06.JoshB.par" "07.JoshA.par" "08.JudgesB.par" "09.JudgesA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files"
-
More information about the sword-cvs
mailing list