[sword-svn] r43 - trunk/modules/mt-lxx-parallel

Thu Oct 6 02:23:27 MST 2005

Author: mgruner
Date: 2005-10-06 02:23:26 -0700 (Thu, 06 Oct 2005)
New Revision: 43

Added:
   trunk/modules/mt-lxx-parallel/convert.pl
Removed:
   trunk/modules/mt-lxx-parallel/prepare_files.cpp
   trunk/modules/mt-lxx-parallel/run.sh
Log:
updates to conversion software


Added: trunk/modules/mt-lxx-parallel/convert.pl
===================================================================

--- trunk/modules/mt-lxx-parallel/convert.pl	2005-06-19 13:10:52 UTC (rev 42)
+++ trunk/modules/mt-lxx-parallel/convert.pl	2005-10-06 09:23:26 UTC (rev 43)
@@ -0,0 +1,149 @@
+#!/usr/bin/perl -w
+
+#
+# This tool is supposed to convert the ccat Parallel MT/LXX
+# to a valid OSIS file.
+#
+# @author Martin Gruner
+# @copyright GPL
+#
+
+use strict;
+
+my $prefix = "parallel/";
+
+#
+# grabVerseContent - returns the lines after the first "$bookname $chapter:$verse"
+# heading, up to the next blank line or buffer end; nothing if it is not found
+#
+sub grabVerseContent {  #Bookname, chapter, verse, @list
+	my ($bookname, $chapter, $verse, @buffer) = @_;
+	# \Q..\E keeps the book name literal; (?!\d) stops "1:1" matching "1:10".
+	my $heading = qr/^\Q$bookname\E $chapter:$verse(?!\d)/;
+	foreach my $start (0 .. $#buffer){
+		next unless $buffer[$start] =~ $heading;
+		my @result;
+		# The slice ends at the last element, so a final verse without a
+		# trailing blank line no longer reads past the end of the buffer.
+		foreach my $line (@buffer[$start + 1 .. $#buffer]){
+			last if $line =~ m/^\n|^\s*$/;
+			push(@result, $line);
+		}
+		return @result;
+	}
+	return;	#Nothing found, don't return a value.
+}
+
+sub processBook {
+# Args: bookname, filename (looked up below $prefix).
+# Returns the output lines of one book: a "$bookname $chapter:$verse" heading
+# followed by that verse's content lines, for every verse found. Dies if the
+# file cannot be opened.
+	my ($bookname, $filename) = @_;
+
+	# Three-argument open with a lexical handle: the name is never taken as a mode.
+	open( my $fh, '<', "$prefix/$filename") or die("Could not open file $prefix/$filename: $!");
+	my @BUF = <$fh>; chomp(@BUF); close( $fh );
+
+	my @result;
+
+	CHAPTER: foreach my $chapter(1..1000){
+		# Progress goes to STDERR; STDOUT is reserved for the converted text.
+		print STDERR "Processing $bookname chapter $chapter.\n";
+		my $verse_found;
+		VERSE: foreach my $verse(1..1000){
+			my @verseContent = &grabVerseContent($bookname, $chapter, $verse, @BUF);
+			last VERSE unless @verseContent; #verse nonexistent, goto next chapter
+			push(@result, "$bookname $chapter:$verse");
+			push(@result, @verseContent);
+			$verse_found = 1;
+		}
+		if (not $verse_found){ #chapter empty, stop here
+			last CHAPTER;
+		}
+	}
+	print STDERR "done.\n"; #used to sit after the return and never ran
+	return(@result);
+}
+
+sub processBookVariant {
+# Args: booknameA, filenameA, variantnameA,
+#       booknameB, filenameB, variantnameB,
+#       neutralBookName
+# Merges two textual variants of one book. Each verse found in either file gets a
+# "$neutralBookname $chapter:$verse" heading; when both variants have the verse,
+# each variant's lines are preceded by its variant name and an empty line
+# separates the two. Returns the output lines; dies if a file cannot be opened.
+	my ($booknameA, $filenameA, $variantNameA,
+	    $booknameB, $filenameB, $variantNameB,
+	    $neutralBookname) = @_;
+
+	# Three-argument open with lexical handles: names are never taken as modes.
+	open( my $fhA, '<', "$prefix/$filenameA") or die("Could not open file $prefix/$filenameA: $!");
+	my @BUFA = <$fhA>; chomp(@BUFA); close( $fhA );
+
+	open( my $fhB, '<', "$prefix/$filenameB") or die("Could not open file $prefix/$filenameB: $!");
+	my @BUFB = <$fhB>; chomp(@BUFB); close( $fhB );
+
+	my @result;
+
+	CHAPTER: foreach my $chapter(1..1000){
+		# Progress goes to STDERR; STDOUT is reserved for the converted text.
+		print STDERR "Processing $booknameA and $booknameB chapter $chapter.\n";
+		my $verse_found;
+		VERSE: foreach my $verse(1..1000){
+			my @verseContentA = &grabVerseContent($booknameA, $chapter, $verse, @BUFA);
+			my @verseContentB = &grabVerseContent($booknameB, $chapter, $verse, @BUFB);
+			#verse nonexistent in both variants, goto next chapter
+			last VERSE unless (@verseContentA or @verseContentB);
+			push(@result, "$neutralBookname $chapter:$verse");
+			$verse_found = 1;
+
+			#Variant names (and the separating empty line) only when both exist
+			my $both = (@verseContentA && @verseContentB);
+			if (@verseContentA){
+				push(@result, $variantNameA) if $both;
+				push(@result, @verseContentA);
+				push(@result, "") if $both;
+			}
+			if (@verseContentB){
+				push(@result, $variantNameB) if $both;
+				push(@result, @verseContentB);
+			}
+		}
+		if (not $verse_found){ #chapter empty, stop here
+			last CHAPTER;
+		}
+	}
+	print STDERR "done.\n"; #used to sit after the return and never ran
+	return(@result);
+}
+
+sub fixDaniel { #@buffer
+# Returns a copy of @buffer in which every line starting with "DANIHL" has been
+# appended (no separator) to the line before it rather than kept on its own.
+# A "DANIHL" line that directly follows another one, or that is the very first
+# line, is dropped, as before.
+	my @buffer = @_;
+	my @result;
+	foreach my $index (0 .. $#buffer){
+		next if $buffer[$index] =~ m/^DANIHL/; #Never emitted on its own
+		my $line = $buffer[$index];
+		# Only look ahead while a next line exists; the old code read past the end.
+		if ($index < $#buffer and $buffer[$index + 1] =~ m/^DANIHL/){
+			$line .= $buffer[$index + 1]; #Push both lines on one
+		}
+		push(@result, $line);
+	}
+	return @result;
+}
+
+my @result; #Output lines of every converted book, printed in one go below
+#push(@result, &processBookVariant("JoshA", "07.JoshA.par", "Codex Alexandrinus:", "JoshB", "06.JoshB.par", "Codex Vaticanus:", "Josh") );
+#push(@result, &processBookVariant("JudgA", "09.JudgesA.par", "Codex Alexandrinus:", "JudgB", "08.JudgesB.par", "Codex Vaticanus:", "Judges") );
+
+push(@result, &processBook("Isa", "40.Isaiah.par") ); #The only conversion currently enabled
+
+#my @danielTmp = &processBookVariant("Dan", "45.DanielOG.par", "Old Greek:", "DanTh", "46.DanielTh.par", "Theodotion:", "Daniel");
+#push(@result, &fixDaniel( @danielTmp ) );
+
+print( join("\n", @result) ); #Newline between elements only, none after the last

Deleted: trunk/modules/mt-lxx-parallel/prepare_files.cpp
===================================================================
--- trunk/modules/mt-lxx-parallel/prepare_files.cpp	2005-06-19 13:10:52 UTC (rev 42)
+++ trunk/modules/mt-lxx-parallel/prepare_files.cpp	2005-10-06 09:23:26 UTC (rev 43)
@@ -1,415 +0,0 @@
-/*
-	April/2005
-	
-	Creates the composite files for needed to process the parallel MS/LXX text.
-	JoshA.par & JoshB.par >> Joshua_processed.par
-	DanielOG.par & DanielTh.par >> Daniel_processed.par
-*/
-
-#include <stdio.h>
-#include <string.h>
-
-// The file paths are hardwired, change them according to their path's on your system.
-
-void readfile(FILE * fs, char * destination, bool bfix);
-bool checkforchapter(const char * source, const char * name, int chapter);
-
-void processJoshua();
-bool processverseJoshua(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse);
-
-void processDaniel();
-bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
-
-void processJudges();
-bool processverseJudges(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
-
-
-void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse);
-int  strfind(const char * source, const char * str, int pos);
-void strcatrange(const char * source, char * destination, int start, int size);
-
-char bufa[500000], bufb[500000], dest[200000];
-
-int main(int argc, char * argv[], char * envp[])
-{
-	processJoshua();
-	processDaniel();
-	processJudges();
-	
-	return 1;
-}
-
-void processJoshua(){
-	FILE * input, * output;
-	int chapter, verse;
-
-	// Load both Joshua files into memory, and create the output file.
-	// Note that JoshB is stored in bufa, and JoshA is stored in bufb.
-	// JoshB is the more complete file.
-
-	input = fopen("06.JoshB.par", "r");
-	if (!input)	{
-		printf("Unable to open 06.JoshB.par\n");
-		return;
-	}
-	readfile(input, bufa, false);
-	fclose(input);
-	
-	input = fopen("07.JoshA.par", "r");
-	if (!input)	{
-		printf("Unable to open 06.JoshA.par\n");
-		return;
-	}
-	readfile(input, bufb, false);
-	fclose(input);
-
-	output = fopen("Joshua_processed.par", "w+");
-	if (!output){
-		printf("Unable to open/create Joshua_processed.par\n");
-		return ;
-	}
-
-	chapter = 1;
-	verse = 1;
-	
-	// Break only when no entries for the current chapter can be found in either file.
-	while ( checkforchapter(bufa, "JoshB", chapter) || checkforchapter(bufb, "JoshA", chapter) ){
-
-		while (verse < 200){
-			// It is possible that both files may fail to include the current verse,
-			// to avoid premature termination of the process try to find 1-200.
-			dest[0] = '\0';
-
-			if ( processverseJoshua(bufb, bufa, dest, chapter, verse) ){
-				fputs(dest, output);
-				printf("%i:%i\n", chapter, verse);
-			}
-
-			verse ++;
-		}
-		chapter ++;
-		verse = 1;
-	}
-		
-	fclose(output);
-	printf("\nFinished Joshua\n");		
-}
-
-void processDaniel(){
-	FILE * input, * output;
-	int chapter, verse;
-
-	// Same process as above for Joshua.
-	input = fopen("45.DanielOG.par", "r");	
-	if (!input)	{
-		printf("Unable to open 45.DanielOG.par\n");
-		return;
-	}
-	readfile(input, bufa, true);
-	fclose(input);
-
-	input = fopen("46.DanielTh.par", "r");
-	if (!input){
-		printf("Unable to open 46.DanielTh.par\n");
-		return;
-	}
-	readfile(input, bufb, false);
-	fclose(input);
-
-	output = fopen("Daniel_processed.par", "w+");
-	if (!output){
-		printf("Unable to open/create Daniel_processed.par\n");
-		return;
-	}
-
-	chapter = 1;
-	verse = 1;
-		
-	while ( checkforchapter(bufa, "Dan", chapter) || checkforchapter(bufb, "DanTh", chapter) ){
-	
-		while (verse < 200){
-			dest[0] = '\0';
-
-			if ( processverseDaniel(bufa, bufb, dest, chapter, verse) ){
-				fputs(dest, output);
-				printf("%i:%i\n", chapter, verse);
-			}
-			verse ++;
-		}
-		chapter ++;
-		verse = 1;
-	}
-		
-	fclose(output);
-	printf("\nFinished Daniel\n");
-}
-
-void processJudges(){
-	FILE * input, * output;
-	int chapter, verse;
-
-	input = fopen("08.JudgesB.par", "r");
-	
-	if (!input){
-		printf("Unable to open 08.JudgesB.par\n");
-		return;
-	}
-	readfile(input, bufa, false);
-	fclose(input);
-	
-	input = fopen("09.JudgesA.par", "r");
-	if (!input){
-		printf("Unable to open 09.JudgesA.par\n");
-		return;
-	}
-	readfile(input, bufb, false);
-	fclose(input);
-
-	output = fopen("Judges_processed.par", "w+");
-
-	if (!output){
-		printf("Unable to open/create Judges_processed.par\n");
-		return;
-	}
-
-	chapter = 1;
-	verse = 1;
-	
-	// Break only when no entries for the current chapter can be found in either file.
-	while (checkforchapter(bufa, "JudgB", chapter) == 1 || checkforchapter(bufb, "JudgA", chapter)){
-
-		while (verse < 200){
-			// It is possible that both files may fail to include the current verse,
-			// to avoid premature termination of the process try to find 1-200.
-			dest[0] = '\0';
-
-			if (processverseJudges(bufb, bufa, dest, chapter, verse)){
-				fputs(dest, output);
-				printf("%i:%i\n", chapter, verse);
-			}
-			verse ++;
-		}
-		chapter ++;
-		verse = 1;
-	}
-		
-	fclose(output);
-	printf("\nFinished Judges\n");
-}
-
-
-void readfile(FILE * fs, char * destination, bool bfix)
-{
-	// Read a source file completely into memory.
-	char * pos, buf[1024];
-	
-	while (fgets(buf, 1024, fs)){
-		// Fix for verse 3:56 in DanielOG.par.
-		if (bfix){
-			pos = strstr(buf, "Dan 3:56");
-			if (pos){
-				pos[6] = '2';
-				pos[7] = '3';
-				bfix = false;
-			}
-		}
-
-		// Fix for cases of DANIHL which should have been on the preceeding line.
-		if (strstr(buf, "DANIHL") == buf)
-			destination[strlen(destination) - 1] = '\0';
-
-		// Fix for Linux, input files have have Windows \r\n.
-		// On Linux output files will not have '\r'.
-		pos = strchr(buf, '\r');
-		if (pos){
-			pos[0] = '\n';
-			pos[1] = '\0';
-		}
-		strcat(destination, buf);
-	}
-}
-
-bool checkforchapter(const char * source, const char * name, int chapter)
-{
-	// Check to see if this chapter id can be found in the buffer.
-	char title[32];
-	sprintf(title, "%s %i:", name, chapter);
-
-	if (strstr(source, title) != NULL)
-		return true;
-	else
-		return false;
-}
-
-bool processverseJoshua(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse)
-{
-	char title[32], *posa, *posb;
-
-	// Check each buffer to see if it contains the current verse.
-	sprintf(title, "JoshA %i:%i\n", chapter, verse);
-	posa = strstr(sourcea, title);
-
-	sprintf(title, "JoshB %i:%i\n", chapter, verse);
-	posb = strstr(sourceb, title);
-
-	if (!posa && !posb)
-		return false;
-
-	sprintf(destination, "Josh %i:%i", chapter, verse);
-
-	if (posa)
-	{
-		// If both files contain the verse identify which
-		// file it came from.
-		if (posb)
-			strcat(destination, "\nCodex Alexandrinus:");
-
-		sprintf(title, "JoshA %i:%i\n", chapter, verse);
-
-		// The file may contain multiple entries for the verse,
-		// collectverses will grab all entries.
-		collectverses(sourcea, destination, "JoshA", title, chapter, verse);
-	}
-
-	if (posb)
-	{
-		// As above.
-		if (posa)
-			strcat(destination, "\nCodex Vaticanus:");
-
-		sprintf(title, "JoshB %i:%i\n", chapter, verse);
-		collectverses(sourceb, destination, "JoshB", title, chapter, verse);
-	}
-
-	strcat(destination, "\n");
-	return true;
-}
-
-bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse)
-{
-	char title[32], *posa, *posb;
-
-	// Same as processverseJoshua except for book name, and file identifiers.
-	sprintf(title, "Dan %i:%i\n", chapter, verse);
-	posa = strstr(sourcea, title);
-
-	sprintf(title, "DanTh %i:%i\n", chapter, verse);
-	posb = strstr(sourceb, title);
-
-	if (!posa && !posb)
-		return false;
-
-	sprintf(destination, "Dan %i:%i", chapter, verse);
-
-	if (posa)
-	{
-		if (posb)
-			strcat(destination, "\nSeptuagint:");
-
-		sprintf(title, "Dan %i:%i\n", chapter, verse);
-		collectverses(sourcea, destination, "Dan ", title, chapter, verse);
-	}
-
-	if (posb)
-	{
-		if (posa)
-			strcat(destination, "\nTheodotion:");
-
-		sprintf(title, "DanTh %i:%i\n", chapter, verse);
-		collectverses(sourceb, destination, "DanTh ", title, chapter, verse);
-	}
-
-	strcat(destination, "\n");
-	return true;
-}
-
-bool processverseJudges(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse)
-{
-	char title[32], *posa, *posb;
-
-	// Check each buffer to see if it contains the current verse.
-	sprintf(title, "JudgA %i:%i\n", chapter, verse);
-	posa = strstr(sourcea, title);
-
-	sprintf(title, "JudgB %i:%i\n", chapter, verse);
-	posb = strstr(sourceb, title);
-
-	if (!posa && !posb)
-		return false;
-
-	sprintf(destination, "Judg %i:%i", chapter, verse);
-
-	if (posa){
-		// If both files contain the verse identify which
-		// file it came from.
-		if (posb)
-			strcat(destination, "\nCodex Alexandrinus:");
-
-		sprintf(title, "JudgA %i:%i\n", chapter, verse);
-
-		// The file may contain multiple entries for the verse,
-		// collectverses will grab all entries.
-		collectverses(sourcea, destination, "JudgA", title, chapter, verse);
-	}
-
-	if (posb){
-		// As above.
-		if (posa)
-			strcat(destination, "\nCodex Vaticanus:");
-
-		sprintf(title, "JudgB %i:%i\n", chapter, verse);
-		collectverses(sourceb, destination, "JudgB", title, chapter, verse);
-	}
-
-	strcat(destination, "\n");
-	return true;
-}
-
-void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse)
-{
-	// Find all verses that have the specified id in source,
-	// and add them to destination.
-	const char * pos = source;
-	int end, hits = 0;
-
-	while (pos = strstr(pos, title)){
-		pos += strlen(title);
-		end = strfind(pos, book, 0);
-
-		if (end == -1)
-			end = strlen(pos);
-
-		// Insert a new line only if it is the first entry found.
-		if (!hits)
-			strcat(destination, "\n");
-
-		strcatrange(pos, destination, 0, end);
-
-		end = strlen(destination);
-
-		if (destination[end -1] == '\n')
-			destination[end -1] = '\0';
-
-		hits ++;
-	}
-}
-
-
-int strfind(const char * source, const char * str, int pos)
-{
-	// Get the index position of from strstr instead of a memory pointer.
-	const char * psz = strstr(source + pos, str);
-	
-	if (!psz)
-		return -1;
-
-	return psz - source;
-}
-
-void strcatrange(const char * source, char * destination, int start, int size)
-{
-	// Copy a specified range from source to destination, and terminate.
-	int len = strlen(destination); 
-	memcpy(&destination[len], &source[start], size);
-	destination[len + size] = '\0';
-}

Deleted: trunk/modules/mt-lxx-parallel/run.sh
===================================================================
--- trunk/modules/mt-lxx-parallel/run.sh	2005-06-19 13:10:52 UTC (rev 42)
+++ trunk/modules/mt-lxx-parallel/run.sh	2005-10-06 09:23:26 UTC (rev 43)
@@ -1,64 +0,0 @@
-#!/bin/bash
-
-ORIG_FILES="\
-01.Genesis.par \
-02.Exodus.par \
-03.Lev.par \
-04.Num.par \
-05.Deut.par \
-06.JoshB.par \
-07.JoshA.par \
-08.JudgesB.par \
-09.JudgesA.par \
-10.Ruth.par \
-11.1Sam.par \
-12.2Sam.par \
-13.1Kings.par \
-14.2Kings.par \
-15.1Chron.par \
-16.2Chron.par \
-17.1Esdras.par \
-18.Esther.par \
-18.Ezra.par \
-19.Neh.par \
-20.Psalms.par \
-22.Ps151.par \
-23.Prov.par \
-24.Qoh.par \
-25.Cant.par \
-26.Job.par \
-27.Sirach.par \
-28.Hosea.par \
-29.Micah.par \
-30.Amos.par \
-31.Joel.par \
-32.Jonah.par \
-33.Obadiah.par \
-34.Nahum.par \
-35.Hab.par \
-36.Zeph.par \
-37.Haggai.par \
-38.Zech.par \
-39.Malachi.par \
-40.Isaiah.par \
-41.Jer.par \
-42.Baruch.par \
-43.Lam.par \
-44.Ezekiel.par \
-45.DanielOG.par \
-46.DanielTh.par"
-
-TEMP_DIR="tmp/"
-
-mkdir $TEMP_DIR;
-rm $TEMP_DIR/*;
-cp $ORIG_FILES $TEMP_DIR;
-
-g++ prepare_files.cpp -o $TEMP_DIR/prepare_files;
-
-#THIS WILL CREATE Joshua_processed.par and Daniel_processed.par
-cd $TEMP_DIR; 
-prepare_files;
-#These are not needed in TEMP_DIR any more
-rm "06.JoshB.par" "07.JoshA.par" "08.JudgesB.par" "09.JudgesA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files"
-