[sword-cvs] sword/utilities mod2zmod2.cpp, NONE, 1.1 vpl2zmod2.cpp, NONE, 1.1

sword at www.crosswire.org sword at www.crosswire.org
Fri May 7 10:28:20 MST 2004


Update of /cvs/core/sword/utilities
In directory www:/tmp/cvs-serv7037

Added Files:
	mod2zmod2.cpp vpl2zmod2.cpp 
Log Message:

dglassey: some apps for making zText2 modules


--- NEW FILE: mod2zmod2.cpp ---
// Compression on variable granularity

#include <fcntl.h>
#include <iostream>
#include <fstream>
#include <string>

#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif

//#include <ztext.h>
#include <ztext2.h>
//#include <zld.h>
//#include <zcom.h>
#include <swmgr.h>
#include <lzsscomprs.h>
#include <zipcomprs.h>
#include <versekey.h>
#include <versekey2.h>
#include <stdio.h>
#include <cipherfil.h>
#ifndef NO_SWORD_NAMESPACE
using sword::SWCompress;
using sword::CipherFilter;
using sword::SWModule;
using sword::SWMgr;
using sword::ModMap;
using sword::zText2;
//using sword::zLD;
//using sword::zCom;
using sword::SWFilter;
using sword::VerseKey2;
using sword::VerseKey;
using sword::SWKey;
using sword::SW_POSITION;
using sword::ZipCompress;
using sword::LZSSCompress;
#endif

using std::cerr;
using std::string;
using std::cout;
using std::endl;

void errorOutHelp(char *appName) {
	cerr << appName << " - a tool to create compressed Sword modules\n";
	cerr << "version 0.1\n\n";
	cerr << "usage: "<< appName << " <modname> <datapath> [blockType [blockNum [compressType [cipherKey]]]]\n\n";
	cerr << "datapath: the directory in which to write the zModule\n";
	cerr << "blockType  : (default 4)\n\t2 - verses\n\t3 - chapters\n\t4 - books\n";
	cerr << "blockNum  : (default 1)\n\tnumber of blockType per block\n";
	cerr << "compressType: (default 1):\n\t1 - LZSS\n\t2 - Zip\n";
	cerr << "\n\n";
	exit(-1);
}


int main(int argc, char **argv)
{
	int iType = 4;
	int iPerBlock = 1;
	int compType = 1;
	string cipherKey = "";
	SWCompress *compressor = 0;
	SWModule *inModule     = 0;
	SWModule *outModule    = 0;
	

	if ((argc < 3) || (argc > 6)) {
		errorOutHelp(argv[0]);
	}

	if (argc > 3) {
		iType = atoi(argv[3]);
		if (argc > 4) {
			iPerBlock = atoi(argv[4]);
			if (argc > 5) {
				compType = atoi(argv[5]);
				if (argc > 6) {
					cipherKey = argv[6];
				}
			}
		}
	}

	if ((iType < 2) || (iType > 5) || (iPerBlock < 1) || (compType < 1) || (compType > 2) || (!strcmp(argv[1], "-h")) || 
		(!strcmp(argv[1], "--help")) || (!strcmp(argv[1], "/?")) || (!strcmp(argv[1], "-?")) || (!strcmp(argv[1], "-help"))) {
		errorOutHelp(argv[0]);
	}

	SWMgr mgr;

	ModMap::iterator it = mgr.Modules.find(argv[1]);
	if (it == mgr.Modules.end()) {
		fprintf(stderr, "error: %s: couldn't find module: %s \n", argv[0], argv[1]);
		exit(-2);
	}

	inModule = it->second;

	// Try to initialize a default set of datafiles and indicies at our
	// datapath location passed to us from the user.
	
#define BIBLE 1
#define LEX 2
#define COM 3

	int modType = 0;
	if (!strcmp(inModule->Type(), "Biblical Texts")) modType = BIBLE;
	if (!strcmp(inModule->Type(), "Lexicons / Dictionaries")) modType = LEX;
	if (!strcmp(inModule->Type(), "Commentaries")) modType = COM;

	switch (compType) {	// these are deleted by zText
	case 1: compressor = new LZSSCompress(); break;
	case 2: compressor = new ZipCompress(); break;
	}
	cout << "Creating module " << argv[2] << endl;
	int result = 1;
	switch (modType) {
	case BIBLE:
		result = zText2::createModule(argv[2], iType);
		break;
	case LEX:
		//result = zLD::createModule(argv[2]);
		result = 1;
		break;
	case COM:
		result = zText2::createModule(argv[2], iType);
		break;
	}

	if (result) {
		fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[2]);
		exit(-3);
	}

	cout << "Opening datapath with driver " << argv[2] << " Type " << iType << " PerBlock " << iPerBlock << endl;
	switch (modType) {
	case BIBLE:
		outModule = new zText2(argv[2], 0, 0, iType, iPerBlock, compressor);	// open our datapath with our RawText driver.
		((VerseKey *)(SWKey *)(*inModule))->Headings(1);
		break;
	case LEX:
		//outModule = new zLD(argv[2], 0, 0, iType, compressor);	// open our datapath with our RawText driver.
		break;
	case COM:
		outModule = new zText2(argv[2], 0, 0, iType, iPerBlock, compressor);	// open our datapath with our RawText driver.
		((VerseKey *)(SWKey *)(*inModule))->Headings(1);
		break;
	}

	SWFilter *cipherFilter = 0;
	if (!cipherKey.empty()) {
		cipherFilter = new CipherFilter(cipherKey.c_str());
		outModule->AddRawFilter(cipherFilter);
	}

	string lastBuffer = "Something that would never be first module entry";
	SWKey bufferKey;
	SWKey lastBufferKey;
	SWKey *outModuleKey = outModule->CreateKey();
	VerseKey2 *vkey = SWDYNAMIC_CAST(VerseKey2, outModuleKey);
	outModuleKey->Persist(1);
	if (vkey) {
		vkey->Headings(1);
		vkey->AutoNormalize(0);
	}
	outModule->setKey(*outModuleKey);

	inModule->setSkipConsecutiveLinks(false);
	(*inModule) = TOP;
	while (!inModule->Error()) {
		bufferKey = *(SWKey *)(*inModule);
		cout << bufferKey << endl;
		// pseudo-check for link.  Will get most common links.
		if ((lastBuffer == inModule->getRawEntry()) &&(lastBuffer.length() > 0)) {
			*outModuleKey = bufferKey;
			outModule->linkEntry(&lastBufferKey);	// link to last key
		//cout << "Adding [" << bufferKey << "] link to: [" << lastBufferKey << "]\n";
		}
		else {
			lastBuffer = inModule->getRawEntry();
			lastBufferKey = inModule->KeyText();
			if (lastBuffer.length() > 0) {
				//cout << "Adding [" << bufferKey << "] new text. \n";
				*outModuleKey = bufferKey;
//				outModule->getRawEntry();	// snap
//				outModule->setKey(bufferKey);
				(*outModule) << lastBuffer.c_str();	// save new text;
			}
			else {
				cout << "Skipping [" << bufferKey << "] no entry in inModule. \n";
			}
		}
		(*inModule)++;
	}
	delete outModule;
	delete outModuleKey;
	if (cipherFilter)
		delete cipherFilter;
}


--- NEW FILE: vpl2zmod2.cpp ---
#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>

#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif

#include <swmgr.h>
#include <ztext2.h>
#include <iostream>
#include <swbuf.h>
#include <lzsscomprs.h>
#include <zipcomprs.h>
#include <versekey2.h>

#ifndef O_BINARY
#define O_BINARY 0
#endif

#ifndef NO_SWORD_NAMESPACE
using sword::SWMgr;
using sword::SWBuf;
using sword::zText2;
using sword::VerseKey2;
using sword::SW_POSITION;
using sword::SWCompress;
//using sword::CipherFilter;
using sword::zText2;
using sword::ZipCompress;
using sword::LZSSCompress;
#endif

using std::string;

char readline(int fd, char **buf) {
	char ch;
	if (*buf)
		delete [] *buf;
	*buf = 0;
	int len;


	long index = lseek(fd, 0, SEEK_CUR);
	// clean up any preceding white space
	while ((len = read(fd, &ch, 1)) == 1) {
		if ((ch != 13) && (ch != ' ') && (ch != '\t'))
			break;
		else index++;
	}


	while (ch != 10) {
        if ((len = read(fd, &ch, 1)) != 1)
			break;
	}
	
	int size = (lseek(fd, 0, SEEK_CUR) - index) - 1;

	*buf = new char [ size + 1 ];

	if (size > 0) {
		lseek(fd, index, SEEK_SET);
		read(fd, *buf, size);
		read(fd, &ch, 1);   //pop terminating char
		(*buf)[size] = 0;

		// clean up any trailing junk on buf
		for (char *it = *buf+(strlen(*buf)-1); it > *buf; it--) {
			if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
				break;
			else *it = 0;
		}
	}
	else **buf = 0;
	return !len;
}


char *parseVReg(char *buf) {
	char stage = 0;

	while (*buf) {
		switch (stage) {
		case 0:
			if (isalpha(*buf))
				stage++;
			break;
		case 1:
			if (isdigit(*buf))
				stage++;
			break;
		case 2:
			if (*buf == ':')
				stage++;
			break;
		case 3:
			if (isdigit(*buf))
				stage++;
			break;
	   case 4:
			if (*buf == ' ') {
				*buf = 0;
				return ++buf;
			}
			break;
		}
		buf++;
	}
	return (stage == 4) ? buf : 0;  // if we got to stage 4 return after key buf, else return 0;
}


bool isKJVRef(const char *buf) {
	VerseKey2 vk, test;
	vk.AutoNormalize(0);
	vk.Headings(1);	// turn on mod/testmnt/book/chap headings
	vk.Persist(1);
	// lets do some tests on the verse --------------
	vk = buf;
	test = buf;

	if (vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading
		//std::cerr << (const char*)vk << " == "  << (const char*)test << std::endl;
		return (vk == test);
	}
	else return true;	// no check if we're a heading... Probably bad.
}


void fixText(char *text) {
	char *to = text;
	while(*text) {
		*to++ = *text++;
		*to++ = *text++;
		if (!*text)
			break;
		if (*text != ' ')
			std::cerr << "problem\n";
		else	text++;
	}
	*to = 0;
}

int main(int argc, char **argv) {
	int iType = 3;
	int iPerBlock = 7;
	int compType = 2;
	SWBuf cipherKey = "";
	SWCompress *compressor = 0;

	// Let's test our command line arguments
	if (argc < 2) {
//		fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]);
		fprintf(stderr, "usage: %s <source_vpl_file> </path/to/output/mod/> [0|1 - prepended verse refs] [0|1 - NT only]\n\n", argv[0]);
		fprintf(stderr, "\tWith no verse refs, source file must contain exactly 31102 lines.\n");
		fprintf(stderr, "\tThis is KJV verse count plus headings for MODULE,\n");
		fprintf(stderr, "\tTESTAMENT, BOOK, CHAPTER. An example snippet follows:\n\n");
		fprintf(stderr, "\t\tMODULE HEADER\n");
		fprintf(stderr, "\t\tOLD TESTAMENT HEADER\n");
		fprintf(stderr, "\t\tGENESIS HEADER\n");
		fprintf(stderr, "\t\tCHAPTER 1 HEADER\n");
		fprintf(stderr, "\t\tIn the beginning...\n\n");
		fprintf(stderr, "\t... implying there must also be a CHAPTER2 HEADER,\n");
        fprintf(stderr, "\tEXODUS HEADER, NEW TESTAMENT HEADER, etc.  If there is no text for\n");
		fprintf(stderr, "\tthe header, a blank line must, at least, hold place.\n\n");
		fprintf(stderr, "\tWith verse refs, source file must simply contain any number of lines,\n");
		fprintf(stderr, "\tthat begin with the verse reference for which it is an entry.  e.g.:\n\n");
		fprintf(stderr, "\t\tgen 1:0 CHAPTER 1 HEADER\n");
		fprintf(stderr, "\t\tgen 1:1 In the beginning...\n\n");
		exit(-1);
	}

	// Let's see if we can open our input file
	int fd = open(argv[1], O_RDONLY|O_BINARY);
	if (fd < 0) {
		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
		exit(-2);
	}

	// not used yet, but for future support of a vpl file with each line
	// prepended with verse reference, eg. "Gen 1:1 In the beginning..."
	bool vref = false;

	// if 'nt' is the 4th arg, our vpl file only has the NT
	bool ntonly = false;

	if (argc > 3) {
		vref = (argv[3][0] == '0') ? false : true;
		if (argc > 4) {
			ntonly = (argv[4][0] == '0') ? false : true;
			if (argc > 5) {
				iType = atoi(argv[5]);
				if (argc > 6) {
					iPerBlock = atoi(argv[6]);
					if (argc > 7) {
						compType = atoi(argv[7]);
						if (argc > 8) {
							cipherKey = argv[8];
						}
					}
				}
			}
		}
	}

	// Try to initialize a default set of datafiles and indicies at our
	// datapath location passed to us from the user.
	if (zText2::createModule(argv[2], iType)) {
		fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[2]);
		exit(-3);
	}
	switch (compType) {	// these are deleted by zText
	case 1: compressor = new LZSSCompress(); break;
	case 2: compressor = new ZipCompress(); break;
	}

	// Do some initialization stuff
	char *buffer = 0;
	zText2 mod(argv[2], 0, 0, iType, iPerBlock, compressor);	// open our datapath with our zText2 driver.
	VerseKey2 vk;
	vk.AutoNormalize(0);
	vk.Headings(1);	// turn on mod/testmnt/book/chap headings
	vk.Persist(1);

	mod.setKey(vk);

	// Loop through module from TOP to BOTTOM and set next line from
	// input file as text for this entry in the module
	mod = TOP;
	if (ntonly) vk = "Matthew 1:1";
	  
	int successive = 0;  //part of hack below
	while ((!mod.Error()) && (!readline(fd, &buffer))) {
		if (*buffer == '|')	// comments, ignore line
			continue;
		if (vref) {
			const char *verseText = parseVReg(buffer);
			if (!verseText) {	// if we didn't find a valid verse ref
				std::cerr << "No valid verse ref found on line: " << buffer << "\n";
				exit(-4);
			}

			vk = buffer;
			if (vk.Error()) {
				std::cerr << "Error parsing key: " << buffer << "\n";
				exit(-5);
			}
			string orig = mod.getRawEntry();

			if (!isKJVRef(buffer)) {
				VerseKey2 origVK = vk;
				/* This block is functioning improperly -- problem with AutoNormalize???
				do {
					vk--;
				}
				while (!vk.Error() && !isKJVRef(vk)); */
				//hack to replace above:
				successive++;
				vk -= successive;
				orig = mod.getRawEntry();

				std::cerr << "Not a valid KJV ref: " << origVK << "\n";
				std::cerr << "appending to ref: " << vk << "\n";
				orig += " [ (";
				orig += origVK;
				orig += ") ";
				orig += verseText;
				orig += " ] ";
				verseText = orig.c_str();
			}
			else {
			  successive = 0;
			}

			if (orig.length() > 1)
				   std::cerr << "Warning, overwriting verse: " << vk << std::endl;
			  
			// ------------- End verse tests -----------------
			mod << verseText;	// save text to module at current position
		}
		else {
			fixText(buffer);
			mod << buffer;	// save text to module at current position
			mod++;	// increment module position
		}
	}

	// clear up our buffer that readline might have allocated
	if (buffer)
		delete [] buffer;
}




More information about the sword-cvs mailing list