[sword-cvs] sword/src/modules/texts/ztext ztext2.cpp,NONE,1.1
sword@www.crosswire.org
sword@www.crosswire.org
Mon, 12 Apr 2004 06:53:07 -0700
Update of /cvs/core/sword/src/modules/texts/ztext
In directory www:/tmp/cvs-serv31934/texts/ztext
Added Files:
ztext2.cpp
Log Message:
dglassey: add new zverse and ztext that use versekey2 for indexes and have initial support for separated markup
--- NEW FILE: ztext2.cpp ---
/******************************************************************************
* ztext2.cpp - code for class 'zText2' - a module that reads compressed text
* files: ot and nt using indexes ??.vss
*/
#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif
#include <utilfuns.h>
#include <ztext2.h>
#include <versekey2.h>
#include <regex.h> // GNU
#ifdef USELUCENE
#include <CLucene/CLucene.h>
using namespace lucene::search;
using namespace lucene::queryParser;
#endif
SWORD_NAMESPACE_START
/******************************************************************************
 * zText2 Constructor - Initializes data for instance of zText2
 *
 * ENT:	ipath		- path to data files
 *	iname		- Internal name for module
 *	idesc		- Name to display to user for module
 *	iblockType	- verse, chapter, book, etc. of index chunks
 *	idxPerBlock	- passed through to the zVerse2 base class
 *	icomp		- Compressor object
 *	idisp		- Display object to use for displaying
 *	enc, dir, mark, ilang - passed through to the SWText base class
 *
 * When built with USELUCENE, an existing lucene index is opened so that
 * search() can use it; otherwise ir and is stay null.
 */
zText2::zText2(const char *ipath, const char *iname, const char *idesc,
		int iblockType, int idxPerBlock, SWCompress *icomp, SWDisplay *idisp,
		SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
	: zVerse2(ipath, -1, iblockType, icomp, idxPerBlock),
	  SWText(iname, idesc, idisp, enc, dir, mark, ilang) {

	lastWriteKey = 0;	// nothing has been written yet
	blockType = iblockType;

#ifdef USELUCENE
	ir = 0;
	is = 0;

	SWBuf indexDir = path;
	const char *rawDir = indexDir.c_str();
	const char tail = rawDir[strlen(rawDir) - 1];
	const bool endsWithSeparator = (tail == '/') || (tail == '\\');

	// NOTE(review): "/lucene" is only appended when the path has no trailing
	// separator; a path that ends in '/' or '\\' is used as the index
	// directory itself.  createSearchFramework() builds its target the same
	// way, so the two locations agree.
	if (!endsWithSeparator)
		indexDir += "/lucene";

	if (IndexReader::indexExists(indexDir.c_str())) {
		ir = &IndexReader::open(indexDir);
		is = new IndexSearcher(*ir);
	}
#endif
}
/******************************************************************************
 * zText2 Destructor - Cleans up instance of zText2
 *
 * Flushes any pending cached writes, releases the key remembered from the
 * last setEntry() call and, when built with USELUCENE, shuts down the
 * searcher and reader opened by the constructor.
 */
zText2::~zText2()
{
	flushCache();

	delete lastWriteKey;	// deleting a null pointer is a no-op
	lastWriteKey = 0;

#ifdef USELUCENE
	// the searcher was allocated with new in the constructor; closing it
	// alone would leak the object, so delete it too.  It is released before
	// the reader it was constructed from.
	if (is) {
		is->close();
		delete is;
		is = 0;
	}

	if (ir) {
		delete ir;
		ir = 0;
	}
#endif
}
/******************************************************************************
 * zText2::getRawEntryBuf	- Returns the current verse buffer
 *
 * NOTE: this definition is compiled out by the surrounding #if 0.
 *
 * RET: buffer with verse
 */
#if 0	// do we really want this any more
SWBuf &zText2::getRawEntryBuf() {
	VerseKey2 &vk = getVerseKey();
	long offset = 0;
	unsigned short length = 0;

	// locate the entry for the current key
	findOffsetText(vk.Index(), &offset, &length);
	entrySize = length;	// support getEntrySize call

	// read it into a freshly emptied buffer and run the raw filters
	entryBuf = "";
	zReadText(offset, length, entryBuf);
	rawFilter(entryBuf, &vk);

//	if (!isUnicode())
#if 0
		prepText(entryBuf);
#endif

	return entryBuf;
}
#endif
/******************************************************************************
 * zText2::sameBlock	- Tells whether two keys fall into the same block
 *			  for this module's blockType
 *
 * ENT:	k1	- first key
 *	k2	- second key
 *
 * RET: true if both keys belong to the same block, otherwise false
 */
bool zText2::sameBlock(VerseKey2 *k1, VerseKey2 *k2) {
	switch (blockType) {
	case VERSEBLOCKS:
		if (k1->Verse() != k2->Verse())
			return false;
		// fall through: same verse also requires same chapter and book
	case CHAPTERBLOCKS:
		if (k1->Chapter() != k2->Chapter())
			return false;
		// fall through: same chapter also requires same book
	case BOOKBLOCKS:
		// stop here; the index test below belongs to INDEXBLOCKS only
		return k1->Book() == k2->Book();
	case INDEXBLOCKS:
		// NOTE(review): assumes an index block is a run of m_idxPerBlock
		// consecutive index values -- confirm against zVerse2.
		if (!m_idxPerBlock)	// avoid dividing by zero; each index is its own block
			return k1->Index() == k2->Index();
		// the block number is the quotient; the remainder is only the
		// position inside the block
		return (k1->Index() / m_idxPerBlock) == (k2->Index() / m_idxPerBlock);
	}
	return true;
}
void zText2::setEntry(const char *inbuf, long len) {
VerseKey2 &key = getVerseKey();
// see if we've jumped across blocks since last write
if (lastWriteKey) {
if (!sameBlock(lastWriteKey, &key)) {
flushCache();
}
delete lastWriteKey;
}
doSetText(key.Index(), inbuf, len);
lastWriteKey = (VerseKey2 *)key.clone(); // must delete
}
void zText2::linkEntry(const SWKey *inkey) {
VerseKey2 &destkey = getVerseKey();
const VerseKey2 *srckey = 0;
// see if we have a VerseKey2 * or decendant
SWTRY {
srckey = (const VerseKey2 *) SWDYNAMIC_CAST(VerseKey2, inkey);
}
SWCATCH ( ... ) {
}
// if we don't have a VerseKey2 * decendant, create our own
if (!srckey)
srckey = new VerseKey2(inkey);
doLinkEntry(destkey.Index(), srckey->Index());
if (inkey != srckey) // free our key if we created a VerseKey2
delete srckey;
}
/******************************************************************************
 * zText2::deleteEntry	- deletes the entry at the current key by
 *			  storing an empty text for it
 *
 */
void zText2::deleteEntry() {
	doSetText(getVerseKey().Index(), "");
}
/******************************************************************************
 * zText2::increment	- Increments module key a number of entries
 *
 * ENT:	steps	- Number of entries to jump forward (negative moves backward)
 *
 * A move only counts as a step when it reaches an entry whose offset or
 * size differs from the previous one and whose size is non-zero, or when
 * skipConsecutiveLinks is off.  If the key reports an error while moving,
 * it is restored to the last counted position and error is set to
 * KEYERR_OUTOFBOUNDS.
 */
void zText2::increment(int steps) {
	long start = 0;
	unsigned short size = 0;
	VerseKey2 *tmpkey = &getVerseKey();

	findOffsetText(tmpkey->Index(), &start, &size);

	SWKey lastgood = *tmpkey;	// last position that counted as a step
	while (steps) {
		const long laststart = start;
		const unsigned short lastsize = size;

		(steps > 0) ? (*key)++ : (*key)--;
		tmpkey = &getVerseKey();

		if ((error = key->Error())) {
			// ran off the end: go back to the last good position
			*key = lastgood;
			break;
		}

		findOffsetText(tmpkey->Index(), &start, &size);

		// a different entry that actually has a size
		const bool distinctEntry =
			((laststart != start) || (lastsize != size)) && (size);

		if (distinctEntry || (!skipConsecutiveLinks)) {
			steps += (steps < 0) ? 1 : -1;	// one step closer to zero
			lastgood = *tmpkey;
		}
	}
	error = (error) ? KEYERR_OUTOFBOUNDS : 0;
}
/******************************************************************************
 * zText2::getVerseKey	- Returns the module's current key as a VerseKey2
 *
 * RET: the module key itself when it is a VerseKey2 (or descendant), the
 *	current element of the module key when that is a ListKey holding a
 *	VerseKey2, otherwise a function-static VerseKey2 assigned from the
 *	module key.  The static fallback is shared by every call.
 */
VerseKey2 &zText2::getVerseKey() {
	static VerseKey2 tmpVK;
	// must start out null: the checks below read it even when the guarded
	// cast did not assign it
	VerseKey2 *key = 0;

	// see if we have a VerseKey2 * or descendant
	SWTRY {
		key = SWDYNAMIC_CAST(VerseKey2, this->key);
	}
	SWCATCH ( ... ) {	}

	if (!key) {
		// maybe a ListKey whose current element is a VerseKey2
		ListKey *lkTest = 0;
		SWTRY {
			lkTest = SWDYNAMIC_CAST(ListKey, this->key);
		}
		SWCATCH ( ... ) {	}
		if (lkTest) {
			SWTRY {
				key = SWDYNAMIC_CAST(VerseKey2, lkTest->GetElement());
			}
			SWCATCH ( ... ) {	}
		}
	}

	if (!key) {
		// no VerseKey2 available: convert into the shared temporary
		tmpVK = *(this->key);
		return tmpVK;
	}
	else	return *key;
}
#ifndef O_BINARY
#define O_BINARY 0
#endif
/******************************************************************************
 * zText2::createSearchFramework	- Builds the lucene search index
 *
 * When built with USELUCENE, walks the module from TOP until Error() and
 * adds one document per position with a "key" field (the key text) and a
 * "content" field (StripText()).  The module key is restored afterwards.
 * Without USELUCENE this does nothing.
 *
 * RET: always 0
 */
signed char zText2::createSearchFramework() {
#ifdef USELUCENE
	SWKey *savekey = 0;
	SWKey *searchkey = 0;

	// save key information so as not to disrupt original
	// module position
	if (!key->Persist()) {
		savekey = CreateKey();
		*savekey = *key;
	}
	else	savekey = key;

	searchkey = (key->Persist())?key->clone():0;
	if (searchkey) {
		searchkey->Persist(1);
		setKey(*searchkey);
	}

	// position module at the beginning
	*this = TOP;

	IndexWriter *writer = NULL;
	Directory *d = NULL;

	lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();

	// NOTE(review): "/lucene" is only appended when the path has no trailing
	// separator; the constructor locates the index the same way.
	SWBuf target = path;
	char ch = target.c_str()[strlen(target.c_str())-1];
	if ((ch != '/') && (ch != '\\'))
		target += "/lucene";

	if (IndexReader::indexExists(target.c_str())) {
		// reuse the existing index, clearing a stale lock first
		d = &FSDirectory::getDirectory(target.c_str(), false);
		if (IndexReader::isLocked(*d)) {
			IndexReader::unlock(*d);
		}
		writer = new IndexWriter(*d, an, false);
	} else {
		d = &FSDirectory::getDirectory(target.c_str(), true);
		writer = new IndexWriter( *d ,an, true);
	}

	// iterate thru each entry in module
	while (!Error()) {
		Document &doc = *new Document();
		// read the key text through the module key itself rather than
		// through an unchecked downcast to VerseKey2
		doc.add( Field::Text(_T("key"), (const char *)*key ) );
		doc.add( Field::Text(_T("content"), StripText()) );
		writer->addDocument(doc);
		delete &doc;

		(*this)++;
	}

	writer->optimize();
	writer->close();
	delete writer;
	delete &an;

	// reposition module back to where it was before we were called
	setKey(*savekey);

	if (!savekey->Persist())
		delete savekey;

	if (searchkey)
		delete searchkey;
#endif
	return 0;
}
/******************************************************************************
 * zText2::search	- Searches a module for a string
 *
 * ENT:	istr		- string for which to search
 * 	searchType	- type of search to perform
 *				>=0 - regex
 *				-1  - phrase
 *				-2  - multiword
 * 	flags		- options flags for search
 *	scope		- optional key limiting which hits are returned
 *	justCheckIfSupported	- if set, don't search, only tell if this
 *					function supports requested search.
 *	percent		- progress callback; may be null
 *	percentUserData	- passed back to the progress callback
 *
 * When built with USELUCENE and an index is open, a multiword search (-2)
 * is answered from the lucene index.  In that build a support check for any
 * other case reports false; an actual search this function does not handle
 * is passed to SWModule::search.
 *
 * RET: listkey set to verses that contain istr
 */
ListKey &zText2::search(const char *istr, int searchType, int flags, SWKey *scope,
		bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
#ifdef USELUCENE
	listkey.ClearList();

	if ((is) && (ir)) {

		switch (searchType) {
		case -2: {	// let lucene replace multiword for now

			// test to see if our scope for this search is bounded by a
			// VerseKey2
			VerseKey2 *testKeyType = 0, vk;
			SWTRY {
				testKeyType = SWDYNAMIC_CAST(VerseKey2, ((scope)?scope:key));
			}
			SWCATCH ( ... ) {}
			// if we don't have a VerseKey2 * descendant we can't handle
			// because of scope.
			// In the future, add bool SWKey::isValid(const char *tryString);
			if (!testKeyType)
				break;

			// check if we just want to see if search is supported.
			// If we've gotten this far, then it is supported.
			if (justCheckIfSupported) {
				*justCheckIfSupported = true;
				return listkey;
			}

			if (percent) (*percent)(10, percentUserData);

			standard::StandardAnalyzer analyzer;
			Query &q = QueryParser::Parse(istr, _T("content"), analyzer);
			if (percent) (*percent)(20, percentUserData);
			Hits &h = is->search(q);
			if (percent) (*percent)(80, percentUserData);

			// iterate thru each good module position that meets the search
			for (long i = 0; i < h.Length(); i++) {
				Document &doc = h.doc(i);

				// set a temporary verse key to this module position
				vk = doc.get(_T("key"));

				// check scope: try to set our scope key to this verse key
				// and see whether it set ok
				bool inScope = true;
				if (scope) {
					*testKeyType = vk;
					inScope = (*testKeyType == vk);
				}

				// record the score only for a hit that was really appended;
				// otherwise it would land on the previous element, or on no
				// element at all when the list is still empty
				if (inScope) {
					listkey << (const char *) vk;
					listkey.GetElement()->userData = (void *)(int)(h.score(i)*100);
				}
			}
			if (percent) (*percent)(98, percentUserData);

			delete &h;
			delete &q;

			listkey = TOP;
			if (percent) (*percent)(100, percentUserData);
			return listkey;
		}

		default:
			break;
		}
	}

	// check if we just want to see if search is supported
	if (justCheckIfSupported) {
		*justCheckIfSupported = false;
		return listkey;
	}
#endif
	// if we don't support this search, fall back to base class
	return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
}
SWORD_NAMESPACE_END