[sword-cvs] sword/src/modules/texts/rawtext rawtext.cpp,1.65,1.66
sword@www.crosswire.org
sword@www.crosswire.org
Fri, 29 Aug 2003 12:02:47 -0700
Update of /usr/local/cvsroot/sword/src/modules/texts/rawtext
In directory www:/tmp/cvs-serv3169/src/modules/texts/rawtext
Modified Files:
rawtext.cpp
Log Message:
Added lucene to rawtext driver so all Bibles can use lucene search.
Looking forward to having a better search PLUGIN mechanism soon.
Index: rawtext.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/src/modules/texts/rawtext/rawtext.cpp,v
retrieving revision 1.65
retrieving revision 1.66
diff -u -d -r1.65 -r1.66
--- rawtext.cpp 29 Aug 2003 06:00:16 -0000 1.65
+++ rawtext.cpp 29 Aug 2003 19:02:45 -0000 1.66
@@ -17,19 +17,26 @@
#include <rawverse.h>
#include <rawtext.h>
+#include <regex.h> // GNU
+#ifdef USELUCENE
+#include <CLucene/CLucene.h>
+using namespace lucene::search;
+using namespace lucene::queryParser;
+#else
#include <map>
#include <list>
#include <algorithm>
-#include <regex.h> // GNU
-
-#ifndef O_BINARY
-#define O_BINARY 0
-#endif
using std::map;
using std::list;
using std::find;
+#endif
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
SWORD_NAMESPACE_START
typedef map < SWBuf, list<long> > strlist;
@@ -47,6 +54,19 @@
: SWText(iname, idesc, idisp, enc, dir, mark, ilang),
RawVerse(ipath) {
+#ifdef USELUCENE
+ SWBuf fname;
+ fname = path;
+ ir = 0;
+ is = 0;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/lucene";
+ if (IndexReader::indexExists(fname.c_str())) {
+ ir = &IndexReader::open(fname);
+ is = new IndexSearcher(*ir);
+ }
+#else
SWBuf fname;
fname = path;
char ch = fname.c_str()[strlen(fname.c_str())-1];
@@ -62,6 +82,7 @@
fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str());
}
}
+#endif
}
@@ -69,13 +90,20 @@
* RawText Destructor - Cleans up instance of RawText
*/
-RawText::~RawText()
-{
+RawText::~RawText() {
+#ifdef USELUCENE
+ if (is)
+ is->close();
+
+ if (ir)
+ delete ir;
+#else
if (fastSearch[0])
delete fastSearch[0];
if (fastSearch[1])
delete fastSearch[1];
+#endif
}
@@ -137,6 +165,84 @@
signed char RawText::createSearchFramework() {
+#ifdef USELUCENE
+ SWKey *savekey = 0;
+ SWKey *searchkey = 0;
+ SWKey textkey;
+ char *word = 0;
+ char *wordBuf = 0;
+
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ savekey = CreateKey();
+ *savekey = *key;
+ }
+ else savekey = key;
+
+ searchkey = (key->Persist())?key->clone():0;
+ if (searchkey) {
+ searchkey->Persist(1);
+ setKey(*searchkey);
+ }
+
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey *lkey = (VerseKey *)key;
+
+ // iterate thru each entry in module
+
+ IndexWriter* writer = NULL;
+ Directory* d = NULL;
+
+ lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();
+ SWBuf target = path;
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target += "/lucene";
+
+ if (IndexReader::indexExists(target.c_str())) {
+ d = &FSDirectory::getDirectory(target.c_str(), false);
+ if (IndexReader::isLocked(*d)) {
+ IndexReader::unlock(*d);
+ }
+
+ writer = new IndexWriter(*d, an, false);
+ } else {
+ d = &FSDirectory::getDirectory(target.c_str(), true);
+ writer = new IndexWriter( *d ,an, true);
+ }
+
+
+
+ while (!Error()) {
+ Document &doc = *new Document();
+ doc.add( Field::Text(_T("key"), (const char *)*lkey ) );
+ doc.add( Field::Text(_T("content"), StripText()) );
+ writer->addDocument(doc);
+ delete &doc;
+
+ (*this)++;
+ }
+
+ writer->optimize();
+ writer->close();
+ delete writer;
+ delete &an;
+
+ // reposition module back to where it was before we were called
+ setKey(*savekey);
+
+ if (!savekey->Persist())
+ delete savekey;
+
+ if (searchkey)
+ delete searchkey;
+
+
+#else
SWKey *savekey = 0;
SWKey *searchkey = 0;
SWKey textkey;
@@ -262,6 +368,7 @@
close(datfd);
close(idxfd);
}
+#endif
return 0;
}
@@ -281,8 +388,85 @@
* RET: listkey set to verses that contain istr
*/
-ListKey &RawText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData)
-{
+ListKey &RawText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+ listkey.ClearList();
+
+ if ((is) && (ir)) {
+
+ switch (searchType) {
+ case -3: {
+
+
+ // test to see if our scope for this search is bounded by a
+ // VerseKey
+ VerseKey *testKeyType = 0, vk;
+ try {
+ testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+ }
+ catch ( ... ) {}
+ // if we don't have a VerseKey * decendant we can't handle
+ // because of scope.
+ // In the future, add bool SWKey::isValid(const char *tryString);
+ if (!testKeyType)
+ break;
+
+
+ // check if we just want to see if search is supported.
+ // If we've gotten this far, then it is supported.
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = true;
+ return listkey;
+ }
+
+ (*percent)(10, percentUserData);
+
+ standard::StandardAnalyzer analyzer;
+ Query &q = QueryParser::Parse(istr, _T("content"), analyzer);
+ (*percent)(20, percentUserData);
+ Hits &h = is->search(q);
+ (*percent)(80, percentUserData);
+
+
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h.Length(); i++) {
+ Document &doc = h.doc(i);
+
+ // set a temporary verse key to this module position
+ vk = doc.get(_T("key"));
+
+ // check scope
+ // Try to set our scope key to this verse key
+ if (scope) {
+ *testKeyType = vk;
+
+ // check to see if it set ok and if so, add to our return list
+ if (*testKeyType == vk)
+ listkey << (const char *) vk;
+ }
+ else listkey << (const char*) vk;
+ }
+ (*percent)(98, percentUserData);
+
+ delete &h;
+ delete &q;
+
+ listkey = TOP;
+ (*percent)(100, percentUserData);
+ return listkey;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // check if we just want to see if search is supported
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = false;
+ return listkey;
+ }
+#else
listkey.ClearList();
if ((fastSearch[0]) && (fastSearch[1])) {
@@ -461,6 +645,7 @@
return listkey;
}
+#endif
// if we don't support this search, fall back to base class
return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
}