[sword-cvs] sword/src/modules/texts/ztext ztext.cpp,1.41,1.42
sword@www.crosswire.org
sword@www.crosswire.org
Thu, 28 Aug 2003 23:00:19 -0700
- Previous message: [sword-cvs] sword/include swsearchable.h,NONE,1.1 rawtext.h,1.27,1.28 swcacher.h,1.4,1.5 swmodule.h,1.67,1.68 ztext.h,1.28,1.29
- Next message: [sword-cvs] sword Makefile.am,1.37,1.38 usrinst.sh,1.32,1.33
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /usr/local/cvsroot/sword/src/modules/texts/ztext
In directory www:/tmp/cvs-serv27914/src/modules/texts/ztext
Modified Files:
ztext.cpp
Log Message:
Abstracted the search interface from SWModule
Added experimental implementation of fast
search framework to zText using
clucene
Index: ztext.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/src/modules/texts/ztext/ztext.cpp,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -d -r1.41 -r1.42
--- ztext.cpp 12 Aug 2003 05:36:31 -0000 1.41
+++ ztext.cpp 29 Aug 2003 06:00:16 -0000 1.42
@@ -19,6 +19,23 @@
#include <ztext.h>
//#include <zlib.h>
+#include <map>
+#include <list>
+#include <algorithm>
+#include <regex.h> // GNU
+
+
+
+using std::map;
+using std::list;
+using std::find;
+
+#ifdef USELUCENE
+#include <CLucene/CLucene.h>
+using namespace lucene::search;
+using namespace lucene::queryParser;
+#endif
+
SWORD_NAMESPACE_START
/******************************************************************************
@@ -36,6 +53,17 @@
: zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) {
blockType = iblockType;
lastWriteKey = 0;
+ SWBuf fname;
+ fname = path;
+ ir = 0;
+ is = 0;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/lucene";
+ if (IndexReader::indexExists(fname.c_str())) {
+ ir = &IndexReader::open(fname);
+ is = new IndexSearcher(*ir);
+ }
}
@@ -49,6 +77,12 @@
if (lastWriteKey)
delete lastWriteKey;
+
+ if (is)
+ ((IndexSearcher *)is)->close();
+
+ if (ir)
+ delete (IndexReader *)ir;
}
@@ -216,6 +250,195 @@
return tmpVK;
}
else return *key;
+}
+
+
+// NOTE(review): strlist, longlist and O_BINARY are not referenced by any code visible in this diff -- confirm they are still needed
+typedef map < SWBuf, list<long> > strlist;
+typedef list<long> longlist;
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+// Walks the module from TOP and writes one CLucene document per entry into an index under the module path; always returns 0 (and does nothing else unless USELUCENE is defined).
+signed char zText::createSearchFramework() {
+#ifdef USELUCENE
+ SWKey *savekey = 0;
+ SWKey *searchkey = 0;
+ SWKey textkey;
+ char *word = 0;
+ char *wordBuf = 0;
+ // NOTE(review): textkey, word and wordBuf are never referenced in this function
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ savekey = CreateKey();
+ *savekey = *key;
+ }
+ else savekey = key;
+ // with a persistent key, iterate on a persistent clone instead -- presumably so the caller's key object is left where it was
+ searchkey = (key->Persist())?key->clone():0;
+ if (searchkey) {
+ searchkey->Persist(1);
+ setKey(*searchkey);
+ }
+
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey *lkey = (VerseKey *)key;
+ // NOTE(review): unchecked C-style cast -- assumes the module's current key is a VerseKey
+ // set up the index writer; the per-entry iteration is the while loop further down
+
+ IndexWriter* writer = NULL;
+ Directory* d = NULL;
+ // NOTE(review): "/lucene" is appended to target only when path lacks a trailing separator; a path ending in '/' or '\\' is used as-is
+ lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();
+ SWBuf target = path;
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target += "/lucene";
+ // open an existing index for writing (forcing it unlocked if locked), otherwise create one; the bool arguments are presumably CLucene's "create" flags -- TODO confirm
+ if (IndexReader::indexExists(target.c_str())) {
+ d = &FSDirectory::getDirectory(target.c_str(), false);
+ if (IndexReader::isLocked(*d)) {
+ IndexReader::unlock(*d);
+ }
+
+ writer = new IndexWriter(*d, an, false);
+ } else {
+ d = &FSDirectory::getDirectory(target.c_str(), true);
+ writer = new IndexWriter( *d ,an, true);
+ }
+ // NOTE(review): d is never closed or released anywhere in this function
+ // NOTE(review): if the writer appends to an existing index, the loop below adds every entry a second time -- confirm
+ // add one document per entry ("key" = current key text, "content" = StripText()) until the module reports an error
+ while (!Error()) {
+ Document &doc = *new Document();
+ doc.add( Field::Text(_T("key"), (const char *)*lkey ) );
+ doc.add( Field::Text(_T("content"), StripText()) );
+ writer->addDocument(doc);
+ delete &doc;
+
+ (*this)++;
+ }
+ // optimize and close the index, then free the writer and the heap-allocated analyzer
+ writer->optimize();
+ writer->close();
+ delete writer;
+ delete &an;
+
+ // reposition module back to where it was before we were called
+ setKey(*savekey);
+
+ if (!savekey->Persist())
+ delete savekey;
+
+ if (searchkey)
+ delete searchkey;
+
+
+#endif
+ return 0;
+}
+
+
+/******************************************************************************
+ * zText::search - Searches a module for a string
+ *
+ * ENT: istr - string for which to search
+ * searchType - type of search to perform
+ * >=0 - regex
+ * -1 - phrase
+ * -2 - multiword
+ * -3 - CLucene query on the "content" field (USELUCENE builds only)
+ * flags - options flags for search
+ * justCheckIfSupported - if set, don't search, only report support
+ *
+ * RET: listkey set to verses that contain istr
+ */
+// Only searchType -3 is answered in this function; every other request falls through to the code after the switch.
+ListKey &zText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+ listkey.ClearList();
+ // is/ir are the IndexSearcher/IndexReader opened by the constructor; both stay 0 when no index was found
+ if ((is) && (ir)) {
+
+ switch (searchType) {
+ case -3: {
+
+
+ // test to see if our scope for this search is bounded by a
+ // VerseKey
+ VerseKey *testKeyType = 0, vk;
+ try {
+ testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+ }
+ catch ( ... ) {}
+ // if we don't have a VerseKey * descendant we can't handle
+ // because of scope.
+ // In the future, add bool SWKey::isValid(const char *tryString);
+ if (!testKeyType)
+ break;
+
+
+ // check if we just want to see if search is supported.
+ // If we've gotten this far, then it is supported.
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = true;
+ return listkey;
+ }
+ // NOTE(review): percent is invoked below without a null check -- callers must supply a callback
+ (*percent)(10, percentUserData);
+ // NOTE(review): createSearchFramework() indexes with SimpleAnalyzer but the query is parsed with StandardAnalyzer -- confirm this is intended
+ standard::StandardAnalyzer analyzer;
+ Query &q = QueryParser::Parse(istr, _T("content"), analyzer);
+ (*percent)(20, percentUserData);
+ Hits &h = is->search(q);
+ (*percent)(80, percentUserData);
+
+
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h.Length(); i++) {
+ Document &doc = h.doc(i);
+
+ // set a temporary verse key to this module position
+ vk = doc.get(_T("key"));
+
+ // check scope
+ // Try to set our scope key to this verse key
+ if (scope) {
+ *testKeyType = vk;
+ // (testKeyType was cast from scope, so the assignment above repositions the caller's scope key)
+ // check to see if it set ok and if so, add to our return list
+ if (*testKeyType == vk)
+ listkey << (const char *) vk;
+ }
+ else listkey << (const char*) vk;
+ }
+ (*percent)(98, percentUserData);
+ // h and q are deleted through their references; presumably search()/Parse() return heap objects owned by the caller -- TODO confirm
+ delete &h;
+ delete &q;
+ // position the result list at TOP before handing it back
+ listkey = TOP;
+ (*percent)(100, percentUserData);
+ return listkey;
+ }
+
+ default:
+ break;
+ }
+ }
+ // NOTE(review): a support probe that reaches this point is answered "false" here without consulting SWModule::Search
+ // check if we just want to see if search is supported
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = false;
+ return listkey;
+ }
+#endif
+ // if we don't support this search, fall back to base class
+ return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
}
- Previous message: [sword-cvs] sword/include swsearchable.h,NONE,1.1 rawtext.h,1.27,1.28 swcacher.h,1.4,1.5 swmodule.h,1.67,1.68 ztext.h,1.28,1.29
- Next message: [sword-cvs] sword Makefile.am,1.37,1.38 usrinst.sh,1.32,1.33
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]