[sword-svn] r1847 - trunk/src/modules
scribe at crosswire.org
scribe at crosswire.org
Fri Aug 12 19:13:26 MST 2005
Author: scribe
Date: 2005-08-12 19:13:25 -0700 (Fri, 12 Aug 2005)
New Revision: 1847
Modified:
trunk/src/modules/swmodule.cpp
Log:
prox searching beginnings
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2005-08-04 12:43:03 UTC (rev 1846)
+++ trunk/src/modules/swmodule.cpp 2005-08-13 02:13:25 UTC (rev 1847)
@@ -10,6 +10,7 @@
#include <regex.h> // GNU
#include <swfilter.h>
#include <versekey.h> // KLUDGE for Search
+#include <treekeyidx.h> // KLUDGE for Search
#include <filemgr.h>
#ifndef _MSC_VER
#include <iostream>
@@ -520,6 +521,7 @@
#endif
+ // multi-word
if (searchType == -2) {
wordBuf = (char *)calloc(sizeof(char), strlen(istr) + 1);
strcpy(wordBuf, istr);
@@ -536,6 +538,7 @@
}
}
+ // entry attributes
if (searchType == -3) {
wordBuf = (char *)calloc(sizeof(char), strlen(istr) + 1);
char *checkSlash = wordBuf;
@@ -596,6 +599,8 @@
listkey << textkey;
}
}
+
+ // phrase
else if (searchType == -1) {
sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(StripText(), istr) : strstr(StripText(), istr);
if (sres) { //it's also in the StripText(), so we have a valid search result item now
@@ -603,11 +608,14 @@
listkey << textkey;
}
}
+
+ // multiword
else if (searchType == -2) {
int loopCount = 0;
int foundWords = 0;
+ bool specialStrips = getConfigEntry("LocalStripFilter");
do {
- const char* textBuf = (loopCount == 0) ? getRawEntry() : StripText();
+ const char* textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : StripText();
foundWords = 0;
for (int i = 0; i < wordCount; ++i) {
@@ -626,6 +634,8 @@
listkey << textkey;
}
}
+
+ // entry attributes
else if (searchType == -3) {
int i;
RenderText(); // force parse
@@ -903,11 +913,6 @@
setKey(*searchkey);
}
- // position module at the beginning
- *this = TOP;
-
- // iterate thru each entry in module
-
IndexWriter *writer = NULL;
Directory *d = NULL;
@@ -935,10 +940,13 @@
char perc = 1;
VerseKey *vkcheck = 0;
- SWTRY {
- vkcheck = SWDYNAMIC_CAST(VerseKey, key);
- }
- SWCATCH (...) {}
+ vkcheck = SWDYNAMIC_CAST(VerseKey, key);
+
+ TreeKeyIdx *tkcheck = 0;
+ tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);
+
+
+ *this = BOTTOM;
long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index();
if (!highIndex)
highIndex = 1; // avoid division by zero errors.
@@ -946,12 +954,25 @@
bool savePEA = isProcessEntryAttributes();
processEntryAttributes(true);
- while (!Error()) {
+ // prox chapter blocks
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey chapMax;
+ SWBuf proxBuf;
+ SWBuf proxLem;
+ SWBuf strong;
+
+ char err = Error();
+ while (!err) {
long mindex = 0;
if (vkcheck)
mindex = vkcheck->NewIndex();
else mindex = key->Index();
+ proxBuf = "";
+ proxLem = "";
+
// computer percent complete so we can report to our progress callback
float per = (float)mindex / highIndex;
// between 5%-98%
@@ -965,14 +986,15 @@
// get "content" field
const char *content = StripText();
+ bool good = false;
+
+ // start out entry
+ Document *doc = new Document();
+ // get "key" field
+ SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
if (content && *content) {
-
- // get "key" field
- SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
-
+ good = true;
// build "strong" field
- SWBuf strong;
-
AttributeTypeList::iterator words;
AttributeList::iterator word;
AttributeValue::iterator strongVal;
@@ -992,18 +1014,129 @@
}
}
}
-
- // add our entry to the clucene index
- Document *doc = new Document();
doc->add( Field::UnIndexed(_T("key"), keyText ) );
doc->add( Field::UnStored(_T("content"), content) );
if (strong.length() > 0)
doc->add( Field::UnStored(_T("lemma"), strong) );
+//printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
+printf("setting fields (%s).\n", (const char *)*key);
+fflush(stdout);
+ }
+ // don't write yet, because we have to see if we're the first of a prox block (5:1 or chapter5/verse1)
+
+ // for VerseKeys use chapter
+ if (vkcheck) {
+ chapMax = *vkcheck;
+ // we're the first verse in a chapter
+ if (vkcheck->Verse() == 1) {
+ chapMax = MAXVERSE;
+ VerseKey saveKey = *vkcheck;
+ while ((!err) && (*vkcheck <= chapMax)) {
+//printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str());
+printf("building proxBuf from (%s).\n", (const char *)*key);
+
+ // build "strong" field
+ strong = "";
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ strongVal = word->second.find("Lemma");
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ content = getRawEntry();
+ if (content && *content) {
+ proxBuf += content;
+ proxBuf.append(' ');
+ }
+ proxLem += strong;
+ if (proxLem.length())
+ proxLem.append("\n");
+ (*this)++;
+ err = Error();
+ }
+ err = 0;
+ *vkcheck = saveKey;
+ }
+ }
+
+ // for TreeKeys use siblings if we have no children
+ else if (tkcheck) {
+ if (!tkcheck->hasChildren()) {
+ if (!tkcheck->previousSibling()) {
+ do {
+printf("building proxBuf from (%s).\n", (const char *)*key);
+fflush(stdout);
+
+ // build "strong" field
+ strong = "";
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ strongVal = word->second.find("Lemma");
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+
+ content = getRawEntry();
+ if (content && *content) {
+ proxBuf += content;
+ proxBuf.append("\n");
+ }
+ proxLem += strong;
+ if (proxLem.length())
+ proxLem.append(' ');
+ } while (tkcheck->nextSibling());
+ tkcheck->parent();
+ tkcheck->firstChild();
+ }
+ else tkcheck->nextSibling(); // reposition from our previousSibling test
+ }
+ }
+ if (proxBuf.length() > 0) {
+printf("proxBuf before (%s).\n%s\n", (const char *)*key, proxBuf.c_str());
+ proxBuf = StripText(proxBuf);
+printf("proxBuf after (%s).\n%s\n", (const char *)*key, proxBuf.c_str());
+ doc->add( Field::UnStored(_T("prox"), proxBuf) );
+ good = true;
+ }
+ if (proxLem.length() > 0) {
+ doc->add( Field::UnStored(_T("proxlem"), proxLem) );
+ good = true;
+ }
+ if (good) {
+printf("writing (%s).\n", (const char *)*key);
+fflush(stdout);
writer->addDocument(*doc);
- delete doc;
}
+ delete doc;
(*this)++;
+ err = Error();
}
writer->optimize();
More information about the sword-cvs
mailing list