/****************************************************************************** * swmodule.cpp -code for base class 'module'. Module is the basis for all * types of modules (e.g. texts, commentaries, maps, lexicons, * etc.) */ #include #include #include #include #include // GNU #include #include // KLUDGE for Search #include // KLUDGE for Search #include #include #include #ifndef _MSC_VER #include #endif #ifdef USELUCENE #include #include //Lucence includes //#include "CLucene.h" //#include "CLucene/util/Reader.h" //#include "CLucene/util/Misc.h" //#include "CLucene/util/dirent.h" using namespace lucene::index; using namespace lucene::analysis; using namespace lucene::util; using namespace lucene::store; using namespace lucene::document; using namespace lucene::queryParser; using namespace lucene::search; #endif using std::vector; SWORD_NAMESPACE_START SWDisplay SWModule::rawdisp; typedef std::list StringList; /****************************************************************************** * SWModule Constructor - Initializes data for instance of SWModule * * ENT: imodname - Internal name for module * imoddesc - Name to display to user for module * idisp - Display object to use for displaying * imodtype - Type of Module (All modules will be displayed with * others of same type under their modtype heading * unicode - if this module is unicode */ SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) { key = CreateKey(); entryBuf = ""; config = &ownConfig; modname = 0; error = 0; moddesc = 0; modtype = 0; modlang = 0; this->encoding = encoding; this->direction = direction; this->markup = markup; entrySize= -1; disp = (idisp) ? idisp : &rawdisp; stdstr(&modname, imodname); stdstr(&moddesc, imoddesc); stdstr(&modtype, imodtype); stdstr(&modlang, imodlang); stripFilters = new FilterList(); rawFilters = new FilterList(); renderFilters = new FilterList(); optionFilters = new OptionFilterList(); encodingFilters = new FilterList(); skipConsecutiveLinks = true; procEntAttr = true; } /****************************************************************************** * SWModule Destructor - Cleans up instance of SWModule */ SWModule::~SWModule() { if (modname) delete [] modname; if (moddesc) delete [] moddesc; if (modtype) delete [] modtype; if (modlang) delete [] modlang; if (key) { if (!key->Persist()) delete key; } stripFilters->clear(); rawFilters->clear(); renderFilters->clear(); optionFilters->clear(); encodingFilters->clear(); entryAttributes.clear(); delete stripFilters; delete rawFilters; delete renderFilters; delete optionFilters; delete encodingFilters; } /****************************************************************************** * SWModule::CreateKey - Allocates a key of specific type for module * * RET: pointer to allocated key */ SWKey *SWModule::CreateKey() { return new SWKey(); } /****************************************************************************** * SWModule::Error - Gets and clears error status * * RET: error status */ char SWModule::Error() { char retval = error; error = 0; return retval; } /****************************************************************************** * SWModule::Name - Sets/gets module name * * ENT: imodname - value which to set modname * [0] - only get * * RET: pointer to modname */ char *SWModule::Name(const char *imodname) { return stdstr(&modname, imodname); } char *SWModule::Name() const { return modname; } /****************************************************************************** * SWModule::Description - Sets/gets module description * * ENT: imoddesc - value which to set moddesc * [0] - only get * * RET: pointer to moddesc */ char *SWModule::Description(const char *imoddesc) { return stdstr(&moddesc, imoddesc); } char *SWModule::Description() const { return moddesc; } /****************************************************************************** * SWModule::Type - Sets/gets module type * * ENT: imodtype - value which to set modtype * [0] - only get * * RET: pointer to modtype */ char *SWModule::Type(const char *imodtype) { return stdstr(&modtype, imodtype); } char *SWModule::Type() const { return modtype; } /****************************************************************************** * SWModule::Direction - Sets/gets module direction * * ENT: newdir - value which to set direction * [-1] - only get * * RET: char direction */ char SWModule::Direction(signed char newdir) { if (newdir != -1) direction = newdir; return direction; } /****************************************************************************** * SWModule::Encoding - Sets/gets module encoding * * ENT: newdir - value which to set direction * [-1] - only get * * RET: char encoding */ char SWModule::Encoding(signed char newenc) { if (newenc != -1) encoding = newenc; return encoding; } /****************************************************************************** * SWModule::Markup - Sets/gets module markup * * ENT: newdir - value which to set direction * [-1] - only get * * RET: char markup */ char SWModule::Markup(signed char newmark) { if (newmark != -1) markup = newmark; return markup; } /****************************************************************************** * SWModule::Lang - Sets/gets module language * * ENT: imodlang - value which to set modlang * [0] - only get * * RET: pointer to modname */ char *SWModule::Lang(const char *imodlang) { return stdstr(&modlang, imodlang); } /****************************************************************************** * SWModule::Disp - Sets/gets display driver * * ENT: idisp - value which to set disp * [0] - only get * * RET: pointer to disp */ SWDisplay *SWModule::getDisplay() const { return disp; } void SWModule::setDisplay(SWDisplay *idisp) { disp = idisp; } /****************************************************************************** * SWModule::Display - Calls this modules display object and passes itself * * RET: error status */ char SWModule::Display() { disp->Display(*this); return 0; } /****************************************************************************** * SWModule::getKey - Gets the key from this module that points to the position * record * * RET: key object */ SWKey *SWModule::getKey() const { return key; } /****************************************************************************** * SWModule::setKey - Sets a key to this module for position to a particular * record * * ENT: ikey - key with which to set this module * * RET: error status */ char SWModule::setKey(const SWKey *ikey) { SWKey *oldKey = 0; if (key) { if (!key->Persist()) // if we have our own copy oldKey = key; } if (!ikey->Persist()) { // if we are to keep our own copy key = CreateKey(); *key = *ikey; } else key = (SWKey *)ikey; // if we are to just point to an external key if (oldKey) delete oldKey; return 0; } /****************************************************************************** * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry * * ENT: p - position (e.g. TOP, BOTTOM) * * RET: *this */ void SWModule::setPosition(SW_POSITION p) { *key = p; char saveError = key->Error(); switch (p) { case POS_TOP: (*this)++; (*this)--; break; case POS_BOTTOM: (*this)--; (*this)++; break; } error = saveError; } /****************************************************************************** * SWModule::increment - Increments module key a number of entries * * ENT: increment - Number of entries to jump forward * * RET: *this */ void SWModule::increment(int steps) { (*key) += steps; error = key->Error(); } /****************************************************************************** * SWModule::decrement - Decrements module key a number of entries * * ENT: decrement - Number of entries to jump backward * * RET: *this */ void SWModule::decrement(int steps) { (*key) -= steps; error = key->Error(); } /****************************************************************************** * SWModule::Search - Searches a module for a string * * ENT: istr - string for which to search * searchType - type of search to perform * >=0 - regex * -1 - phrase * -2 - multiword * -3 - entryAttrib (eg. Word//Lemma/G1234/) * -4 - clucene * flags - options flags for search * justCheckIfSupported - if set, don't search, only tell if this * function supports requested search. * * RET: ListKey set to verses that contain istr */ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { listKey.ClearList(); SWBuf term = istr; #ifdef USELUCENE SWBuf target = getConfigEntry("AbsoluteDataPath"); char ch = target.c_str()[strlen(target.c_str())-1]; if ((ch != '/') && (ch != '\\')) target.append('/'); target.append("lucene"); #endif if (justCheckIfSupported) { *justCheckIfSupported = (searchType >= -3); #ifdef USELUCENE if ((searchType == -4) && (IndexReader::indexExists(target.c_str()))) { *justCheckIfSupported = true; } #endif return listKey; } SWKey *saveKey = 0; SWKey *searchKey = 0; SWKey *resultKey = CreateKey(); regex_t preg; vector words; const char *sres; terminateSearch = false; char perc = 1; bool savePEA = isProcessEntryAttributes(); // determine if we might be doing special strip searches. useful for knowing if we can use shortcuts bool specialStrips = (getConfigEntry("LocalStripFilter") || strchr(istr, '<')); processEntryAttributes(searchType == -3); if (!key->Persist()) { saveKey = CreateKey(); *saveKey = *key; } else saveKey = key; searchKey = (scope)?scope->clone():(key->Persist())?key->clone():0; if (searchKey) { searchKey->Persist(1); setKey(*searchKey); } (*percent)(perc, percentUserData); // MAJOR KLUDGE: VerseKey::Index still return index within testament. // VerseKey::NewIndex should be moved to Index and Index should be some // VerseKey specific name VerseKey *vkcheck = 0; SWTRY { vkcheck = SWDYNAMIC_CAST(VerseKey, key); } SWCATCH (...) {} // end MAJOR KLUDGE *this = BOTTOM; // fix below when we find out the bug long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index(); if (!highIndex) highIndex = 1; // avoid division by zero errors. *this = TOP; if (searchType >= 0) { flags |=searchType|REG_NOSUB|REG_EXTENDED; regcomp(&preg, istr, flags); } (*percent)(++perc, percentUserData); #ifdef USELUCENE if (searchType == -4) { // lucene //Buffers for the wchar<->utf8 char* conversion const unsigned short int MAX_CONV_SIZE = 2047; wchar_t wcharBuffer[MAX_CONV_SIZE + 1]; char utfBuffer[MAX_CONV_SIZE + 1]; // Make sure our scope for this search is bounded by // something we can test // In the future, add bool SWKey::isValid(const char *tryString); bool freeTestKey = false; // only enforce range if we're VerseKey decendant bool enforceRange = SWDYNAMIC_CAST(VerseKey, resultKey); SWKey *testKey = 0; if (enforceRange) { SWTRY { testKey = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); if (!testKey) { testKey = SWDYNAMIC_CAST(ListKey, ((scope)?scope:key)); } } SWCATCH ( ... ) {} if (!testKey) { testKey = new ListKey(); *testKey = ((VerseKey *)resultKey)->ParseVerseList((const char *)((scope)?scope:key), *resultKey, true); freeTestKey = true; } } lucene::index::IndexReader *ir = 0; lucene::search::IndexSearcher *is = 0; Query *q = 0; Hits *h = 0; try { ir = IndexReader::open(target); is = new IndexSearcher(ir); (*percent)(10, percentUserData); standard::StandardAnalyzer analyzer; lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8? q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer); (*percent)(20, percentUserData); h = is->search(q); (*percent)(80, percentUserData); // iterate thru each good module position that meets the search for (long i = 0; i < h->length(); i++) { Document &doc = h->doc(i); // set a temporary verse key to this module position lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE); *resultKey = utfBuffer; //TODO Does a key always accept utf8? if (enforceRange) { // check scope // Try to set our scope key to this verse key *testKey = *resultKey; // check to see if it set ok and if so, add to our return list if (*testKey == *resultKey) { listKey << *resultKey; listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100)); } } else { listKey << *resultKey; listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100)); } } (*percent)(98, percentUserData); } catch (...) { q = 0; // invalid clucene query } delete h; delete q; delete is; if (ir) { ir->close(); } if (freeTestKey) { delete testKey; } } #endif // some pre-loop processing switch (searchType) { // phrase case -1: // let's see if we're told to ignore case. If so, then we'll touppstr our term if ((flags & REG_ICASE) == REG_ICASE) toupperstr(term); break; // multi-word case -2: // let's break the term down into our words vector while (1) { const char *word = term.stripPrefix(' '); if (!word) { words.push_back(term); break; } words.push_back(word); } if ((flags & REG_ICASE) == REG_ICASE) { for (unsigned int i = 0; i < words.size(); i++) { toupperstr(words[i]); } } break; // entry attributes case -3: // let's break the attribute segs down. We'll reuse our words vector for each segment while (1) { const char *word = term.stripPrefix('/'); if (!word) { words.push_back(term); break; } words.push_back(word); } break; } // our main loop to iterate the module and find the stuff perc = 5; (*percent)(perc, percentUserData); while ((searchType > -4) && !Error() && !terminateSearch) { long mindex = 0; if (vkcheck) mindex = vkcheck->NewIndex(); else mindex = key->Index(); float per = (float)mindex / highIndex; per *= 93; per += 5; char newperc = (char)per; // char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex))); if (newperc > perc) { perc = newperc; (*percent)(perc, percentUserData); } else if (newperc < perc) { #ifndef _MSC_VER std::cerr << "Serious error: new percentage complete is less than previous value\n"; std::cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n"; std::cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n"; std::cerr << "highIndex: " << highIndex << "\n"; std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n"; std::cerr << "perc == " << (int )perc << "% \n"; #endif } if (searchType >= 0) { if (!regexec(&preg, StripText(), 0, 0, 0)) { *resultKey = *getKey(); listKey << *resultKey; } } // phrase else { SWBuf textBuf; switch (searchType) { // phrase case -1: textBuf = StripText(); if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf); sres = strstr(textBuf.c_str(), term.c_str()); if (sres) { //it's also in the StripText(), so we have a valid search result item now *resultKey = *getKey(); listKey << *resultKey; } break; // multiword case -2: { // enclose our allocations int loopCount = 0; unsigned int foundWords = 0; do { textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : StripText(); foundWords = 0; for (unsigned int i = 0; i < words.size(); i++) { if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf); sres = strstr(textBuf.c_str(), words[i].c_str()); if (!sres) { break; //for loop } foundWords++; } loopCount++; } while ( (loopCount < 2) && (foundWords == words.size())); if ((loopCount == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item *resultKey = *getKey(); listKey << *resultKey; } } break; // entry attributes case -3: RenderText(); // force parse AttributeTypeList &entryAttribs = getEntryAttributes(); AttributeTypeList::iterator i1Start, i1End; AttributeList::iterator i2Start, i2End; AttributeValue::iterator i3Start, i3End; if ((words.size()) && (words[0].length())) { i1Start = entryAttribs.find(words[0]); i1End = i1Start; if (i1End != entryAttribs.end()) i1End++; } else { i1Start = entryAttribs.begin(); i1End = entryAttribs.end(); } for (;i1Start != i1End; i1Start++) { if ((words.size()>1) && (words[1].length())) { i2Start = i1Start->second.find(words[1]); i2End = i2Start; if (i2End != i1Start->second.end()) i2End++; } else { i2Start = i1Start->second.begin(); i2End = i1Start->second.end(); } for (;i2Start != i2End; i2Start++) { if ((words.size()>2) && (words[2].length())) { i3Start = i2Start->second.find(words[2]); i3End = i3Start; if (i3End != i2Start->second.end()) i3End++; } else { i3Start = i2Start->second.begin(); i3End = i2Start->second.end(); } for (;i3Start != i3End; i3Start++) { if ((words.size()>3) && (words[3].length())) { if (flags & SEARCHFLAG_MATCHWHOLEENTRY) { bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3])); sres = (found) ? i3Start->second.c_str() : 0; } else { sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]); } if (sres) { *resultKey = *getKey(); listKey << *resultKey; break; } } } if (i3Start != i3End) break; } if (i2Start != i2End) break; } break; } // end switch } (*this)++; } // cleaup work if (searchType >= 0) regfree(&preg); setKey(*saveKey); if (!saveKey->Persist()) delete saveKey; if (searchKey) delete searchKey; delete resultKey; listKey = TOP; processEntryAttributes(savePEA); (*percent)(100, percentUserData); return listKey; } /****************************************************************************** * SWModule::StripText() - calls all stripfilters on current text * * ENT: buf - buf to massage instead of this modules current text * len - max len of buf * * RET: this module's text at current key location massaged by Strip filters */ const char *SWModule::StripText(const char *buf, int len) { return RenderText(buf, len, false); } /****************************************************************************** * SWModule::RenderText - calls all renderfilters on current text * * ENT: buf - buffer to Render instead of current module position * * RET: this module's text at current key location massaged by RenderText filters */ const char *SWModule::RenderText(const char *buf, int len, bool render) { entryAttributes.clear(); static SWBuf local; if (buf) local = buf; SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf(); SWKey *key = 0; static char *null = ""; if (tmpbuf) { unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len; if (size > 0) { key = (SWKey *)*this; optionFilter(tmpbuf, key); if (render) { renderFilter(tmpbuf, key); encodingFilter(tmpbuf, key); } else stripFilter(tmpbuf, key); } } else { tmpbuf = null; } return tmpbuf; } /****************************************************************************** * SWModule::RenderText - calls all renderfilters on current text * * ENT: tmpKey - key to use to grab text * * RET: this module's text at current key location massaged by RenderFilers */ const char *SWModule::RenderText(SWKey *tmpKey) { SWKey *saveKey; const char *retVal; if (!key->Persist()) { saveKey = CreateKey(); *saveKey = *key; } else saveKey = key; setKey(*tmpKey); retVal = RenderText(); setKey(*saveKey); if (!saveKey->Persist()) delete saveKey; return retVal; } /****************************************************************************** * SWModule::StripText - calls all StripTextFilters on current text * * ENT: tmpKey - key to use to grab text * * RET: this module's text at specified key location massaged by Strip filters */ const char *SWModule::StripText(SWKey *tmpKey) { SWKey *saveKey; const char *retVal; if (!key->Persist()) { saveKey = CreateKey(); *saveKey = *key; } else saveKey = key; setKey(*tmpKey); retVal = StripText(); setKey(*saveKey); if (!saveKey->Persist()) delete saveKey; return retVal; } const char *SWModule::getConfigEntry(const char *key) const { ConfigEntMap::iterator it = config->find(key); return (it != config->end()) ? it->second.c_str() : 0; } void SWModule::setConfig(ConfigEntMap *config) { this->config = config; } bool SWModule::hasSearchFramework() { #ifdef USELUCENE return true; #else return SWSearchable::hasSearchFramework(); #endif } void SWModule::deleteSearchFramework() { #ifdef USELUCENE SWBuf target = getConfigEntry("AbsoluteDataPath"); char ch = target.c_str()[strlen(target.c_str())-1]; if ((ch != '/') && (ch != '\\')) target.append('/'); target.append("lucene"); FileMgr::removeDir(target.c_str()); #else SWSearchable::deleteSearchFramework(); #endif } signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) { #ifdef USELUCENE SWKey *saveKey = 0; SWKey *searchKey = 0; SWKey textkey; SWBuf c; // turn all filters to default values StringList filterSettings; for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) { filterSettings.push_back((*filter)->getOptionValue()); (*filter)->setOptionValue(*((*filter)->getOptionValues().begin())); if (!strcmp("Greek Accents", (*filter)->getOptionName())) { (*filter)->setOptionValue("Off"); } } // be sure we give CLucene enough file handles FileMgr::getSystemFileMgr()->flush(); // save key information so as not to disrupt original // module position if (!key->Persist()) { saveKey = CreateKey(); *saveKey = *key; } else saveKey = key; searchKey = (key->Persist())?key->clone():0; if (searchKey) { searchKey->Persist(1); setKey(*searchKey); } RAMDirectory *ramDir = NULL; IndexWriter *coreWriter = NULL; IndexWriter *fsWriter = NULL; Directory *d = NULL; standard::StandardAnalyzer *an = new standard::StandardAnalyzer(); SWBuf target = getConfigEntry("AbsoluteDataPath"); bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch"); char ch = target.c_str()[strlen(target.c_str())-1]; if ((ch != '/') && (ch != '\\')) target.append('/'); target.append("lucene"); FileMgr::createParent(target+"/dummy"); ramDir = new RAMDirectory(); coreWriter = new IndexWriter(ramDir, an, true); char perc = 1; VerseKey *vkcheck = 0; vkcheck = SWDYNAMIC_CAST(VerseKey, key); TreeKeyIdx *tkcheck = 0; tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key); *this = BOTTOM; long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index(); if (!highIndex) highIndex = 1; // avoid division by zero errors. bool savePEA = isProcessEntryAttributes(); processEntryAttributes(true); // prox chapter blocks // position module at the beginning *this = TOP; VerseKey chapMax; SWBuf proxBuf; SWBuf proxLem; SWBuf strong; const short int MAX_CONV_SIZE = 2047; wchar_t wcharBuffer[MAX_CONV_SIZE + 1]; char err = Error(); while (!err) { long mindex = 0; if (vkcheck) mindex = vkcheck->NewIndex(); else mindex = key->Index(); proxBuf = ""; proxLem = ""; // computer percent complete so we can report to our progress callback float per = (float)mindex / highIndex; // between 5%-98% per *= 93; per += 5; char newperc = (char)per; if (newperc > perc) { perc = newperc; (*percent)(perc, percentUserData); } // get "content" field const char *content = StripText(); bool good = false; // start out entry Document *doc = new Document(); // get "key" field SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText(); if (content && *content) { good = true; // build "strong" field AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; strong=""; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { for (word = words->second.begin();word != words->second.end(); word++) { int partCount = atoi(word->second["PartCount"]); if (!partCount) partCount = 1; for (int i = 0; i < partCount; i++) { SWBuf tmp = "Lemma"; if (partCount > 1) tmp.appendFormatted(".%d", i+1); strongVal = word->second.find(tmp); if (strongVal != word->second.end()) { // cheeze. skip empty article tags that weren't assigned to any text if (strongVal->second == "G3588") { if (word->second.find("Text") == word->second.end()) continue; // no text? let's skip } strong.append(strongVal->second); strong.append(' '); } } } } lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8 doc->add( *Field::Text(_T("key"), wcharBuffer ) ); if (includeKeyInSearch) { c = keyText; c += " "; c += content; content = c.c_str(); } lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8 doc->add( *Field::UnStored(_T("content"), wcharBuffer) ); if (strong.length() > 0) { lucene_utf8towcs(wcharBuffer, strong, MAX_CONV_SIZE); doc->add( *Field::UnStored(_T("lemma"), wcharBuffer) ); //printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str()); } //printf("setting fields (%s).\n", (const char *)*key); //fflush(stdout); } // don't write yet, cuz we have to see if we're the first of a prox block (5:1 or chapter5/verse1 // for VerseKeys use chapter if (vkcheck) { chapMax = *vkcheck; // we're the first verse in a chapter if (vkcheck->Verse() == 1) { chapMax = MAXVERSE; VerseKey saveKey = *vkcheck; while ((!err) && (*vkcheck <= chapMax)) { //printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str()); //printf("building proxBuf from (%s).\n", (const char *)*key); content = StripText(); if (content && *content) { // build "strong" field strong = ""; AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { for (word = words->second.begin();word != words->second.end(); word++) { int partCount = atoi(word->second["PartCount"]); if (!partCount) partCount = 1; for (int i = 0; i < partCount; i++) { SWBuf tmp = "Lemma"; if (partCount > 1) tmp.appendFormatted(".%d", i+1); strongVal = word->second.find(tmp); if (strongVal != word->second.end()) { // cheeze. skip empty article tags that weren't assigned to any text if (strongVal->second == "G3588") { if (word->second.find("Text") == word->second.end()) continue; // no text? let's skip } strong.append(strongVal->second); strong.append(' '); } } } } proxBuf += content; proxBuf.append(' '); proxLem += strong; if (proxLem.length()) proxLem.append("\n"); } (*this)++; err = Error(); } err = 0; *vkcheck = saveKey; } } // for TreeKeys use siblings if we have no children else if (tkcheck) { if (!tkcheck->hasChildren()) { if (!tkcheck->previousSibling()) { do { //printf("building proxBuf from (%s).\n", (const char *)*key); //fflush(stdout); content = StripText(); if (content && *content) { // build "strong" field strong = ""; AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { for (word = words->second.begin();word != words->second.end(); word++) { int partCount = atoi(word->second["PartCount"]); if (!partCount) partCount = 1; for (int i = 0; i < partCount; i++) { SWBuf tmp = "Lemma"; if (partCount > 1) tmp.appendFormatted(".%d", i+1); strongVal = word->second.find(tmp); if (strongVal != word->second.end()) { // cheeze. skip empty article tags that weren't assigned to any text if (strongVal->second == "G3588") { if (word->second.find("Text") == word->second.end()) continue; // no text? let's skip } strong.append(strongVal->second); strong.append(' '); } } } } proxBuf += content; proxBuf.append(' '); proxLem += strong; if (proxLem.length()) proxLem.append("\n"); } } while (tkcheck->nextSibling()); tkcheck->parent(); tkcheck->firstChild(); } else tkcheck->nextSibling(); // reposition from our previousSibling test } } if (proxBuf.length() > 0) { lucene_utf8towcs(wcharBuffer, proxBuf, MAX_CONV_SIZE); //keyText must be utf8 //printf("proxBuf after (%s).\nprox: %s\nproxLem: %s\n", (const char *)*key, proxBuf.c_str(), proxLem.c_str()); doc->add( *Field::UnStored(_T("prox"), wcharBuffer) ); good = true; } if (proxLem.length() > 0) { lucene_utf8towcs(wcharBuffer, proxLem, MAX_CONV_SIZE); //keyText must be utf8 doc->add( *Field::UnStored(_T("proxlem"), wcharBuffer) ); good = true; } if (good) { //printf("writing (%s).\n", (const char *)*key); //fflush(stdout); coreWriter->addDocument(doc); } delete doc; (*this)++; err = Error(); } // Optimizing automatically happens with the call to addIndexes //coreWriter->optimize(); coreWriter->close(); if (IndexReader::indexExists(target.c_str())) { d = FSDirectory::getDirectory(target.c_str(), false); if (IndexReader::isLocked(d)) { IndexReader::unlock(d); } fsWriter = new IndexWriter( d, an, false); } else { d = FSDirectory::getDirectory(target.c_str(), true); fsWriter = new IndexWriter( d ,an, true); } Directory *dirs[] = { ramDir, 0 }; fsWriter->addIndexes(dirs); fsWriter->close(); delete ramDir; delete coreWriter; delete fsWriter; delete an; // reposition module back to where it was before we were called setKey(*saveKey); if (!saveKey->Persist()) delete saveKey; if (searchKey) delete searchKey; processEntryAttributes(savePEA); // reset option filters back to original values StringList::iterator origVal = filterSettings.begin(); for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) { (*filter)->setOptionValue(*origVal++); } return 0; #else return SWSearchable::createSearchFramework(percent, percentUserData); #endif } /** OptionFilterBuffer a text buffer * @param filters the FilterList of filters to iterate * @param buf the buffer to filter * @param key key location from where this buffer was extracted */ void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, SWKey *key) { OptionFilterList::iterator it; for (it = filters->begin(); it != filters->end(); it++) { (*it)->processText(buf, key, this); } } /** FilterBuffer a text buffer * @param filters the FilterList of filters to iterate * @param buf the buffer to filter * @param key key location from where this buffer was extracted */ void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, SWKey *key) { FilterList::iterator it; for (it = filters->begin(); it != filters->end(); it++) { (*it)->processText(buf, key, this); } } signed char SWModule::createModule(const char*) { return -1; } void SWModule::setEntry(const char*, long) { } void SWModule::linkEntry(const SWKey*) { } SWORD_NAMESPACE_END