401 bool includeComponents =
false;
419 #if defined USEXAPIAN
421 #elif defined USELUCENE
424 if (justCheckIfSupported) {
426 #if defined USEXAPIAN
428 *justCheckIfSupported =
true;
430 #elif defined USELUCENE
432 *justCheckIfSupported =
true;
439 SWKey *searchKey = 0;
446 std::locale oldLocale;
447 std::locale::global(std::locale(
"en_US.UTF-8"));
450 #elif defined(USEICUREGEX)
451 icu::RegexMatcher *matcher = 0;
457 vector<SWBuf> window;
465 || (
getConfig().has(
"GlobalOptionFilter",
"UTF8GreekAccents"))
466 || (
getConfig().has(
"GlobalOptionFilter",
"UTF8HebrewPoints"))
467 || (
getConfig().has(
"GlobalOptionFilter",
"UTF8ArabicPoints"))
468 || (strchr(istr,
'<')));
485 (*percent)(perc, percentUserData);
492 if (searchType >= 0) {
494 preg = std::regex((
SWBuf(
".*")+istr+
".*").c_str(), std::regex_constants::extended | searchType | flags);
495 #elif defined(USEICUREGEX)
496 UErrorCode status = U_ZERO_ERROR;
497 matcher =
new icu::RegexMatcher(istr, searchType | flags, status);
498 if (U_FAILURE(status)) {
505 int err = regcomp(&preg, istr, flags);
513 (*percent)(++perc, percentUserData);
516 #if defined USEXAPIAN || defined USELUCENE
517 (*percent)(10, percentUserData);
519 #if defined USEXAPIAN
521 Xapian::Database database(target.
c_str());
522 Xapian::QueryParser queryParser;
523 queryParser.set_default_op(Xapian::Query::OP_AND);
525 queryParser.set_stemmer(Xapian::Stem(
getLanguage()));
527 queryParser.set_stemming_strategy(queryParser.STEM_SOME);
528 queryParser.add_prefix(
"content",
"C");
529 queryParser.add_prefix(
"lemma",
"L");
530 queryParser.add_prefix(
"morph",
"M");
531 queryParser.add_prefix(
"prox",
"P");
532 queryParser.add_prefix(
"proxlem",
"PL");
533 queryParser.add_prefix(
"proxmorph",
"PM");
535 #elif defined USELUCENE
537 lucene::index::IndexReader *ir = 0;
538 lucene::search::IndexSearcher *is = 0;
542 ir = IndexReader::open(target);
543 is =
new IndexSearcher(ir);
544 const TCHAR *stopWords[] = { 0 };
545 standard::StandardAnalyzer analyzer(stopWords);
549 #if defined USEXAPIAN
550 Xapian::Query q = queryParser.parse_query(istr);
551 Xapian::Enquire enquire = Xapian::Enquire(database);
552 #elif defined USELUCENE
553 q = QueryParser::parse((
wchar_t *)
utf8ToWChar(istr).getRawData(), _T(
"content"), &analyzer);
555 (*percent)(20, percentUserData);
558 #if defined USEXAPIAN
559 enquire.set_query(q);
560 Xapian::MSet h = enquire.get_mset(0, 99999);
561 #elif defined USELUCENE
564 (*percent)(80, percentUserData);
568 #if defined USEXAPIAN
569 Xapian::MSetIterator i;
570 for (i = h.begin(); i != h.end(); ++i) {
572 SW_u64 score = i.get_percent();
573 Xapian::Document doc = i.get_document();
574 *resultKey = doc.get_data().c_str();
575 #elif defined USELUCENE
576 for (
unsigned long i = 0; i < (
unsigned long)h->length(); i++) {
577 Document &doc = h->doc(i);
586 if (*
getKey() != *resultKey) {
593 (*percent)(98, percentUserData);
596 #if defined USEXAPIAN
597 #elif defined USELUCENE
602 #if defined USEXAPIAN
603 #elif defined USELUCENE
616 switch (searchType) {
629 words.push_back(term);
632 words.push_back(word);
634 if ((flags & REG_ICASE) == REG_ICASE) {
635 for (
unsigned int i = 0; i < words.size(); i++) {
647 words.push_back(term);
650 words.push_back(word);
652 if ((words.size()>2) && words[2].endsWith(
".")) {
653 includeComponents =
true;
662 (*percent)(perc, percentUserData);
667 float per = (float)mindex / highIndex;
670 char newperc = (char)per;
671 if (newperc > perc) {
673 (*percent)(perc, percentUserData);
675 else if (newperc < perc) {
677 "Serious error: new percentage complete is less than previous value\nindex: %d\nhighIndex: %d\nnewperc == %d%% is smaller than\nperc == %d%%",
678 key->
getIndex(), highIndex, (int)newperc, (
int )perc);
682 if (searchType >= 0) {
686 #elif defined(USEICUREGEX)
687 icu::UnicodeString stringToTest = textBuf.
c_str();
688 matcher->reset(stringToTest);
690 if (matcher->find()) {
692 if (!regexec(&preg, textBuf, 0, 0, 0)) {
700 else if (std::regex_match(
std::string((lastBuf +
' ' + textBuf).c_str()), preg)) {
701 #elif defined(USEICUREGEX)
703 stringToTest = (lastBuf +
' ' + textBuf).c_str();
704 matcher->reset(stringToTest);
706 if (matcher->find()) {
708 else if (!regexec(&preg, lastBuf +
' ' + textBuf, 0, 0, 0)) {
718 *resultKey = *lastKey;
722 lastBuf = (windowSize > 1) ? textBuf.
c_str() :
"";
725 lastBuf = (windowSize > 1) ? textBuf.
c_str() :
"";
727 #if defined(USEICUREGEX)
734 switch (searchType) {
738 if ((flags & REG_ICASE) == REG_ICASE) textBuf.
toUpper();
751 unsigned int foundWords = 0;
772 if (stripped||specialStrips||multiVerse) {
773 testBuf = multiVerse ? lastBuf +
' ' + textBuf : textBuf;
774 if (stripped) testBuf =
stripText(testBuf);
779 if ((flags & REG_ICASE) == REG_ICASE) testBuf.
size() ? testBuf.
toUpper() : textBuf.
toUpper();
780 for (
unsigned int i = 0; i < words.size(); i++) {
781 sres = strstr(testBuf.
size() ? testBuf.
c_str() : textBuf.
c_str(), words[i].c_str());
789 }
while ( (stripped < 2) && (foundWords == words.size()));
791 }
while ((windowSize > 1) && (multiVerse < 2) && (stripped != 2 || foundWords != words.size()));
793 if ((stripped == 2) && (foundWords == words.size())) {
796 *resultKey = (multiVerse > 1 && !vkCheck) ? *lastKey : *
getKey();
797 if (multiVerse > 1 && vkCheck) {
808 if (multiVerse == 2) {
813 lastBuf = (windowSize > 1) ? textBuf.
c_str() :
"";
821 AttributeTypeList::iterator i1Start, i1End;
822 AttributeList::iterator i2Start, i2End;
823 AttributeValue::iterator i3Start, i3End;
825 if ((words.size()) && (words[0].
length())) {
827 for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
830 i1Start = entryAttribs.find(words[0]);
832 if (i1End != entryAttribs.end()) {
837 i1Start = entryAttribs.begin();
838 i1End = entryAttribs.end();
840 for (;i1Start != i1End; i1Start++) {
841 if ((words.size()>1) && (words[1].
length())) {
842 i2Start = i1Start->second.find(words[1]);
844 if (i2End != i1Start->second.end())
848 i2Start = i1Start->second.begin();
849 i2End = i1Start->second.end();
851 for (;i2Start != i2End; i2Start++) {
852 if ((words.size()>2) && (words[2].
length()) && (!includeComponents)) {
853 i3Start = i2Start->second.find(words[2]);
855 if (i3End != i2Start->second.end())
859 i3Start = i2Start->second.begin();
860 i3End = i2Start->second.end();
862 for (;i3Start != i3End; i3Start++) {
863 if ((words.size()>3) && (words[3].
length())) {
864 if (includeComponents) {
869 if (key != words[2])
continue;
872 bool found = !(((flags &
REG_ICASE) == REG_ICASE) ?
sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
873 sres = (found) ? i3Start->second.c_str() : 0;
876 sres = ((flags &
REG_ICASE) == REG_ICASE) ?
stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
886 if (i3Start != i3End)
889 if (i2Start != i2End)
899 for (AttributeList::iterator it = words.begin(); it != words.end(); it++) {
900 int parts = atoi(it->second[
"PartCount"]);
903 for (
int i = 1; i <= parts; i++) {
906 AttributeValue::iterator li = it->second.find(key);
907 if (li != it->second.end()) {
908 if (i > 1) lemma +=
" ";
910 AttributeValue::iterator lci = it->second.find(key);
911 if (lci != it->second.end()) {
912 lemma += lci->second +
":";
917 li = it->second.find(key);
919 if (i == 1 && parts != 1 && li == it->second.end()) {
920 li = it->second.find(
"Morph");
922 if (li != it->second.end()) {
923 if (i > 1) morph +=
" ";
925 AttributeValue::iterator lci = it->second.find(key);
927 if (i == 1 && parts != 1 && lci == it->second.end()) {
928 lci = it->second.find(
"MorphClass");
930 if (lci != it->second.end()) {
931 morph += lci->second +
":";
937 while (window.size() < (unsigned)flags) {
950 if (searchType >= 0) {
952 std::locale::global(oldLocale);
953 #elif defined(USEICUREGEX)
974 (*percent)(100, percentUserData);
virtual bool isBoundSet() const
const char * getLanguage() const
static signed char existsDir(const char *ipath, const char *idirName=0)
virtual const char * getConfigEntry(const char *key) const
static SWLog * getSystemLog()
std::map< SWBuf, AttributeList, std::less< SWBuf > > AttributeTypeList
SWBuf wcharToUTF8(const wchar_t *buf)
virtual SWKey * getKey() const
virtual char setKey(const SWKey *ikey)
SWBuf utf8ToWChar(const char *buf)
bool endsWith(const SWBuf &postfix) const
int stricmp(const char *s1, const char *s2)
virtual SWKey * getElement(int pos=-1)
virtual SWKey * clone() const
void setLowerBound(const VerseKey &lb)
virtual void clearBounds() const
void setPersist(bool ipersist)
static const signed int SEARCHTYPE_MULTIWORD
const char * stristr(const char *s1, const char *s2)
const char * c_str() const
SWBuf & append(const char *str, long max=-1)
static const signed int SEARCHTYPE_EXTERNAL
#define SWDYNAMIC_CAST(className, object)
void setUpperBound(const VerseKey &ub)
unsigned long size() const
const char * stripPrefix(char separator, bool endOfStringAsSeparator=false)
virtual bool isProcessEntryAttributes() const
virtual AttributeTypeList & getEntryAttributes() const
virtual void setProcessEntryAttributes(bool val) const
unsigned long long SW_u64
virtual const char * stripText(const char *buf=0, int len=-1)
void logError(const char *fmt,...) const
static const signed int SEARCHFLAG_MATCHWHOLEENTRY
virtual SWKey * createKey() const
SWBuf & setFormatted(const char *format,...)
const char * getRawEntry() const
virtual long getIndex() const
std::map< SWBuf, AttributeValue, std::less< SWBuf > > AttributeList
void setSize(unsigned long len)
virtual const ConfigEntMap & getConfig() const
static const signed int SEARCHTYPE_PHRASE
static const signed int SEARCHTYPE_ENTRYATTR
static const signed int SEARCHFLAG_STRICTBOUNDARIES