[sword-svn] r3731 - in trunk: examples/cmdline include src/modules
scribe at crosswire.org
scribe at crosswire.org
Mon May 4 16:59:18 MST 2020
Author: scribe
Date: 2020-05-04 16:59:18 -0700 (Mon, 04 May 2020)
New Revision: 3731
Modified:
trunk/examples/cmdline/search.cpp
trunk/include/swmodule.h
trunk/src/modules/swmodule.cpp
Log:
Better documented search flags. Added new flag SEARCHFLAG_STRICTBOUNDARIES to turn off checks across verse boundaries
Modified: trunk/examples/cmdline/search.cpp
===================================================================
--- trunk/examples/cmdline/search.cpp 2020-05-04 23:57:14 UTC (rev 3730)
+++ trunk/examples/cmdline/search.cpp 2020-05-04 23:59:18 UTC (rev 3731)
@@ -50,6 +50,8 @@
int flags = 0
// for case insensitivity
| REG_ICASE
+// for enforcing strict verse boundaries
+| SEARCHFLAG_STRICTBOUNDARIES
// for use with entryAttrib search type to match whole entry to value, e.g., G1234 and not G12345
//| SEARCHFLAG_MATCHWHOLEENTRY
;
Modified: trunk/include/swmodule.h
===================================================================
--- trunk/include/swmodule.h 2020-05-04 23:57:14 UTC (rev 3730)
+++ trunk/include/swmodule.h 2020-05-04 23:59:18 UTC (rev 3731)
@@ -44,8 +44,13 @@
class SWOptionFilter;
class SWFilter;
+// used for matching whole entry (not substring) in entry attributes searches.
#define SEARCHFLAG_MATCHWHOLEENTRY 4096
+// used for turning off the default behavior of SWORD to use a sliding search window
+// which allows hits across verse boundaries.
+#define SEARCHFLAG_STRICTBOUNDARIES 8192
+
#define SWMODULE_OPERATORS \
operator SWBuf() { return renderText(); } \
operator SWKey &() { return *getKey(); } \
@@ -388,7 +393,10 @@
* -3 - entryAttrib (eg. Word//Lemma./G1234/) (Lemma with dot means check components (Lemma.[1-9]) also)
* -4 - Lucene
* -5 - multilemma window; set 'flags' param to window size (NOT DONE)
- * @param flags options flags for search
+ * @param flags bitwise options flags for search. Each search type supports different options.
+ * REG_ICASE - perform case-insensitive search. Supported by almost all search types
+ * SEARCHFLAG_* - SWORD-specific search flags for various search types. See defines for details
+ *
* @param scope Key containing the scope. VerseKey or ListKey are useful here.
* @param justCheckIfSupported If set, don't search but instead set this variable to true or false to indicate whether the requested search is supported.
* @param percent Callback function to get the current search status in %.
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2020-05-04 23:57:14 UTC (rev 3730)
+++ trunk/src/modules/swmodule.cpp 2020-05-04 23:59:18 UTC (rev 3731)
@@ -387,6 +387,18 @@
SWBuf term = istr;
bool includeComponents = false; // for entryAttrib e.g., /Lemma.1/
+ // This only works for 1 or 2 verses right now, and only for some search types (regex and multiword).
+ // Future plans are to extend this functionality.
+ // By default, SWORD allows searches to cross the artificial boundaries of verse markers.
+ // Searches are done in a sliding window of 2 verses right now.
+ // To turn this off, include SEARCHFLAG_STRICTBOUNDARIES in the search flags.
+ int windowSize = 2;
+ if ((flags & SEARCHFLAG_STRICTBOUNDARIES) && (searchType == -2 || searchType > 0)) {
+ // remove custom SWORD flag to prevent possible overlap with unknown regex option
+ flags ^= SEARCHFLAG_STRICTBOUNDARIES;
+ windowSize = 1;
+ }
+
SWBuf target = getConfigEntry("AbsoluteDataPath");
if (!target.endsWith("/") && !target.endsWith("\\")) {
target.append('/');
@@ -653,6 +665,8 @@
"Serious error: new percentage complete is less than previous value\nindex: %d\nhighIndex: %d\nnewperc == %d%% is smaller than\nperc == %d%%",
key->getIndex(), highIndex, (int)newperc, (int )perc);
}
+
+ // regex
if (searchType >= 0) {
SWBuf textBuf = stripText();
#ifdef USECXX11REGEX
@@ -683,23 +697,22 @@
#endif
lastKey->clearBound();
listKey << *lastKey;
- lastBuf = textBuf;
+ lastBuf = (windowSize > 1) ? textBuf : "";
}
else {
- lastBuf = textBuf;
+ lastBuf = (windowSize > 1) ? textBuf : "";
}
#if defined(USEICUREGEX)
}
#endif
}
- // phrase
else {
SWBuf textBuf;
switch (searchType) {
// phrase
- case -1:
+ case -1: {
textBuf = stripText();
if ((flags & REG_ICASE) == REG_ICASE) textBuf.toUpper();
sres = strstr(textBuf.c_str(), term.c_str());
@@ -709,6 +722,7 @@
listKey << *resultKey;
}
break;
+ }
// multiword
case -2: { // enclose our allocations
@@ -754,19 +768,21 @@
++stripped;
} while ( (stripped < 2) && (foundWords == words.size()));
++multiVerse;
- } while ( (multiVerse < 2) && (stripped != 2 || foundWords != words.size()));
+ } while ((windowSize > 1) && (multiVerse < 2) && (stripped != 2 || foundWords != words.size()));
if ((stripped == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
*resultKey = (multiVerse == 1) ? *getKey() : *lastKey;
resultKey->clearBound();
listKey << *resultKey;
lastBuf = "";
- if (twoVerse == 2) {
+ // if we're searching windowSize > 1 and we had a hit which required the current verse
+ // let's start the next window with our current verse in case we have another hit adjacent
+ if (multiVerse == 2) {
lastBuf = textBuf;
}
}
else {
- lastBuf = textBuf;
+ lastBuf = (windowSize > 1) ? textBuf : "";
}
}
break;
More information about the sword-cvs
mailing list