[sword-svn] r3464 - in trunk: . examples/cmdline src/modules
scribe at crosswire.org
scribe at crosswire.org
Sun May 21 00:38:20 MST 2017
Author: scribe
Date: 2017-05-21 00:38:20 -0700 (Sun, 21 May 2017)
New Revision: 3464
Modified:
trunk/ChangeLog
trunk/configure.ac
trunk/examples/cmdline/search.cpp
trunk/src/modules/swmodule.cpp
trunk/usrinst.sh
Log:
Added --with-icuregex configure option (reported as ICU-REGEX in the configure summary) to use the ICU regex engine for searching
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/ChangeLog 2017-05-21 07:38:20 UTC (rev 3464)
@@ -1,5 +1,8 @@
API ChangeLog
+21-May-2017 Troy A. Griffitts <scribe at crosswire.org>
+ Added --with-icuregex option to use ICU regex engine
+
24-Apr-2017 Troy A. Griffitts <scribe at crosswire.org>
Branching 1.8.x
Modified: trunk/configure.ac
===================================================================
--- trunk/configure.ac 2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/configure.ac 2017-05-21 07:38:20 UTC (rev 3464)
@@ -72,6 +72,8 @@
# AC_HELP_STRING([--with-lucene],[include lucene support for searching (default=no)]),,with_lucene=no)
AC_ARG_WITH([internalregex],
AS_HELP_STRING([--with-internalregex], [Compile using SWORDs internal copy of regex]))
+AC_ARG_WITH([icuregex],
+ AS_HELP_STRING([--with-icuregex], [use ICU regex engine]))
AC_ARG_WITH(xapian,
AC_HELP_STRING([--with-xapian],[use xapian search engine (default=yes)]),,with_xapian=yes)
@@ -306,13 +308,24 @@
fi
fi
-if test x$with_xapian = xyes; then
+if test x$with_xapian = xyes; then
AC_LANG_CPLUSPLUS
AC_CHECK_LIB(xapian,main,,with_xapian="no")
else
with_xapian="no"
fi
+if test x$with_icuregex = xyes; then
+ if test x$with_icu = xno; then
+ with_icuregex="requested; but using ICU not enabled"
+ else
+ AM_CFLAGS="$AM_CFLAGS -DUSEICUREGEX"
+ AM_CXXFLAGS="$AM_CXXFLAGS -DUSEICUREGEX"
+ fi
+else
+ with_icuregex="no"
+fi
+
if test x$with_xapian = xyes; then
AM_CFLAGS="$AM_CFLAGS -DUSEXAPIAN"
AM_CXXFLAGS="$AM_CXXFLAGS -DUSEXAPIAN"
@@ -425,6 +438,7 @@
AM_CONDITIONAL(SHAREDLIB, test x$enable_shared = xyes)
AM_CONDITIONAL(INSTCONF, test x$with_conf = xyes)
AM_CONDITIONAL(USECXX11REGEX, test x$with_cxx11regex = xyes)
+AM_CONDITIONAL(USEICUREGEX, test x$with_icuregex = xyes)
AM_CONDITIONAL(WITHCURL, test x$with_curl = xyes)
AM_CONDITIONAL(WITHCURLSFTP, test x$with_curl_sftp = xyes)
AM_CONDITIONAL(INTERNALFTPLIB, test x$with_internalftplib = xyes)
@@ -465,7 +479,8 @@
echo " BZIP2: $with_bzip2"
echo " XZ: $with_xz"
echo " ICUSWORD: $with_icusword"
-echo " CXX11REGEX: $with_cxx11regex"
+echo " CXX11-REGEX: $with_cxx11regex"
+echo " ICU-REGEX: $with_icuregex"
echo " XAPIAN-CORE: $with_xapian"
Modified: trunk/examples/cmdline/search.cpp
===================================================================
--- trunk/examples/cmdline/search.cpp 2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/examples/cmdline/search.cpp 2017-05-21 07:38:20 UTC (rev 3464)
@@ -89,7 +89,7 @@
SWBuf searchTerm = argv[2];
manager.setGlobalOption("Greek Accents", "Off");
- manager.setGlobalOption("Strong's Numbers", "On");
+ manager.setGlobalOption("Strong's Numbers", "Off");
manager.setGlobalOption("Hebrew Vowel Points", "Off");
manager.filterText("Greek Accents", searchTerm);
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/src/modules/swmodule.cpp 2017-05-21 07:38:20 UTC (rev 3464)
@@ -46,6 +46,9 @@
#endif
#elif defined(USEICUREGEX)
#include <unicode/regex.h>
+#ifndef REG_ICASE
+#define REG_ICASE UREGEX_CASE_INSENSITIVE
+#endif
#else
#include <regex.h> // GNU
#endif
@@ -418,6 +421,8 @@
std::locale::global(std::locale("en_US.UTF-8"));
std::regex preg;
+#elif defined(USEICUREGEX)
+ RegexMatcher *matcher = 0;
#else
regex_t preg;
#endif
@@ -461,6 +466,14 @@
if (searchType >= 0) {
#ifdef USECXX11REGEX
preg = std::regex((SWBuf(".*")+istr+".*").c_str(), std::regex_constants::extended | searchType | flags);
+#elif defined(USEICUREGEX)
+ UErrorCode status = U_ZERO_ERROR;
+ matcher = new RegexMatcher(istr, searchType | flags, status);
+ if (U_FAILURE(status)) {
+ SWLog::getSystemLog()->logError("Error compiling Regex: %d", status);
+ return listKey;
+ }
+
#else
flags |=searchType|REG_NOSUB|REG_EXTENDED;
int err = regcomp(&preg, istr, flags);
@@ -648,6 +661,11 @@
SWBuf textBuf = stripText();
#ifdef USECXX11REGEX
if (std::regex_match(std::string(textBuf.c_str()), preg)) {
+#elif defined(USEICUREGEX)
+ UnicodeString stringToTest = textBuf.c_str();
+ matcher->reset(stringToTest);
+
+ if (matcher->find()) {
#else
if (!regexec(&preg, textBuf, 0, 0, 0)) {
#endif
@@ -658,6 +676,12 @@
}
#ifdef USECXX11REGEX
else if (std::regex_match(std::string((lastBuf + ' ' + textBuf).c_str()), preg)) {
+#elif defined(USEICUREGEX)
+ else {
+ stringToTest = (lastBuf + ' ' + textBuf).c_str();
+ matcher->reset(stringToTest);
+
+ if (matcher->find()) {
#else
else if (!regexec(&preg, lastBuf + ' ' + textBuf, 0, 0, 0)) {
#endif
@@ -668,6 +692,9 @@
else {
lastBuf = textBuf;
}
+#if defined(USEICUREGEX)
+ }
+#endif
}
// phrase
@@ -851,6 +878,8 @@
if (searchType >= 0) {
#ifdef USECXX11REGEX
std::locale::global(oldLocale);
+#elif defined(USEICUREGEX)
+ delete matcher;
#else
regfree(&preg);
#endif
Modified: trunk/usrinst.sh
===================================================================
--- trunk/usrinst.sh 2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/usrinst.sh 2017-05-21 07:38:20 UTC (rev 3464)
@@ -35,6 +35,7 @@
#OPTIONS="--enable-profile $OPTIONS"
#OPTIONS="--with-cxx11regex $OPTIONS"
+OPTIONS="--with-icuregex $OPTIONS"
#OPTIONS="--with-icusword $OPTIONS"
#OPTIONS="--without-icu $OPTIONS"
#OPTIONS="--without-clucene $OPTIONS"
More information about the sword-cvs mailing list