[sword-svn] r2661 - in trunk: . m4 src/modules
scribe at crosswire.org
scribe at crosswire.org
Sat Oct 29 08:16:41 MST 2011
Author: scribe
Date: 2011-10-29 08:16:41 -0700 (Sat, 29 Oct 2011)
New Revision: 2661
Modified:
trunk/configure.ac
trunk/m4/acx_clucene.m4
trunk/src/modules/swmodule.cpp
Log:
Committing patch from GHellings to support both CLucene 0.9x and 2.x
Leaving the CMake stuff for Greg to commit himself so CMake blame stays with him ;)
Modified: trunk/configure.ac
===================================================================
--- trunk/configure.ac 2011-10-25 18:09:21 UTC (rev 2660)
+++ trunk/configure.ac 2011-10-29 15:16:41 UTC (rev 2661)
@@ -39,7 +39,10 @@
AC_C_BIGENDIAN
+PKG_CHECK_MODULES([CLUCENE2], [libclucene-core >= 2.3])
+if test "x$CLUCENE2_LIBS" = x; then
ACX_CLUCENE
+fi
# ---------------------------------------------------------------------
# With options
@@ -57,7 +60,7 @@
#AC_ARG_WITH(lucene,
# AC_HELP_STRING([--with-lucene],[include lucene support for searching (default=no)]),,with_lucene=no)
AC_ARG_WITH([internalregex],
- AS_HELP_STRING([--with-internalregex], [Compile using SWORD's internal copy of regex]))
+ AS_HELP_STRING([--with-internalregex], [Compile using SWORDs internal copy of regex]))
# ---------------------------------------------------------------------
@@ -250,15 +253,23 @@
# ---------------------------------------------------------------------
with_clucene=no
-if test -z "$CLUCENE_LIBS"; then
- echo "lucene searching options not available"
+if test "x$CLUCENE2_LIBS" != x; then
+ echo "clucene 2.x found - lucene searching options available"
+ AM_CXXFLAGS="$AM_CXXFLAGS $CLUCENE2_CFLAGS -DUSELUCENE -DCLUCENE2"
+ AM_CFLAGS="$AM_CFLAGS $CLUCENE2_CFLAGS -DUSELUCENE -DCLUCENE2"
+ LIBS="$LIBS $CLUCENE2_LIBS"
+ with_clucene="yes 2.x"
else
- echo "lucene found - lucene searching options available"
+if test "x$CLUCENE_LIBS" != x; then
+ echo "lucene 0.x found - lucene searching options available"
AM_CXXFLAGS="$AM_CXXFLAGS $CLUCENE_CXXFLAGS -DUSELUCENE"
AM_CFLAGS="$AM_CFLAGS -DUSELUCENE"
LIBS="$LIBS $CLUCENE_LIBS"
- with_clucene="yes"
+ with_clucene="yes 0.x"
+else
+ echo "lucene searching options not available"
fi
+fi
AC_CHECK_FUNCS(vsnprintf, [have_vsnprintf="yes"])
@@ -298,7 +309,6 @@
AC_SUBST(target_mingw32)
AC_SUBST(CURL_LIBS)
-AC_SUBST(CLUCENE_LIBS)
AC_SUBST(ICU_LIBS)
AC_SUBST(ICU_IOLIBS)
@@ -313,7 +323,7 @@
AM_CONDITIONAL(HAVE_ICUSWORD, test x$with_icusword = xyes)
AM_CONDITIONAL(HAVE_VSNPRINTF, test x$have_vsnprintf = xyes)
-AM_CONDITIONAL(USELUCENE, test x$with_clucene = xyes)
+AM_CONDITIONAL(USELUCENE, test "x$with_clucene" != xno)
AM_CONDITIONAL(SHAREDLIB, test x$enable_shared = xyes)
AM_CONDITIONAL(INSTCONF, test x$with_conf = xyes)
AM_CONDITIONAL(WITHCURL, test x$with_curl = xyes)
Modified: trunk/m4/acx_clucene.m4
===================================================================
--- trunk/m4/acx_clucene.m4 2011-10-25 18:09:21 UTC (rev 2660)
+++ trunk/m4/acx_clucene.m4 2011-10-29 15:16:41 UTC (rev 2661)
@@ -9,7 +9,7 @@
AC_HELP_STRING([ --with-clucene=<path>],
[prefix of CLucene-Core installation. e.g. /usr/local or /usr]),,)
-AC_MSG_CHECKING([how to include clucene])
+AC_MSG_CHECKING([how to include clucene 0.x])
if test "x$with_clucene" = "xno"; then
AC_MSG_RESULT(excluding support)
else
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2011-10-25 18:09:21 UTC (rev 2660)
+++ trunk/src/modules/swmodule.cpp 2011-10-29 15:16:41 UTC (rev 2661)
@@ -40,7 +40,6 @@
#ifdef USELUCENE
#include <CLucene.h>
-#include <CLucene/CLBackwards.h>
//Lucence includes
//#include "CLucene.h"
@@ -497,10 +496,6 @@
#ifdef USELUCENE
if (searchType == -4) { // lucene
- //Buffers for the wchar<->utf8 char* conversion
- const unsigned short int MAX_CONV_SIZE = 2047;
- wchar_t wcharBuffer[MAX_CONV_SIZE + 1];
- char utfBuffer[MAX_CONV_SIZE + 1];
lucene::index::IndexReader *ir = 0;
lucene::search::IndexSearcher *is = 0;
@@ -513,20 +508,18 @@
const TCHAR *stopWords[] = { 0 };
standard::StandardAnalyzer analyzer(stopWords);
- lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
- q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
+ q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer);
(*percent)(20, percentUserData);
h = is->search(q);
(*percent)(80, percentUserData);
// iterate thru each good module position that meets the search
bool checkBounds = getKey()->isBoundSet();
- for (long i = 0; i < h->length(); i++) {
+ for (unsigned long i = 0; i < h->length(); i++) {
Document &doc = h->doc(i);
// set a temporary verse key to this module position
- lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);
- *resultKey = utfBuffer; //TODO Does a key always accept utf8?
+ *resultKey = wcharToUTF8(doc.get(_T("key"))); //TODO Does a key always accept utf8?
// check to see if it sets ok (within our bounds) and if not, skip
if (checkBounds) {
@@ -1022,7 +1015,6 @@
SWBuf c;
const int MAX_CONV_SIZE = 1024 * 1024;
- wchar_t *wcharBuffer = new wchar_t[MAX_CONV_SIZE + 1];
// turn all filters to default values
StringList filterSettings;
@@ -1156,11 +1148,8 @@
}
}
- lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8
-// doc->add( *(new Field("key", wcharBuffer, Field::STORE_YES | Field::INDEX_TOKENIZED)));
- doc->add( *Field::Text(_T("key"), wcharBuffer ) );
+ doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
-
if (includeKeyInSearch) {
c = keyText;
c += " ";
@@ -1168,12 +1157,10 @@
content = c.c_str();
}
- lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8
- doc->add( *Field::UnStored(_T("content"), wcharBuffer) );
+ doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
if (strong.length() > 0) {
- lucene_utf8towcs(wcharBuffer, strong, MAX_CONV_SIZE);
- doc->add( *Field::UnStored(_T("lemma"), wcharBuffer) );
++ doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
//printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
}
@@ -1290,16 +1277,11 @@
if (proxBuf.length() > 0) {
- lucene_utf8towcs(wcharBuffer, proxBuf, MAX_CONV_SIZE); //keyText must be utf8
-
-//printf("proxBuf after (%s).\nprox: %s\nproxLem: %s\n", (const char *)*key, proxBuf.c_str(), proxLem.c_str());
-
- doc->add( *Field::UnStored(_T("prox"), wcharBuffer) );
+ doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
good = true;
}
if (proxLem.length() > 0) {
- lucene_utf8towcs(wcharBuffer, proxLem, MAX_CONV_SIZE); //keyText must be utf8
- doc->add( *Field::UnStored(_T("proxlem"), wcharBuffer) );
+ doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
good = true;
}
if (good) {
@@ -1317,20 +1299,32 @@
//coreWriter->optimize();
coreWriter->close();
+#ifdef CLUCENE2
+ d = FSDirectory::getDirectory(target.c_str());
+#endif
if (IndexReader::indexExists(target.c_str())) {
+#ifndef CLUCENE2
d = FSDirectory::getDirectory(target.c_str(), false);
+#endif
if (IndexReader::isLocked(d)) {
IndexReader::unlock(d);
}
-
fsWriter = new IndexWriter( d, an, false);
- } else {
+ }
+ else {
+#ifndef CLUCENE2
d = FSDirectory::getDirectory(target.c_str(), true);
+#endif
fsWriter = new IndexWriter(d, an, true);
}
Directory *dirs[] = { ramDir, 0 };
+#ifdef CLUCENE2
+ lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1);
+ fsWriter->addIndexes(dirsa);
+#else
fsWriter->addIndexes(dirs);
+#endif
fsWriter->close();
delete ramDir;
@@ -1357,8 +1351,6 @@
(*filter)->setOptionValue(*origVal++);
}
- delete [] wcharBuffer;
-
return 0;
#else
return SWSearchable::createSearchFramework(percent, percentUserData);
More information about the sword-cvs mailing list