[sword-svn] r1934 - in trunk: include lib/bcppmake src/mgr src/modules src/modules/filters
scribe at crosswire.org
Thu Jun 29 16:23:29 MST 2006
Author: scribe
Date: 2006-06-29 16:22:22 -0700 (Thu, 29 Jun 2006)
New Revision: 1934
Modified:
trunk/include/stringmgr.h
trunk/lib/bcppmake/libsword.bpr
trunk/src/mgr/stringmgr.cpp
trunk/src/modules/filters/osisrtf.cpp
trunk/src/modules/swmodule.cpp
Log:
Cleaned up StringMgr to conform to the project coding style.
Added support for multiple (space-separated) Strong's numbers in the OSIS-to-RTF filter.
Catch the CLucene query-parsing exception during indexed search.
Modified: trunk/include/stringmgr.h
===================================================================
--- trunk/include/stringmgr.h 2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/include/stringmgr.h 2006-06-29 23:22:22 UTC (rev 1934)
@@ -25,6 +25,8 @@
#define STRINGMGR_H
#include <defs.h>
+#include <swbuf.h>
+#include <utilstr.h>
SWORD_NAMESPACE_START
@@ -33,20 +35,22 @@
*/
class SWDLLEXPORT StringMgr {
public:
+
/** Sets the global StringMgr handle
* @param newStringMgr The new global StringMgr. This pointer will be deleted by this StringMgr
*/
- static void setSystemStringMgr( StringMgr* newStringMgr );
+ static void setSystemStringMgr(StringMgr *newStringMgr);
+
/** Returns the global StringMgr handle
* @return The global string handle
*/
- static StringMgr* getSystemStringMgr();
+ static StringMgr *getSystemStringMgr();
/** Checks whether Utf8 support is available.
* Override the function supportsUnicode() to tell whether your implementation has utf8 support.
* @return True if this implementation provides support for Utf8 handling or false if just latin1 handling is available
*/
- static const bool hasUTF8Support() {
+ static inline bool hasUTF8Support() {
return getSystemStringMgr()->supportsUnicode();
};
@@ -54,11 +58,12 @@
* @param text The text encoded in utf8 which should be turned into an upper case string
* @param max Only change max chars
*/
- virtual char* upperUTF8(char* text, const unsigned int max = 0);
+ virtual char *upperUTF8(char *text, unsigned int max = 0) const;
+
/** Converts the param to an uppercase latin1 string
* @param text The text encoded in latin1 which should be turned into an upper case string
*/
- virtual char* upperLatin1(char* text);
+ virtual char *upperLatin1(char *text, unsigned int max = 0) const;
protected:
@@ -67,29 +72,38 @@
/** Default constructor. Protected to make instances on user side impossible, because this is a Singleton
*/
StringMgr();
+
/** Copy constructor
*/
- StringMgr( const StringMgr& );
+ StringMgr(const StringMgr &);
+
/** Destructor
*/
virtual ~StringMgr();
- virtual const bool supportsUnicode() const;
+ virtual bool supportsUnicode() const;
private:
- static StringMgr* m_systemStringMgr;
+ static StringMgr *systemStringMgr;
};
-namespace utilstr {
- inline char* toupperstr( char* t ) {
- return StringMgr::getSystemStringMgr()->upperLatin1( t );
- };
+inline char *toupperstr(char *t, unsigned int max = 0) {
+ return (StringMgr::hasUTF8Support())
+ ? StringMgr::getSystemStringMgr()->upperLatin1(t, max)
+ : StringMgr::getSystemStringMgr()->upperUTF8(t, max);
+}
- inline char* toupperstr_utf8( char* t, const unsigned int max = 0 ) {
- return StringMgr::getSystemStringMgr()->upperUTF8( t, max );
- };
-};
-using namespace utilstr;
+inline char *toupperstr_utf8(char *t, unsigned int max = 0) {
+ return StringMgr::getSystemStringMgr()->upperUTF8(t, max);
+}
+
+inline SWBuf &toupperstr(SWBuf &b) {
+ char *utf8 = 0;
+ stdstr(&utf8, b.c_str(), 2);
+ toupperstr(utf8, strlen(utf8)*2);
+ b = utf8;
+ return b;
+}
SWORD_NAMESPACE_END
Modified: trunk/lib/bcppmake/libsword.bpr
===================================================================
--- trunk/lib/bcppmake/libsword.bpr 2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/lib/bcppmake/libsword.bpr 2006-06-29 23:22:22 UTC (rev 1934)
@@ -128,7 +128,7 @@
<PATHRC value=".;"/>
<PATHASM value=".;"/>
<LINKER value="TLib"/>
- <USERDEFINES value="UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING"/>
+ <USERDEFINES value="UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;_DEBUG"/>
<SYSDEFINES value="NO_STRICT"/>
<MAINSOURCE value="libsword.bpf"/>
<INCLUDEPATH value="..\..\src\modules\tests;..\..\src\utilfuns\zlib;..\..\src\modules\lexdict\zld;..\..\src\modules\lexdict\rawld4;..\..\src\modules\comments\zcom;..\..\src\modules\genbook\rawgenbook;..\..\src\modules\genbook;..\..\src\modules\texts\ztext;..\..\src\modules\texts\rawtext;..\..\src\modules\texts;..\..\src\modules\lexdict\rawld;..\..\src\modules\lexdict;..\..\src\modules\filters;..\..\src\modules\common;..\..\src\modules\comments\rawfiles;..\..\src\modules\comments\rawcom;..\..\src\modules\comments\hrefcom;..\..\src\modules\comments;..\..\src\modules;..\..\src\frontend;..\..\src\utilfuns;..\..\src\mgr;..\..\src\keys;..\..\..\icu-sword\source\common;..\..\apps\windoze\CBuilder5\InstallMgr\curl\include;..\..\include;$(BCB)\include;$(BCB)\include\vcl;..\..\..\icu-sword\source\i18n;..\..\..\biblecs\clucene\src;..\..\..\biblecs\apps\InstallMgr\curl\include"/>
@@ -154,9 +154,9 @@
-I..\..\..\biblecs\apps\InstallMgr\curl\include -src_suffix cpp -DUNICODE
-D_ICU_ -D_ICUSWORD_ -DUSBINARY -DU_HAVE_PLACEMENT_NEW=0 -DUSELUCENE
-D_WIN32 -D_CL_DISABLE_MULTITHREADING -no_tie -boa"/>
- <CFLAG1 value="-O2 -Vx -X- -a8 -b- -k- -vi -c -tW -tWM"/>
- <PFLAGS value="-$Y- -$L- -$D- -v -JPHNE -M"/>
- <AFLAGS value="/mx /w2 /zn"/>
+ <CFLAG1 value="-Od -Vx -X- -r- -a8 -b- -k -y -v -vi- -c -tW -tWM"/>
+ <PFLAGS value="-$Y+ -$W -$O- -v -JPHNE -M"/>
+ <AFLAGS value="/mx /w2 /zi"/>
<LFLAGS value="/P512"/>
</OPTIONS>
<LINKER>
@@ -230,8 +230,8 @@
[HistoryLists\hlConditionals]
Count=22
-Item0=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
-Item1=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;_DEBUG
+Item0=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;_DEBUG
+Item1=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
Item2=_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
Item3=_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;UNICODE
Item4=_UCS2;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
Modified: trunk/src/mgr/stringmgr.cpp
===================================================================
--- trunk/src/mgr/stringmgr.cpp 2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/src/mgr/stringmgr.cpp 2006-06-29 23:22:22 UTC (rev 1934)
@@ -40,28 +40,25 @@
SWORD_NAMESPACE_START
-StringMgr* StringMgr::m_systemStringMgr = 0;
+StringMgr *StringMgr::systemStringMgr = 0;
class __staticsystemStringMgr {
public:
- __staticsystemStringMgr() { }
- ~__staticsystemStringMgr() { if (StringMgr::m_systemStringMgr) delete StringMgr::m_systemStringMgr; StringMgr::m_systemStringMgr = 0; }
+ __staticsystemStringMgr() { }
+ ~__staticsystemStringMgr() { if (StringMgr::systemStringMgr) delete StringMgr::systemStringMgr; StringMgr::systemStringMgr = 0; }
} _staticsystemStringMgr;
#ifdef _ICU_
- //here comes our IcuStringMgr reimplementation
- class ICUStringMgr : public StringMgr {
- public:
- virtual char* upperUTF8(char*, const unsigned int maxlen = 0);
- //virtual char* upperLatin1(char*);
+//here comes our ICUStringMgr reimplementation
+class ICUStringMgr : public StringMgr {
+public:
+ virtual char *upperUTF8(char *, unsigned int maxlen = 0) const;
- protected:
- virtual const bool supportsUnicode() const {
- return true;
- };
- };
+protected:
+ virtual bool supportsUnicode() const { return true; };
+};
#endif
@@ -73,7 +70,7 @@
/** Copy constructor
*/
-StringMgr::StringMgr( const StringMgr& m ) {
+StringMgr::StringMgr(const StringMgr &m) {
}
/** Destructor
@@ -84,36 +81,40 @@
/** Sets the global StringMgr handle
* @param newStringMgr The new global StringMgr. This pointer will be deleted by this StringMgr
*/
-void StringMgr::setSystemStringMgr( StringMgr* newStringMgr ) {
- if (m_systemStringMgr)
- delete m_systemStringMgr;
+void StringMgr::setSystemStringMgr(StringMgr *newStringMgr) {
+ if (systemStringMgr)
+ delete systemStringMgr;
- m_systemStringMgr = newStringMgr;
- LocaleMgr::getSystemLocaleMgr()->setSystemLocaleMgr( new LocaleMgr() );
+ systemStringMgr = newStringMgr;
+
+ // TODO: this is magic. apparently we have to reset the system localemgr upon changing stringmgr.
+ // setting system stringmgr should be set before localemgr and not possible to change.
+ // rework this design.
+ LocaleMgr::getSystemLocaleMgr()->setSystemLocaleMgr(new LocaleMgr());
}
/** Returns the global StringMgr handle
* @return The global string handle
*/
StringMgr* StringMgr::getSystemStringMgr() {
- if (!m_systemStringMgr) {
+ if (!systemStringMgr) {
#ifdef _ICU_
- m_systemStringMgr = new ICUStringMgr();
+ systemStringMgr = new ICUStringMgr();
// SWLog::getSystemLog()->logInformation("created default ICUStringMgr");
#else
- m_systemStringMgr = new StringMgr();
+ systemStringMgr = new StringMgr();
// SWLog::getSystemLog()->logInformation("created default StringMgr");
#endif
}
- return m_systemStringMgr;
+ return systemStringMgr;
}
/** Converts the param to an upper case Utf8 string
* @param The text encoded in utf8 which should be turned into an upper case string
*/
-char* StringMgr::upperUTF8(char* t, const unsigned int maxlen) {
+char *StringMgr::upperUTF8(char *t, unsigned int maxlen) const {
// try to decide if it's worth trying to toupper. Do we have more
// characters which are probably lower latin than not?
long performOp = 0;
@@ -130,13 +131,13 @@
/** Converts the param to an uppercase latin1 string
* @param The text encoded in latin1 which should be turned into an upper case string
*/
-char* StringMgr::upperLatin1(char* buf) {
+char* StringMgr::upperLatin1(char* buf, unsigned int maxlen) const {
if (!buf)
return 0;
char *ret = buf;
- while (*buf) {
+ while (*buf && maxlen--) {
*buf = SW_toupper(*buf);
buf++;
}
@@ -144,48 +145,48 @@
return ret;
}
-const bool StringMgr::supportsUnicode() const {
+bool StringMgr::supportsUnicode() const {
return false; //default impl has no UTF8 support
}
#ifdef _ICU_
- char* ICUStringMgr::upperUTF8(char* buf, const unsigned int maxlen) {
- char* ret = buf;
- const int max = maxlen ? maxlen : strlen(buf);
+char *ICUStringMgr::upperUTF8(char *buf, unsigned int maxlen) const {
+ char *ret = buf;
+ int max = (maxlen) ? maxlen : strlen(buf);
- UErrorCode err = U_ZERO_ERROR;
+ UErrorCode err = U_ZERO_ERROR;
- if (!buf || !max) {
- return ret;
- }
+ if (!buf || !max) {
+ return ret;
+ }
- UChar *lowerStr = new UChar[max+10];
- UChar *upperStr = new UChar[max+10];
+ UChar *lowerStr = new UChar[max+10];
+ UChar *upperStr = new UChar[max+10];
- u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
- if (err != U_ZERO_ERROR) {
-// SWLog::getSystemLog()->logError("from: %s", u_errorName(err));
- delete [] lowerStr;
- delete [] upperStr;
- return ret;
- }
+ u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
+ if (err != U_ZERO_ERROR) {
+// SWLog::getSystemLog()->logError("from: %s", u_errorName(err));
+ delete [] lowerStr;
+ delete [] upperStr;
+ return ret;
+ }
- u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
- if (err != U_ZERO_ERROR) {
-// SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err));
- delete [] lowerStr;
- delete [] upperStr;
- return ret;
- }
-
- ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);
-
+ u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
+ if (err != U_ZERO_ERROR) {
+// SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err));
delete [] lowerStr;
delete [] upperStr;
return ret;
}
+
+ ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);
+
+ delete [] lowerStr;
+ delete [] upperStr;
+ return ret;
+}
#endif
Modified: trunk/src/modules/filters/osisrtf.cpp
===================================================================
--- trunk/src/modules/filters/osisrtf.cpp 2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/src/modules/filters/osisrtf.cpp 2006-06-29 23:22:22 UTC (rev 1934)
@@ -20,6 +20,7 @@
#include <utilxml.h>
#include <versekey.h>
#include <swmodule.h>
+#include <stringmgr.h>
SWORD_NAMESPACE_START
@@ -97,18 +98,19 @@
buf.appendFormatted(" {\\fs15 <%s>}", val);
}
if (attrib = tag.getAttribute("lemma")) {
- int count = tag.getAttributePartCount("lemma");
+ int count = tag.getAttributePartCount("lemma", ' ');
int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
do {
- attrib = tag.getAttribute("lemma", i);
+ attrib = tag.getAttribute("lemma", i, ' ');
if (i < 0) i = 0; // to handle our -1 condition
val = strchr(attrib, ':');
val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
if ((strchr("GH", *val)) && (isdigit(val[1])))
- val++;
- if ((!strcmp(val, "3588")) && (lastText.length() < 1))
+ val2++;
+ if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
show = false;
- else buf.appendFormatted(" {\\cf3 \\sub <%s>}", val);
+ else buf.appendFormatted(" {\\cf3 \\sub <%s>}", val2);
} while (++i < count);
}
if ((attrib = tag.getAttribute("morph")) && (show)) {
@@ -116,16 +118,17 @@
if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
show = false;
if (show) {
- int count = tag.getAttributePartCount("morph");
+ int count = tag.getAttributePartCount("morph", ' ');
int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
do {
- attrib = tag.getAttribute("morph", i);
+ attrib = tag.getAttribute("morph", i, ' ');
if (i < 0) i = 0; // to handle our -1 condition
val = strchr(attrib, ':');
val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
- val+=2;
- buf.appendFormatted(" {\\cf4 \\sub (%s)}", val);
+ val2+=2;
+ buf.appendFormatted(" {\\cf4 \\sub (%s)}", val2);
} while (++i < count);
}
}
@@ -304,6 +307,25 @@
}
}
+ // <divineName>
+ else if (!strcmp(tag.getName(), "divineName")) {
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+
+// just do all transChange tags this way for now
+// if (type == "supplied")
+ u->suspendTextPassThru = true;
+ }
+ else if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ SWBuf lastText = u->lastTextNode.c_str();
+ if (lastText.size()) {
+ toupperstr(lastText);
+ buf.appendFormatted("{\\fs19%c\\fs16%s}", lastText[0], lastText.c_str()+1);
+ }
+ }
+ }
+
// image
else if (!strcmp(tag.getName(), "figure")) {
const char *src = tag.getAttribute("src");
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/src/modules/swmodule.cpp 2006-06-29 23:22:22 UTC (rev 1934)
@@ -506,53 +506,61 @@
freeTestKey = true;
}
}
- lucene::index::IndexReader *ir;
- lucene::search::IndexSearcher *is;
- ir = IndexReader::open(target);
- is = new IndexSearcher(ir);
- (*percent)(10, percentUserData);
+ lucene::index::IndexReader *ir = 0;
+ lucene::search::IndexSearcher *is = 0;
+ Query *q = 0;
+ Hits *h = 0;
+ try {
+ ir = IndexReader::open(target);
+ is = new IndexSearcher(ir);
+ (*percent)(10, percentUserData);
- standard::StandardAnalyzer analyzer;
- lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
- Query *q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
- (*percent)(20, percentUserData);
- Hits *h = is->search(q);
- (*percent)(80, percentUserData);
+ standard::StandardAnalyzer analyzer;
+ lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
+ q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
+ (*percent)(20, percentUserData);
+ h = is->search(q);
+ (*percent)(80, percentUserData);
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h->length(); i++) {
+ Document &doc = h->doc(i);
- // iterate thru each good module position that meets the search
- for (long i = 0; i < h->length(); i++) {
- Document &doc = h->doc(i);
+ // set a temporary verse key to this module position
+ lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);
+ *resultKey = utfBuffer; //TODO Does a key always accept utf8?
+ if (enforceRange) {
+ // check scope
+ // Try to set our scope key to this verse key
+ *testKey = *resultKey;
- // set a temporary verse key to this module position
- lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);
- *resultKey = utfBuffer; //TODO Does a key always accept utf8?
- if (enforceRange) {
- // check scope
- // Try to set our scope key to this verse key
- *testKey = *resultKey;
+ // check to see if it set ok and if so, add to our return list
+ if (*testKey == *resultKey) {
+ listKey << *resultKey;
+ listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
+ }
+ }
+ else {
+ listKey << *resultKey;
+ listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
+ }
+ }
+ (*percent)(98, percentUserData);
+ }
+ catch (...) {
+ q = 0;
+ // invalid clucene query
+ }
+ delete h;
+ delete q;
- // check to see if it set ok and if so, add to our return list
- if (*testKey == *resultKey) {
- listKey << *resultKey;
- listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
- }
- }
- else {
- listKey << *resultKey;
- listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
- }
- }
- (*percent)(98, percentUserData);
-
- delete h;
- delete q;
-
- delete is;
- ir->close();
- if (freeTestKey) {
- delete testKey;
- }
+ delete is;
+ if (ir) {
+ ir->close();
+ }
+ if (freeTestKey) {
+ delete testKey;
+ }
}
#endif
More information about the sword-cvs mailing list