[sword-svn] r1934 - in trunk: include lib/bcppmake src/mgr src/modules src/modules/filters

Thu Jun 29 16:23:29 MST 2006

Author: scribe
Date: 2006-06-29 16:22:22 -0700 (Thu, 29 Jun 2006)
New Revision: 1934

Modified:
   trunk/include/stringmgr.h
   trunk/lib/bcppmake/libsword.bpr
   trunk/src/mgr/stringmgr.cpp
   trunk/src/modules/filters/osisrtf.cpp
   trunk/src/modules/swmodule.cpp
Log:
Cleaned up StringMgr to conform to the coding style.
Added support for multiple Strong's numbers in the OSIS-to-RTF filter.
Catch CLucene query-parsing exceptions.

Modified: trunk/include/stringmgr.h
===================================================================

--- trunk/include/stringmgr.h	2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/include/stringmgr.h	2006-06-29 23:22:22 UTC (rev 1934)
@@ -25,6 +25,8 @@
 #define STRINGMGR_H
 
 #include <defs.h>
+#include <swbuf.h>
+#include <utilstr.h>
 
 SWORD_NAMESPACE_START
 
@@ -33,20 +35,22 @@
  */
 class SWDLLEXPORT StringMgr {
 public:
+
 	/** Sets the global StringMgr handle
 	* @param newStringMgr The new global StringMgr. This pointer will be deleted by this StringMgr
 	*/	
-	static void setSystemStringMgr( StringMgr* newStringMgr );
+	static void setSystemStringMgr(StringMgr *newStringMgr);
+   
 	/** Returns the global StringMgr handle
 	* @return The global string handle
 	*/
-	static StringMgr* getSystemStringMgr();
+	static StringMgr *getSystemStringMgr();
 
 	/** Checks whether Utf8 support is available.
 	* Override the function supportsUnicode() to tell whether your implementation has utf8 support.
 	* @return True if this implementation provides support for Utf8 handling or false if just latin1 handling is available
 	*/
-	static const bool hasUTF8Support() {
+	static inline bool hasUTF8Support() {
 		return getSystemStringMgr()->supportsUnicode();
 	};
 	
@@ -54,11 +58,12 @@
 	* @param text The text encoded in utf8 which should be turned into an upper case string
 	* @param max Only change max chars
 	*/	
-	virtual char* upperUTF8(char* text, const unsigned int max = 0);
+	virtual char *upperUTF8(char *text, unsigned int max = 0) const;
+   
 	/** Converts the param to an uppercase latin1 string
 	* @param text The text encoded in latin1 which should be turned into an upper case string
 	*/	
-	virtual char* upperLatin1(char* text);
+	virtual char *upperLatin1(char *text, unsigned int max = 0) const;
 	
 
 protected:
@@ -67,29 +72,38 @@
 	/** Default constructor. Protected to make instances on user side impossible, because this is a Singleton
 	*/		
 	StringMgr();
+   
 	/** Copy constructor
 	*/	
-	StringMgr( const StringMgr& );
+	StringMgr(const StringMgr &);
+   
 	/** Destructor
 	*/	
 	virtual ~StringMgr();
 	
-	virtual const bool supportsUnicode() const;
+	virtual bool supportsUnicode() const;
 
 private:
-	static StringMgr* m_systemStringMgr;
+	static StringMgr *systemStringMgr;
 };
 
-namespace utilstr {
-	inline char* toupperstr( char* t ) {
-		return StringMgr::getSystemStringMgr()->upperLatin1( t );
-	};
+inline char *toupperstr(char *t, unsigned int max = 0) {	// upper-cases t through the system StringMgr; max is forwarded unchanged
+	return (StringMgr::hasUTF8Support())	// Unicode-capable manager takes the UTF-8 path, otherwise fall back to Latin-1
+		? StringMgr::getSystemStringMgr()->upperUTF8(t, max)
+		: StringMgr::getSystemStringMgr()->upperLatin1(t, max);
+}
 	
-	inline char* toupperstr_utf8( char* t, const unsigned int max = 0 ) {
-		return StringMgr::getSystemStringMgr()->upperUTF8( t, max );
-	};
-};
-using namespace utilstr;
+inline char *toupperstr_utf8(char *t, unsigned int max = 0) {	// always calls upperUTF8 on the system StringMgr, without consulting hasUTF8Support()
+	return StringMgr::getSystemStringMgr()->upperUTF8(t, max);
+}
+	
+inline SWBuf &toupperstr(SWBuf &b) {	// upper-cases b and returns a reference to it
+	char *utf8 = 0;
+	stdstr(&utf8, b.c_str(), 2);	// scratch copy of b; presumably new[]-allocated with a 2x pad factor -- confirm in utilstr.h
+	toupperstr(utf8, strlen(utf8)*2);	// the limit passed is twice the current text length
+	b = utf8; delete [] utf8;	// SWBuf assignment is assumed to copy the text, so the scratch buffer is freed here
+	return b;
+}
 
 SWORD_NAMESPACE_END
 

Modified: trunk/lib/bcppmake/libsword.bpr
===================================================================
--- trunk/lib/bcppmake/libsword.bpr	2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/lib/bcppmake/libsword.bpr	2006-06-29 23:22:22 UTC (rev 1934)
@@ -128,7 +128,7 @@
     <PATHRC value=".;"/>
     <PATHASM value=".;"/>
     <LINKER value="TLib"/>
-    <USERDEFINES value="UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING"/>
+    <USERDEFINES value="UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;_DEBUG"/>
     <SYSDEFINES value="NO_STRICT"/>
     <MAINSOURCE value="libsword.bpf"/>
     <INCLUDEPATH value="..\..\src\modules\tests;..\..\src\utilfuns\zlib;..\..\src\modules\lexdict\zld;..\..\src\modules\lexdict\rawld4;..\..\src\modules\comments\zcom;..\..\src\modules\genbook\rawgenbook;..\..\src\modules\genbook;..\..\src\modules\texts\ztext;..\..\src\modules\texts\rawtext;..\..\src\modules\texts;..\..\src\modules\lexdict\rawld;..\..\src\modules\lexdict;..\..\src\modules\filters;..\..\src\modules\common;..\..\src\modules\comments\rawfiles;..\..\src\modules\comments\rawcom;..\..\src\modules\comments\hrefcom;..\..\src\modules\comments;..\..\src\modules;..\..\src\frontend;..\..\src\utilfuns;..\..\src\mgr;..\..\src\keys;..\..\..\icu-sword\source\common;..\..\apps\windoze\CBuilder5\InstallMgr\curl\include;..\..\include;$(BCB)\include;$(BCB)\include\vcl;..\..\..\icu-sword\source\i18n;..\..\..\biblecs\clucene\src;..\..\..\biblecs\apps\InstallMgr\curl\include"/>
@@ -154,9 +154,9 @@
       -I..\..\..\biblecs\apps\InstallMgr\curl\include -src_suffix cpp -DUNICODE 
       -D_ICU_ -D_ICUSWORD_ -DUSBINARY -DU_HAVE_PLACEMENT_NEW=0 -DUSELUCENE 
       -D_WIN32 -D_CL_DISABLE_MULTITHREADING -no_tie -boa"/>
-    <CFLAG1 value="-O2 -Vx -X- -a8 -b- -k- -vi -c -tW -tWM"/>
-    <PFLAGS value="-$Y- -$L- -$D- -v -JPHNE -M"/>
-    <AFLAGS value="/mx /w2 /zn"/>
+    <CFLAG1 value="-Od -Vx -X- -r- -a8 -b- -k -y -v -vi- -c -tW -tWM"/>
+    <PFLAGS value="-$Y+ -$W -$O- -v -JPHNE -M"/>
+    <AFLAGS value="/mx /w2 /zi"/>
     <LFLAGS value="/P512"/>
   </OPTIONS>
   <LINKER>
@@ -230,8 +230,8 @@
 
 [HistoryLists\hlConditionals]
 Count=22
-Item0=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
-Item1=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;_DEBUG
+Item0=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;_DEBUG
+Item1=UNICODE;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
 Item2=_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING
 Item3=_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING;UNICODE
 Item4=_UCS2;_ICU_;_ICUSWORD_;USBINARY;U_HAVE_PLACEMENT_NEW=0;USELUCENE;_WIN32;_CL_DISABLE_MULTITHREADING

Modified: trunk/src/mgr/stringmgr.cpp
===================================================================
--- trunk/src/mgr/stringmgr.cpp	2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/src/mgr/stringmgr.cpp	2006-06-29 23:22:22 UTC (rev 1934)
@@ -40,28 +40,25 @@
 
 SWORD_NAMESPACE_START
 
-StringMgr* StringMgr::m_systemStringMgr = 0;
+StringMgr *StringMgr::systemStringMgr = 0;
 
 class __staticsystemStringMgr {
 public:
- 	__staticsystemStringMgr() { }
- 	~__staticsystemStringMgr() { if (StringMgr::m_systemStringMgr) delete StringMgr::m_systemStringMgr; StringMgr::m_systemStringMgr = 0; }
+	__staticsystemStringMgr() { }
+	~__staticsystemStringMgr() { if (StringMgr::systemStringMgr) delete StringMgr::systemStringMgr; StringMgr::systemStringMgr = 0; }
 } _staticsystemStringMgr;
 
 
 #ifdef _ICU_
 
-	//here comes our IcuStringMgr reimplementation
-	class ICUStringMgr : public StringMgr {
-	public:
-		virtual char* upperUTF8(char*, const unsigned int maxlen = 0);
-		//virtual char* upperLatin1(char*);
+//here comes our ICUStringMgr reimplementation
+class ICUStringMgr : public StringMgr {
+public:
+	virtual char *upperUTF8(char *, unsigned int maxlen = 0) const;
 	
-	protected:
-		virtual const bool supportsUnicode() const {
-			return true;
-		};
-	};
+protected:
+	virtual bool supportsUnicode() const { return true; };
+};
 
 #endif
 
@@ -73,7 +70,7 @@
 
 /** Copy constructor
 */	
-StringMgr::StringMgr( const StringMgr& m ) {
+StringMgr::StringMgr(const StringMgr &m) {
 }
 
 /** Destructor
@@ -84,36 +81,40 @@
 /** Sets the global StringMgr handle
 * @param newStringMgr The new global StringMgr. This pointer will be deleted by this StringMgr
 */	
-void StringMgr::setSystemStringMgr( StringMgr* newStringMgr ) {
-	if (m_systemStringMgr) 
-		delete m_systemStringMgr;
+void StringMgr::setSystemStringMgr(StringMgr *newStringMgr) {
+	if (systemStringMgr) 
+		delete systemStringMgr;
 	
-	m_systemStringMgr = newStringMgr;
-	LocaleMgr::getSystemLocaleMgr()->setSystemLocaleMgr( new LocaleMgr() );
+	systemStringMgr = newStringMgr;
+
+   // TODO: this is magic. apparently we have to reset the system localemgr upon changing stringmgr.
+   // setting system stringmgr should be set before localemgr and not possible to change.
+   // rework this design.
+	LocaleMgr::getSystemLocaleMgr()->setSystemLocaleMgr(new LocaleMgr());
 }
 
 /** Returns the global StringMgr handle
 * @return The global string handle
 */
 StringMgr* StringMgr::getSystemStringMgr() {
-	if (!m_systemStringMgr) {
+	if (!systemStringMgr) {
 #ifdef _ICU_
-		m_systemStringMgr = new ICUStringMgr();
+		systemStringMgr = new ICUStringMgr();
 // 		SWLog::getSystemLog()->logInformation("created default ICUStringMgr");
 #else
-		m_systemStringMgr = new StringMgr();
+		systemStringMgr = new StringMgr();
 //  		SWLog::getSystemLog()->logInformation("created default StringMgr");
 #endif
 	}
 	
-	return m_systemStringMgr;
+	return systemStringMgr;
 }
 
 
 /** Converts the param to an upper case Utf8 string
 * @param The text encoded in utf8 which should be turned into an upper case string
 */	
-char* StringMgr::upperUTF8(char* t, const unsigned int maxlen) {
+char *StringMgr::upperUTF8(char *t, unsigned int maxlen) const {
 	// try to decide if it's worth trying to toupper.  Do we have more
 	// characters which are probably lower latin than not?
 	long performOp = 0;
@@ -130,13 +131,13 @@
 /** Converts the param to an uppercase latin1 string
 * @param The text encoded in latin1 which should be turned into an upper case string
 */	
-char* StringMgr::upperLatin1(char* buf) {
+char* StringMgr::upperLatin1(char* buf, unsigned int maxlen) const {	// upper-cases buf in place and returns it; maxlen == 0 means no limit
 	if (!buf)
 		return 0;
 		
 	char *ret = buf;
 
-	while (*buf) {
+	for (unsigned int left = maxlen; *buf && (!maxlen || left--); ) {	// stop at NUL, or after maxlen chars when a limit was given
 		*buf = SW_toupper(*buf);
 		buf++;
 	}
@@ -144,48 +145,48 @@
 	return ret;
 }
 
-const bool StringMgr::supportsUnicode() const {
+bool StringMgr::supportsUnicode() const {
 	return false; //default impl has no UTF8 support
 }
 
 
 #ifdef _ICU_
 
-	char* ICUStringMgr::upperUTF8(char* buf, const unsigned int maxlen) {
-		char* ret = buf;
-		const int max = maxlen ? maxlen : strlen(buf);
+char *ICUStringMgr::upperUTF8(char *buf, unsigned int maxlen) const {	// upper-cases UTF-8 text via ICU, writing the result back into buf
+	char *ret = buf;
+	int max = (maxlen) ? maxlen : (buf ? strlen(buf) : 0);	// maxlen == 0: use the text length; never call strlen on a null buf
 		
-		UErrorCode err = U_ZERO_ERROR;
+	UErrorCode err = U_ZERO_ERROR;
 		
-		if (!buf || !max) {
-			return ret;
-		}
+	if (!buf || !max) {	// nothing to convert
+		return ret;
+	}
 		
-		UChar *lowerStr = new UChar[max+10];
-		UChar *upperStr = new UChar[max+10];
+	UChar *lowerStr = new UChar[max+10];
+	UChar *upperStr = new UChar[max+10];
 		
-		u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
-		if (err != U_ZERO_ERROR) {
-//			SWLog::getSystemLog()->logError("from: %s", u_errorName(err));
-			delete [] lowerStr;
-			delete [] upperStr;
-			return ret;
-		}
+	u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
+	if (err != U_ZERO_ERROR) {	// conversion failed: leave buf untouched
+//		SWLog::getSystemLog()->logError("from: %s", u_errorName(err));
+		delete [] lowerStr;
+		delete [] upperStr;
+		return ret;
+	}
 
-		u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
-		if (err != U_ZERO_ERROR) {
-//			SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err));
-			delete [] lowerStr;
-			delete [] upperStr;
-			return ret;
-		}
-
-		ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);
-		
+	u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
+	if (err != U_ZERO_ERROR) {	// case mapping failed: leave buf untouched
+//		SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err));
 		delete [] lowerStr;
 		delete [] upperStr;
 		return ret;
 	}
+
+	ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);	// write back into the caller's buffer, capacity max bytes
+		
+	delete [] lowerStr;
+	delete [] upperStr;
+	return ret;
+}
 	
 #endif
 

Modified: trunk/src/modules/filters/osisrtf.cpp
===================================================================
--- trunk/src/modules/filters/osisrtf.cpp	2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/src/modules/filters/osisrtf.cpp	2006-06-29 23:22:22 UTC (rev 1934)
@@ -20,6 +20,7 @@
 #include <utilxml.h>
 #include <versekey.h>
 #include <swmodule.h>
+#include <stringmgr.h>
 
 SWORD_NAMESPACE_START
 
@@ -97,18 +98,19 @@
 					buf.appendFormatted(" {\\fs15 <%s>}", val);
 				}
 				if (attrib = tag.getAttribute("lemma")) {
-					int count = tag.getAttributePartCount("lemma");
+					int count = tag.getAttributePartCount("lemma", ' ');
 					int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
 					do {
-						attrib = tag.getAttribute("lemma", i);
+						attrib = tag.getAttribute("lemma", i, ' ');
 						if (i < 0) i = 0;	// to handle our -1 condition
 						val = strchr(attrib, ':');
 						val = (val) ? (val + 1) : attrib;
+						const char *val2 = val;
 						if ((strchr("GH", *val)) && (isdigit(val[1])))
-							val++;
-						if ((!strcmp(val, "3588")) && (lastText.length() < 1))
+							val2++;
+						if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
 							show = false;
-						else	buf.appendFormatted(" {\\cf3 \\sub <%s>}", val);
+						else	buf.appendFormatted(" {\\cf3 \\sub <%s>}", val2);
 					} while (++i < count);
 				}
 				if ((attrib = tag.getAttribute("morph")) && (show)) {
@@ -116,16 +118,17 @@
 					if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
 						show = false;
 					if (show) {
-						int count = tag.getAttributePartCount("morph");
+						int count = tag.getAttributePartCount("morph", ' ');
 						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
 						do {
-							attrib = tag.getAttribute("morph", i);
+							attrib = tag.getAttribute("morph", i, ' ');
 							if (i < 0) i = 0;	// to handle our -1 condition
 							val = strchr(attrib, ':');
 							val = (val) ? (val + 1) : attrib;
+							const char *val2 = val;
 							if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
-								val+=2;
-							buf.appendFormatted(" {\\cf4 \\sub (%s)}", val);
+								val2+=2;
+							buf.appendFormatted(" {\\cf4 \\sub (%s)}", val2);
 						} while (++i < count);
 					}
 				}
@@ -304,6 +307,25 @@
 			}
 		}
 
+		// <divineName>
+		else if (!strcmp(tag.getName(), "divineName")) {
+
+			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+
+// just do all transChange tags this way for now
+//				if (type == "supplied")
+				u->suspendTextPassThru = true;
+			}
+			else if (tag.isEndTag()) {
+				u->suspendTextPassThru = false;
+				SWBuf lastText = u->lastTextNode.c_str();
+				if (lastText.size()) {
+					toupperstr(lastText);
+					buf.appendFormatted("{\\fs19%c\\fs16%s}", lastText[0], lastText.c_str()+1);
+				}
+			}
+		}
+
 		// image
 		else if (!strcmp(tag.getName(), "figure")) {
 			const char *src = tag.getAttribute("src");

Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2006-06-27 11:01:57 UTC (rev 1933)
+++ trunk/src/modules/swmodule.cpp	2006-06-29 23:22:22 UTC (rev 1934)
@@ -506,53 +506,61 @@
 				freeTestKey = true;
 			}
 		}
-		lucene::index::IndexReader *ir;
-		lucene::search::IndexSearcher *is;
-		ir = IndexReader::open(target);
-		is = new IndexSearcher(ir);
-		(*percent)(10, percentUserData);
+		lucene::index::IndexReader    *ir = 0;
+		lucene::search::IndexSearcher *is = 0;
+      Query                          *q = 0;
+      Hits                           *h = 0;
+      try {
+        ir = IndexReader::open(target);
+        is = new IndexSearcher(ir);
+        (*percent)(10, percentUserData);
 
-		standard::StandardAnalyzer analyzer;
-		lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
-		Query *q =  QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
-		(*percent)(20, percentUserData);
-		Hits *h = is->search(q);
-		(*percent)(80, percentUserData);
+        standard::StandardAnalyzer analyzer;
+        lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
+  		  q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
+   	  (*percent)(20, percentUserData);
+	     h = is->search(q);
+	     (*percent)(80, percentUserData);
 
+        // iterate thru each good module position that meets the search
+        for (long i = 0; i < h->length(); i++) {
+           Document &doc = h->doc(i);
 
-		// iterate thru each good module position that meets the search
-		for (long i = 0; i < h->length(); i++) {
-			Document &doc = h->doc(i);
+           // set a temporary verse key to this module position
+           lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);	
+           *resultKey = utfBuffer; //TODO Does a key always accept utf8?
+           if (enforceRange) {
+              // check scope
+              // Try to set our scope key to this verse key
+              *testKey = *resultKey;
 
-			// set a temporary verse key to this module position
-			lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);	
-			*resultKey = utfBuffer; //TODO Does a key always accept utf8?
-			if (enforceRange) {
-				// check scope
-				// Try to set our scope key to this verse key
-				*testKey = *resultKey;
+              // check to see if it set ok and if so, add to our return list
+              if (*testKey == *resultKey) {
+                 listKey << *resultKey;
+                 listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
+              }
+           }
+           else {
+              listKey << *resultKey;
+              listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
+           }
+        }
+        (*percent)(98, percentUserData);
+      }
+      catch (...) {
+         q = 0;
+         // invalid clucene query
+      }
+      delete h;
+      delete q;
 
-				// check to see if it set ok and if so, add to our return list
-				if (*testKey == *resultKey) {
-					listKey << *resultKey;
-					listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
-				}
-			}
-			else {
-				listKey << *resultKey;
-				listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
-			}
-		}
-		(*percent)(98, percentUserData);
-
-		delete h;
-		delete q;
-
-		delete is;
-		ir->close();
-		if (freeTestKey) {
-			delete testKey;
-		}
+      delete is;
+      if (ir) {
+         ir->close();
+      }
+      if (freeTestKey) {
+         delete testKey;
+      }
 	}
 #endif