[sword-svn] r2444 - in trunk: src/modules src/utilfuns tests

scribe at crosswire.org scribe at crosswire.org
Sun Aug 23 12:49:52 MST 2009


Author: scribe
Date: 2009-08-23 12:49:52 -0700 (Sun, 23 Aug 2009)
New Revision: 2444

Added:
   trunk/tests/utf8norm.cpp
Modified:
   trunk/src/modules/swmodule.cpp
   trunk/src/utilfuns/utilstr.cpp
   trunk/tests/Makefile.am
Log:
Moved assureValidUTF8 to utilstr so others can take advantage.
Added a test for making this method actually work.



Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2009-08-23 06:16:26 UTC (rev 2443)
+++ trunk/src/modules/swmodule.cpp	2009-08-23 19:49:52 UTC (rev 2444)
@@ -674,7 +674,8 @@
 					resultKey->clearBound();
 					listKey << *resultKey;
 				}
-				} break;
+				}
+				break;
 
 			// entry attributes
 			case -3: {
@@ -685,10 +686,15 @@
 				AttributeValue::iterator i3Start, i3End;
 
 				if ((words.size()) && (words[0].length())) {
+cout << "Word: " << words[0] << endl;
+				for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
+cout << "stuff: " << i1Start->first.c_str() << endl;
+				}
 					i1Start = entryAttribs.find(words[0]);
 					i1End = i1Start;
-					if (i1End != entryAttribs.end())
-					i1End++;
+					if (i1End != entryAttribs.end()) {
+						i1End++;
+					}
 				}
 				else {
 					i1Start = entryAttribs.begin();

Modified: trunk/src/utilfuns/utilstr.cpp
===================================================================
--- trunk/src/utilfuns/utilstr.cpp	2009-08-23 06:16:26 UTC (rev 2443)
+++ trunk/src/utilfuns/utilstr.cpp	2009-08-23 19:49:52 UTC (rev 2444)
@@ -20,6 +20,7 @@
 #include <string.h>
 
 #include <sysdata.h>
+#include <swlog.h>
 
 
 SWORD_NAMESPACE_START
@@ -235,4 +236,30 @@
 }
 
 
+
+SWBuf assureValidUTF8(const char *buf) {
+	// Returns a copy of buf in which every byte of a sequence rejected by getUniCharFromUTF8() is overwritten with 0x1a; logs a warning if anything was changed.
+	SWBuf myCopy = buf;
+	const unsigned char *b = (const unsigned char *)myCopy.c_str();
+	const unsigned char *q = 0;
+	bool invalidChar = false;
+	while (*b) {	// scan the copy up to its NUL terminator
+		q = b;	// remember where the current sequence starts
+		if (!getUniCharFromUTF8(&b)) {	// a 0 result is treated as invalid; b is passed by address, presumably so the helper can advance it
+			long len = b - q;	// number of bytes the helper moved past
+			if (len) {	// NOTE(review): when len is 0 nothing in this loop advances b -- confirm the helper always consumes at least one byte
+				invalidChar = true;
+				for (long start = q - (const unsigned char *)myCopy.c_str(); len; len--) {	// start = byte offset of the rejected sequence within myCopy
+					myCopy[start+len-1] = 0x1a;	// 0x1a is ASCII SUB (substitute): a one-byte stand-in, not the Unicode replacement character U+FFFD
+				}
+				// bytes are overwritten one-for-one, so the copy keeps the same length as the input
+			}
+		}
+	}
+	if (invalidChar) {
+		SWLog::getSystemLog()->logWarning("Changing invalid UTF-8 string (%s) to (%s)\n", buf, myCopy.c_str());
+	}
+	return myCopy;
+}
+
 SWORD_NAMESPACE_END

Modified: trunk/tests/Makefile.am
===================================================================
--- trunk/tests/Makefile.am	2009-08-23 06:16:26 UTC (rev 2443)
+++ trunk/tests/Makefile.am	2009-08-23 19:49:52 UTC (rev 2444)
@@ -4,7 +4,7 @@
 
 SUBDIRS = cppunit
 
-noinst_PROGRAMS = ciphertest keytest mgrtest parsekey versekeytest vtreekeytest versemgrtest listtest casttest \
+noinst_PROGRAMS = utf8norm ciphertest keytest mgrtest parsekey versekeytest vtreekeytest versemgrtest listtest casttest \
 modtest compnone complzss localetest introtest indextest configtest keycast \
 romantest testblocks filtertest rawldidxtest lextest swaptest \
  swbuftest xmltest webiftest striptest
@@ -39,6 +39,7 @@
 keytest_SOURCES = keytest.cpp
 mgrtest_SOURCES = mgrtest.cpp
 ciphertest_SOURCES = ciphertest.cpp
+utf8norm_SOURCES = utf8norm.cpp
 parsekey_SOURCES = parsekey.cpp
 versekeytest_SOURCES = versekeytest.cpp
 vtreekeytest_SOURCES = vtreekeytest.cpp

Added: trunk/tests/utf8norm.cpp
===================================================================
--- trunk/tests/utf8norm.cpp	                        (rev 0)
+++ trunk/tests/utf8norm.cpp	2009-08-23 19:49:52 UTC (rev 2444)
@@ -0,0 +1,16 @@
+#include <iostream>
+#include <utilstr.h>
+#include <swbuf.h>
+
+using namespace sword;
+using namespace std;
+
+int main(int argc, char **argv) {	// manual test driver for assureValidUTF8()
+	const char *buf = (argc > 1) ? argv[1] : "Description=German Unrevidierte Luther Übersetzung von 1545";
+	// input is the first command-line argument when given, otherwise the built-in sample string above
+	SWBuf fixed = assureValidUTF8(buf);
+	// print the original and the processed text on separate lines for visual comparison
+	cout << "input / processed:\n" << buf << "\n" << fixed << endl;
+
+	return 0;
+}




More information about the sword-cvs mailing list