[sword-svn] r2444 - in trunk: src/modules src/utilfuns tests
scribe at crosswire.org
scribe at crosswire.org
Sun Aug 23 12:49:52 MST 2009
Author: scribe
Date: 2009-08-23 12:49:52 -0700 (Sun, 23 Aug 2009)
New Revision: 2444
Added:
trunk/tests/utf8norm.cpp
Modified:
trunk/src/modules/swmodule.cpp
trunk/src/utilfuns/utilstr.cpp
trunk/tests/Makefile.am
Log:
Moved assureValidUTF8 to utilstr so others can take advantage.
Added a test for making this method actually work.
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2009-08-23 06:16:26 UTC (rev 2443)
+++ trunk/src/modules/swmodule.cpp 2009-08-23 19:49:52 UTC (rev 2444)
@@ -674,7 +674,8 @@
resultKey->clearBound();
listKey << *resultKey;
}
- } break;
+ }
+ break;
// entry attributes
case -3: {
@@ -685,10 +686,15 @@
AttributeValue::iterator i3Start, i3End;
if ((words.size()) && (words[0].length())) {
+cout << "Word: " << words[0] << endl;
+ for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
+cout << "stuff: " << i1Start->first.c_str() << endl;
+ }
i1Start = entryAttribs.find(words[0]);
i1End = i1Start;
- if (i1End != entryAttribs.end())
- i1End++;
+ if (i1End != entryAttribs.end()) {
+ i1End++;
+ }
}
else {
i1Start = entryAttribs.begin();
Modified: trunk/src/utilfuns/utilstr.cpp
===================================================================
--- trunk/src/utilfuns/utilstr.cpp 2009-08-23 06:16:26 UTC (rev 2443)
+++ trunk/src/utilfuns/utilstr.cpp 2009-08-23 19:49:52 UTC (rev 2444)
@@ -20,6 +20,7 @@
#include <string.h>
#include <sysdata.h>
+#include <swlog.h>
SWORD_NAMESPACE_START
@@ -235,4 +236,30 @@
}
+
+SWBuf assureValidUTF8(const char *buf) { // returns a copy of buf with rejected byte runs overwritten
+ // Scans a private copy of buf; bytes consumed by a failed decode are replaced in that copy, buf itself is untouched.
+ SWBuf myCopy = buf;
+ const unsigned char *b = (const unsigned char *)myCopy.c_str(); // read cursor into the copy
+ const unsigned char *q = 0; // start of the sequence currently being decoded
+ bool invalidChar = false; // set once anything was replaced, so the warning is logged only once
+ while (*b) {
+ q = b;
+ if (!getUniCharFromUTF8(&b)) { // presumably returns 0 for an invalid sequence and advances b past it -- TODO confirm
+ long len = b - q; // number of bytes the failed decode consumed
+ if (len) { // NOTE(review): if b was not advanced, nothing is replaced and this loop would never terminate -- confirm getUniCharFromUTF8 always advances
+ invalidChar = true;
+ for (long start = q - (const unsigned char *)myCopy.c_str(); len; len--) { // start = offset of q in the copy; walks the run back to front
+ myCopy[start+len-1] = 0x1a; // 0x1a is the ASCII SUB (substitute) control character, not the Unicode replacement character U+FFFD
+ }
+ // every consumed byte is overwritten individually, so the length of the string is unchanged
+ }
+ }
+ }
+ if (invalidChar) { // report the original and the patched text side by side
+ SWLog::getSystemLog()->logWarning("Changing invalid UTF-8 string (%s) to (%s)\n", buf, myCopy.c_str());
+ }
+ return myCopy;
+}
+
SWORD_NAMESPACE_END
Modified: trunk/tests/Makefile.am
===================================================================
--- trunk/tests/Makefile.am 2009-08-23 06:16:26 UTC (rev 2443)
+++ trunk/tests/Makefile.am 2009-08-23 19:49:52 UTC (rev 2444)
@@ -4,7 +4,7 @@
SUBDIRS = cppunit
-noinst_PROGRAMS = ciphertest keytest mgrtest parsekey versekeytest vtreekeytest versemgrtest listtest casttest \
+noinst_PROGRAMS = utf8norm ciphertest keytest mgrtest parsekey versekeytest vtreekeytest versemgrtest listtest casttest \
modtest compnone complzss localetest introtest indextest configtest keycast \
romantest testblocks filtertest rawldidxtest lextest swaptest \
swbuftest xmltest webiftest striptest
@@ -39,6 +39,7 @@
keytest_SOURCES = keytest.cpp
mgrtest_SOURCES = mgrtest.cpp
ciphertest_SOURCES = ciphertest.cpp
+utf8norm_SOURCES = utf8norm.cpp
parsekey_SOURCES = parsekey.cpp
versekeytest_SOURCES = versekeytest.cpp
vtreekeytest_SOURCES = vtreekeytest.cpp
Added: trunk/tests/utf8norm.cpp
===================================================================
--- trunk/tests/utf8norm.cpp (rev 0)
+++ trunk/tests/utf8norm.cpp 2009-08-23 19:49:52 UTC (rev 2444)
@@ -0,0 +1,16 @@
+#include <iostream>
+#include <utilstr.h>
+#include <swbuf.h>
+
+using namespace sword;
+using namespace std;
+
+int main(int argc, char **argv) { // manual check: prints a string before and after assureValidUTF8
+ const char *buf = (argc > 1) ? argv[1] : "Description=German Unrevidierte Luther Übersetzung von 1545";
+ // buf is the first command-line argument when one is given, otherwise the built-in sample above
+ SWBuf fixed = assureValidUTF8(buf);
+ // the original input goes on one line and the processed result on the next, for comparison by eye
+ cout << "input / processed:\n" << buf << "\n" << fixed << endl;
+ // always exits with 0; nothing is asserted about the result
+ return 0;
+}
More information about the sword-cvs
mailing list