[sword-devel] patch
Martin Gruner
sword-devel@crosswire.org
Sun, 18 Jan 2004 16:30:36 +0100
--Boundary-00=_caqCAALvtLUnOD9
Content-Type: text/plain;
charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline
Hi,
I tried to work out how UTF-8 locales can work in sword. I noticed that there
are two uppercasing functions in sword: toupperstr(), which only works on
Latin-1, and toupperstr_utf8(), which can use ICU.
Therefore I created a patch to handle UTF-8 consistently in sword by removing
toupperstr() (it is commented out for now) and patching toupperstr_utf8() a
little. Please look through it and see if this is OK.
It should work; my only problem is that ICU's toUpper() does not work
correctly on my system right now, and I am not sure why. It leaves the string
unchanged. Chris, can you help me here? Does it work for you?
Thanks for all feedback.
Martin
--Boundary-00=_caqCAALvtLUnOD9
Content-Type: text/x-diff;
charset="us-ascii";
name="toupper.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="toupper.patch"
Index: debian/rules
===================================================================
RCS file: /cvs/core/sword/debian/rules,v
retrieving revision 1.7
diff -u -3 -p -u -r1.7 rules
--- debian/rules 17 Jan 2004 21:21:13 -0000 1.7
+++ debian/rules 18 Jan 2004 15:26:04 -0000
@@ -34,7 +34,7 @@ configure-stamp:
chmod 755 configure
./configure $(confflags) --prefix=/usr --mandir=\$${prefix}/share/man \
--infodir=\$${prefix}/share/info --with-zlib \
- --sysconfdir=/etc --enable-shared --without-icu \
+ --sysconfdir=/etc --enable-shared --with-icu \
--without-lucene
touch configure.stamp
Index: include/utilstr.h
===================================================================
RCS file: /cvs/core/sword/include/utilstr.h,v
retrieving revision 1.11
diff -u -3 -p -u -r1.11 utilstr.h
--- include/utilstr.h 22 Jun 2003 23:50:23 -0000 1.11
+++ include/utilstr.h 18 Jan 2004 15:26:04 -0000
@@ -33,7 +33,7 @@ char *strstrip (char *istr);
const char *stristr (const char *s1, const char *s2);
const char strnicmp(const char *s1, const char *s2, int len);
unsigned int strlenw(const char *s1);
-char *toupperstr(char *buf);
+//char *toupperstr(char *buf);
char *toupperstr_utf8(char *buf, unsigned int max = 0);
/*
Index: src/keys/versekey.cpp
===================================================================
RCS file: /cvs/core/sword/src/keys/versekey.cpp,v
retrieving revision 1.58
diff -u -3 -p -u -r1.58 versekey.cpp
--- src/keys/versekey.cpp 27 Jun 2003 01:41:07 -0000 1.58
+++ src/keys/versekey.cpp 18 Jan 2004 15:26:05 -0000
@@ -324,7 +324,7 @@ int VerseKey::getBookAbbrev(const char *
stdstr(&abbr, iabbr);
strstrip(abbr);
if (!i)
- toupperstr(abbr);
+ toupperstr_utf8(abbr);
abLen = strlen(abbr);
if (abLen) {
Index: src/modules/filters/swbasicfilter.cpp
===================================================================
RCS file: /cvs/core/sword/src/modules/filters/swbasicfilter.cpp,v
retrieving revision 1.33
diff -u -3 -p -u -r1.33 swbasicfilter.cpp
--- src/modules/filters/swbasicfilter.cpp 24 Oct 2003 02:43:46 -0000 1.33
+++ src/modules/filters/swbasicfilter.cpp 18 Jan 2004 15:26:05 -0000
@@ -93,7 +93,7 @@ void SWBasicFilter::addTokenSubstitute(c
if (!tokenCaseSensitive) {
stdstr(&buf, findString);
- toupperstr(buf);
+ toupperstr_utf8(buf);
tokenSubMap[buf] = replaceString;
delete [] buf;
}
@@ -114,7 +114,7 @@ void SWBasicFilter::addEscapeStringSubst
if (!escStringCaseSensitive) {
stdstr(&buf, findString);
- toupperstr(buf);
+ toupperstr_utf8(buf);
escSubMap.insert(DualStringMap::value_type(buf, replaceString));
delete [] buf;
}
@@ -135,7 +135,7 @@ bool SWBasicFilter::substituteToken(SWBu
if (!tokenCaseSensitive) {
char *tmp = 0;
stdstr(&tmp, token);
- toupperstr(tmp);
+ toupperstr_utf8(tmp);
it = tokenSubMap.find(tmp);
delete [] tmp;
} else
@@ -155,7 +155,7 @@ bool SWBasicFilter::substituteEscapeStri
if (!escStringCaseSensitive) {
char *tmp = 0;
stdstr(&tmp, escString);
- toupperstr(tmp);
+ toupperstr_utf8(tmp);
it = escSubMap.find(tmp);
delete [] tmp;
} else
Index: src/modules/texts/rawtext/rawtext.cpp
===================================================================
RCS file: /cvs/core/sword/src/modules/texts/rawtext/rawtext.cpp,v
retrieving revision 1.69
diff -u -3 -p -u -r1.69 rawtext.cpp
--- src/modules/texts/rawtext/rawtext.cpp 17 Jan 2004 04:33:25 -0000 1.69
+++ src/modules/texts/rawtext/rawtext.cpp 18 Jan 2004 15:26:06 -0000
@@ -282,7 +282,7 @@ signed char RawText::createSearchFramewo
while (word) {
// make word upper case
- toupperstr(word);
+ toupperstr_utf8(word);
// lookup word in dictionary (or make entry in dictionary
// for this word) and add this module position (index) to
@@ -519,7 +519,7 @@ ListKey &RawText::search(const char *ist
// toupper our copy of search string
stdstr(&wordBuf, istr);
- toupperstr(wordBuf);
+ toupperstr_utf8(wordBuf);
// get list of individual words
words = (char **)calloc(sizeof(char *), 10);
Index: src/utilfuns/utilstr.cpp
===================================================================
RCS file: /cvs/core/sword/src/utilfuns/utilstr.cpp,v
retrieving revision 1.25
diff -u -3 -p -u -r1.25 utilstr.cpp
--- src/utilfuns/utilstr.cpp 27 Jun 2003 02:21:05 -0000 1.25
+++ src/utilfuns/utilstr.cpp 18 Jan 2004 15:26:06 -0000
@@ -1,6 +1,7 @@
#include <utilstr.h>
#include <ctype.h>
#include <string.h>
+#include <iostream>
#ifdef _ICU_
#include <unicode/utypes.h>
@@ -147,26 +148,29 @@ unsigned int strlenw(const char *s1) {
}
-/******************************************************************************
- * toupperstr - converts a string to uppercase string
- *
- * ENT: target - string to convert
- *
- * RET: target
- */
-
-char *toupperstr(char *buf) {
- char *ret = buf;
-
- while (*buf)
- *buf = SW_toupper(*buf++);
-
- return ret;
-}
+///******************************************************************************
+// * toupperstr - converts a string to uppercase string
+// *
+// * ENT: target - string to convert
+// *
+// * RET: target
+// */
+//
+//char *toupperstr(char *buf) {
+// char *ret = buf;
+//
+// while (*buf)
+// *buf = SW_toupper(*buf++);
+//
+// return ret;
+//}
/******************************************************************************
- * toupperstr - converts a string to uppercase string
+ * toupperstr_utf8 - converts a string to uppercase string
+ * If ICU support is enabled in sword, this function will use it to do the work.
+ * If ICU support is not enabled, this function will ONLY work correctly with
+ * Latin-1 data!
*
* ENT: target - string to convert
*
@@ -179,23 +183,26 @@ char *toupperstr_utf8(char *buf, unsigne
#ifndef _ICU_
// try to decide if it's worth trying to toupper. Do we have more
// characters that are probably lower latin than not?
- long performOp = 0;
- for (const char *ch = buf; *ch; ch++)
- performOp += (*ch > 0) ? 1 : -1;
- if (performOp) {
+//mgruner: WHAT IS THIS CODE FOR? TOUPPER IS SUPPOSED TO ALWAYS WORK...
+// long performOp = 0;
+// for (const char *ch = buf; *ch; ch++)
+// performOp += (*ch > 0) ? 1 : -1;
+//
+// if (performOp) {
while (*buf)
*buf = SW_toupper(*buf++);
- }
+// }
#else
if (!max)
max = strlen(ret);
- UErrorCode err = U_ZERO_ERROR;
- UConverter *conv = ucnv_open("UTF-8", &err);
- UnicodeString str(buf, -1, conv, err);
- UnicodeString ustr = str.toUpper();
- ustr.extract(ret, max, conv, err);
- ucnv_close(conv);
+
+ UErrorCode err = U_ZERO_ERROR;
+ UConverter *conv = ucnv_open("UTF-8", &err);
+ UnicodeString str(buf, -1, conv, err);
+ UnicodeString ustr = str.toUpper();
+ ustr.extract(ret, max, conv, err);
+ ucnv_close(conv);
#endif
return ret;
--Boundary-00=_caqCAALvtLUnOD9--