[sword-devel] MacSword 1.1.3
Troy A. Griffitts
scribe at crosswire.org
Thu Nov 25 19:53:21 MST 2004
Hey guys,
What's the status on ICU and StringMgr? I thought that if ICU was
compiled into the library, it provided a subclass: ICUStringMgr that
worked with UTF8. This would mean that Will doesn't have to do
anything if he already includes ICU support.
-Troy.
Joachim Ansorg wrote:
> Hi,
>
> it's simple - I hope :)
>
>
>>So all I have to do is replace char* StringMgr::upperUTF8(char* t,
>>const unsigned int maxlen)?
>
>
> Reimplement upperUTF8 and then tell Sword to use an instance of your
> reimplementation to handle Unicode/Latin1 Strings with
> StringMgr::setSystemStringMgr.
>
>
>>If so, then what is maxlen for? Is it expecting t to be overwritten or
>>just a maximum buffer allocated? I assume that sword will dealloc any
>>buffer I return.
>
>
> If maxlen is > 0, only the first maxlen chars are uppercased. It expects t to
> be overwritten. No buffers are allocated within upperUTF8. I attached the
> BTStringMgr we use in BibleTime.
> I advise checking whether a string contains Unicode chars before uppercasing
> it using Unicode. With that check it is a lot faster than without.
>
> I hope that helps. And yes, we need better documentation :)
>
> If you return true in supportsUnicode then LocaleMgr will only load locales
> which are in UTF-8, so you can be sure that all verse keys are in UTF-8.
>
> Let me know if you need help,
> Joachim
>
>
> ------------------------------------------------------------------------
>
> //
> // C++ Implementation: btstringmgr
> //
> // Description:
> //
> //
> // Author: The BibleTime team <info at bibletime.info>, (C) 2004
> //
> // Copyright: See COPYING file that comes with this distribution
> //
> //
>
> #include "btstringmgr.h"
>
> //System includes
> #include <ctype.h>
>
> /**
>  * Upper-cases the given buffer in place and returns it.
>  *
>  * Text that isUtf8() recognises as UTF-8 is converted through QString and
>  * copied back with strncpy(); everything else (plain ASCII or a single-byte
>  * encoding) is upper-cased byte by byte with toupper().
>  *
>  * @param text   NUL-terminated buffer that is overwritten with the result.
>  * @param maxlen If greater than 0, at most this many bytes of the buffer are
>  *               rewritten; 0 means "use the current length of text".
>  * @return The same pointer that was passed in as text.
>  */
> char* BTStringMgr::upperUTF8(char* text, const unsigned int maxlen) {
>     const size_t max = (maxlen > 0) ? maxlen : strlen(text);
>
>     if (isUtf8(text)) {
>         // NOTE(review): strncpy() does not write a terminator when the
>         // upper-cased string needs more than max bytes (e.g. when a character
>         // grows during case mapping), and the cut may fall inside a
>         // multi-byte sequence. Callers must size the buffer accordingly.
>         strncpy(text, (const char*)QString::fromUtf8(text).upper().utf8(), max);
>         return text;
>     }
>
>     // Single-byte path: honour maxlen here as well, and hand toupper() an
>     // unsigned char value because negative arguments are undefined behaviour.
>     for (size_t i = 0; i < max && text[i]; ++i) {
>         text[i] = static_cast<char>(toupper(static_cast<unsigned char>(text[i])));
>     }
>
>     return text;
> }
>
> /**
>  * Upper-cases a NUL-terminated single-byte string in place using toupper().
>  *
>  * @param text Buffer that is overwritten with its upper-case form.
>  * @return The same pointer that was passed in as text.
>  */
> char* BTStringMgr::upperLatin1(char* text) {
>     // Read and write through a separate cursor in two distinct steps; the
>     // former "*text++ = toupper(*text)" modified and read text unsequenced.
>     for (char* cursor = text; *cursor; ++cursor) {
>         // toupper() requires a value representable as unsigned char.
>         *cursor = static_cast<char>(toupper(static_cast<unsigned char>(*cursor)));
>     }
>
>     return text;
> }
>
> /** Always answers true: this string manager declares Unicode support. */
> const bool BTStringMgr::supportsUnicode() const {
>     return true;
> }
>
> /**
>  * Heuristically decides whether a NUL-terminated buffer holds UTF-8 text.
>  *
>  * @param buf NUL-terminated bytes to inspect; the buffer is not modified.
>  * @return true only if the buffer contains at least one well-formed
>  *         multi-byte sequence and nothing that disqualifies it. Pure 7-bit
>  *         text, stray continuation bytes, malformed lead bytes and 7-bit
>  *         bytes not classified as plain text all yield false.
>  */
> const bool BTStringMgr::isUtf8(const char *buf) {
>     /* Byte classes used by the lookup table below. */
>     enum {
>         F = 0, /* character never appears in text */
>         T = 1, /* character appears in plain ASCII text */
>         I = 2, /* character appears in ISO-8859 text */
>         X = 3  /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
>     };
>
> static const unsigned char text_chars[256] = {
> /* BEL BS HT LF FF CR */
> F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
> /* ESC */
> F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
> T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
> T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
> T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
> T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
> T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
> T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
> /* NEL */
> X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
> X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
> I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
> I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
> I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
> I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
> I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
> I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
> };
>
>     bool gotone = false; /* set once a complete multi-byte sequence was seen */
>
>     for (int i = 0; buf[i]; i++) {
>         const unsigned char c = buf[i];
>
>         if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
>             /*
>              * Even if the whole buffer is valid UTF-8 sequences,
>              * still reject it if it uses weird control characters.
>              */
>             if (text_chars[c] != T)
>                 return false;
>
>         } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
>             return false;
>         } else { /* 11xxxxxx begins UTF-8 */
>             int following;
>
>             if ((c & 0x20) == 0) { /* 110xxxxx */
>                 following = 1;
>             } else if ((c & 0x10) == 0) { /* 1110xxxx */
>                 following = 2;
>             } else if ((c & 0x08) == 0) { /* 11110xxx */
>                 following = 3;
>             } else if ((c & 0x04) == 0) { /* 111110xx */
>                 following = 4;
>             } else if ((c & 0x02) == 0) { /* 1111110x */
>                 following = 5;
>             } else
>                 return false;
>
>             for (int n = 0; n < following; n++) {
>                 i++;
>                 const unsigned char next = buf[i];
>
>                 /*
>                  * The string ended inside a multi-byte sequence. The
>                  * truncated tail is tolerated: the verdict depends only on
>                  * what was seen before it.
>                  */
>                 if (!next)
>                     return gotone;
>
>                 /* every continuation byte must look like 10xxxxxx */
>                 if ((next & 0x80) == 0 || (next & 0x40))
>                     return false;
>             }
>             gotone = true;
>         }
>     }
>
>     return gotone; /* don't claim it's UTF-8 if it's all 7-bit */
> }
>
>
> ------------------------------------------------------------------------
>
> //
> // C++ Interface: btstringmgr
> //
> // Description:
> //
> //
> // Author: The BibleTime team <info at bibletime.info>, (C) 2004
> //
> // Copyright: See COPYING file that comes with this distribution
> //
> //
> #ifndef BTSTRINGMGR_H
> #define BTSTRINGMGR_H
>
> //Sword includes
> #include <stringmgr.h>
>
> //Qt includes
> #include <qstring.h>
>
> using namespace sword;
>
> /**
>  * String manager that upper-cases text for Sword, using Qt for UTF-8 input.
>  */
> class BTStringMgr : public StringMgr {
> public:
>     /** Upper-cases the buffer in place and returns it.
>      * Text detected as UTF-8 by isUtf8() is converted through QString; any
>      * other text is upper-cased byte by byte.
>      * @param text The NUL-terminated buffer to convert; it is overwritten.
>      * @param maxlen When greater than 0, caps the number of bytes the UTF-8
>      * conversion writes back; 0 means the current length of the string.
>      * @return The pointer passed in as text.
>      */
>     virtual char* upperUTF8(char* text, const unsigned int maxlen = 0);
>
>     /** Upper-cases a Latin-1 encoded buffer in place and returns it.
>      * @param text The NUL-terminated Latin-1 buffer to convert; it is overwritten.
>      * @return The pointer passed in as text.
>      */
>     virtual char* upperLatin1(char* text);
>
> protected:
>     /** Tells the caller whether this manager handles Unicode; always true here. */
>     virtual const bool supportsUnicode() const;
>
>     /** CODE TAKEN FROM KDELIBS 3.2
>      * Checks whether a NUL-terminated string looks like UTF-8.
>      * Returns false for text that consists of 7-bit characters only.
>      *
>      * The code was copied from kdelibs so that there is no dependency on KDE 3.2.
>      */
>     const bool isUtf8(const char *buf);
> };
>
> #endif
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> sword-devel mailing list
> sword-devel at crosswire.org
> http://www.crosswire.org/mailman/listinfo/sword-devel
More information about the sword-devel
mailing list