31 #include <unicode/utypes.h>
32 #include <unicode/ucnv.h>
33 #include <unicode/ustring.h>
34 #include <unicode/uchar.h>
36 #include <unicode/unistr.h>
37 #include <unicode/translit.h>
39 #include <unicode/locid.h>
84 int isValidUTF8(
unsigned char *txt) {
85 unsigned int countUTF8 = 0;
87 unsigned char parts = 0;
90 unsigned char *p = txt;
97 for (parts = 0; i & 0x80; parts++) {
107 while (--parts && ++*p) {
109 if (0xc0 & *p != 0x80) {
125 return countUTF8 ? 1 : -1;
128 char *lowerLatin1(
char *buf,
unsigned int maxlen = 0) {
133 bool checkMax = maxlen;
135 while (*buf && (!checkMax || maxlen--)) {
150 virtual char *
upperUTF8(
char *,
unsigned int maxlen = 0)
const;
151 virtual char *
lowerUTF8(
char *,
unsigned int maxlen = 0)
const;
228 const unsigned char* from = (
unsigned char*)orig.
c_str();
230 std::map<SW_u32, SW_u32>::const_iterator it =
toUpperData.end();
236 if (!ch) ch = 0xFFFD;
241 long len = maxlen ? (text.
size() < maxlen ? text.
size() : (maxlen - 1)) : 0;
242 if (len) memcpy(t, text.
c_str(), len);
292 if (!isValidUTF8((
unsigned char *)t)) {
296 for (
const char *ch = t; *ch; ch++) {
297 performOp += (*ch > 0) ? 1 : -1;
302 return lowerLatin1(t);
309 return isupper(character);
312 return islower(character);
315 return isdigit(character);
318 return isalpha(character);
332 bool checkMax = maxlen;
334 while (*buf && (!checkMax || maxlen--)) {
349 char *ICUStringMgr::upperUTF8(
char *buf,
unsigned int maxlen)
const {
351 int max = (int)((maxlen) ? maxlen : strlen(buf));
353 UErrorCode err = U_ZERO_ERROR;
359 UChar *lowerStr =
new UChar[max+10];
360 UChar *upperStr =
new UChar[max+10];
362 u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
363 if (err != U_ZERO_ERROR) {
370 u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
371 if (err != U_ZERO_ERROR) {
378 ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);
385 char *ICUStringMgr::lowerUTF8(
char *buf,
unsigned int maxlen)
const {
387 int max = (int)((maxlen) ? maxlen : strlen(buf));
389 UErrorCode err = U_ZERO_ERROR;
395 UChar *sourceStr =
new UChar[max+10];
396 UChar *resultStr =
new UChar[max+10];
398 u_strFromUTF8(sourceStr, max+9, 0, buf, -1, &err);
399 if (err != U_ZERO_ERROR) {
406 u_strToLower(resultStr, max+9, sourceStr, -1, 0, &err);
407 if (err != U_ZERO_ERROR) {
414 ret = u_strToUTF8(ret, max, 0, resultStr, -1, &err);
421 bool ICUStringMgr::isUpper(
SW_u32 character)
const {
422 return u_isupper(character);
424 bool ICUStringMgr::isLower(
SW_u32 character)
const {
425 return u_islower(character);
427 bool ICUStringMgr::isDigit(
SW_u32 character)
const {
428 return u_isdigit(character);
430 bool ICUStringMgr::isAlpha(
SW_u32 character)
const {
431 return u_isalpha(character);
#define SWORD_NAMESPACE_START
class __staticsystemStringMgr _staticsystemStringMgr
__staticsystemStringMgr()
virtual bool isLower(SW_u32 character) const
virtual bool isDigit(SW_u32 character) const
static StringMgr * getSystemStringMgr()
virtual bool supportsUnicode() const
const char * c_str() const
static StringMgr * systemStringMgr
virtual char * upperUTF8(char *text, unsigned int max=0) const
virtual bool isUpper(SW_u32 character) const
static LocaleMgr * getSystemLocaleMgr()
static void setSystemStringMgr(StringMgr *newStringMgr)
unsigned long size() const
static void setSystemLocaleMgr(LocaleMgr *newLocaleMgr)
SWBuf * getUTF8FromUniChar(SW_u32 uchar, SWBuf *appendTo)
virtual char * upperLatin1(char *text, unsigned int max=0) const
~__staticsystemStringMgr()
virtual bool isAlpha(SW_u32 character) const
SWORD_NAMESPACE_START std::map< SW_u32, SW_u32 > toUpperData
#define SWORD_NAMESPACE_END
SW_u32 getUniCharFromUTF8(const unsigned char **buf, bool skipValidation=false)
virtual char * lowerUTF8(char *text, unsigned int max=0) const