30 #include <unicode/ucnv.h>
31 #include <unicode/uchar.h>
36 #include "unicode/resbund.h"
95 #ifdef ICU_CUSTOM_RESOURCE_BUILDING
// Resource-bundle key of the table listing the rule-based transliterator IDs
// (used with ures_getByKey in Load).
100 const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] =
"RuleBasedTransliteratorIDs";
// Resource-bundle key under which a transliterator's rule text is stored
// (used with ures_getUnicodeStringByKey in checkTrans).
101 const char UTF8Transliterator::SW_RB_RULE[] =
"Rule";
// Resource data location handed to ures_openDirect; here taken from the
// SWICU_DATA macro.
103 const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
// NOTE(review): second definition of SW_RESDATA -- presumably the fallback arm
// of a preprocessor conditional that is not visible in this excerpt; confirm.
105 const char UTF8Transliterator::SW_RESDATA[] =
"/usr/local/lib/sword/";
// Builds a char representation of `str` (the constructor is defined out of line).
110 inline SWCharString(
const UnicodeString& str);
// Destructor, defined out of line.
111 inline ~SWCharString();
// Implicit conversion that exposes the converted text through `ptr`.
112 inline operator const char*() {
return ptr; }
// Extracts the contents of `str` into the char buffer that `ptr` points to.
117 SWCharString::SWCharString(
const UnicodeString& str) {
// A string whose length does not fit in the member array `buf` gets a
// separately allocated buffer.
120 if (str.length() >= (int32_t)
sizeof(buf)) {
// Allocation is the string length plus 8 bytes of slack.
// NOTE(review): this sizes the char buffer from the UTF-16 length -- confirm
// that the "" codepage conversion never needs more than one byte per unit.
121 ptr =
new char[str.length() + 8];
// Extract from offset 0 with the maximum possible length (i.e. the whole
// string), passing "" as the codepage argument.
125 str.extract(0, 0x7FFFFFFF, ptr,
"");
// Destructor; its body is not visible in this excerpt.
// NOTE(review): presumably releases the buffer the constructor allocated with
// new[] when the string did not fit in `buf` -- confirm.
128 SWCharString::~SWCharString() {
135 #endif // ICU_CUSTOM_RESOURCE_BUILDING
143 #ifdef ICU_CUSTOM_RESOURCE_BUILDING
// Start from ICU's "no error" state (only done when custom resource building
// is compiled in).
145 utf8status = U_ZERO_ERROR;
155 #ifdef ICU_CUSTOM_RESOURCE_BUILDING
// Opens the "translit_swordindex" resource bundle under SW_RESDATA and passes
// every usable row of its SW_RB_RULE_BASED_IDS table to registerTrans().
//   status - ICU error code; written by each ures_* call below.
156 void UTF8Transliterator::Load(UErrorCode &status)
159 static const char translit_swordindex[] =
"translit_swordindex";
161 UResourceBundle *bundle = 0, *transIDs = 0, *colBund = 0;
162 bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
163 if (U_FAILURE(status)) {
// Table with one row per rule-based transliterator.
169 transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
172 int32_t row, maxRows;
173 if (U_SUCCESS(status)) {
174 maxRows = ures_getSize(transIDs);
175 for (row = 0; row < maxRows; row++) {
176 colBund = ures_getByIndex(transIDs, row, 0, &status);
// Only rows with exactly four columns are used:
// 0 = ID, 1 = type character, 2 = resource name, 3 = direction character.
178 if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
179 UnicodeString
id = ures_getUnicodeStringByIndex(colBund, 0, &status);
180 UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
181 UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
184 if (U_SUCCESS(status)) {
// The first character of column 3 selects forward vs. reverse direction.
192 UTransDirection dir =
193 (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
195 UTRANS_FORWARD : UTRANS_REVERSE;
// UnicodeString::getBuffer() yields UTF-16 code units, which "%s" cannot
// print; convert to a char string through SWCharString for logging.
197 SWLOGD(
"instantiating %s ...", static_cast<const char *>(SWCharString(resString)));
198 registerTrans(
id, resString, dir, status);
// Release the ID table once all rows have been visited.
219 ures_close(transIDs);
// Records a rule-based transliterator in the local transMap so that
// checkTrans() can find it later.
//   ID       - transliterator ID (checkTrans looks transMap up by this ID)
//   resource - name of the resource associated with the ID
//   dir      - transliteration direction
//   status   - ICU error code; not modified on the lines visible here
225 void UTF8Transliterator::registerTrans(
const UnicodeString& ID,
const UnicodeString& resource,
226 UTransDirection dir, UErrorCode &status )
// UnicodeString::getBuffer() yields UTF-16 code units, which "%s" cannot
// print; convert to a char string through SWCharString for logging.
229 SWLOGD(
"registering ID locally %s", static_cast<const char *>(SWCharString(ID)));
// swpair/swstuff are populated on lines not visible in this excerpt.
235 swpair.second = swstuff;
236 transMap.insert(swpair);
// Checks whether a transliterator called ID can be used.  ICU is asked first;
// if that fails, the ID is looked up in transMap, the transliterator is built
// from its rule resource and then registered with ICU.
//   ID     - transliterator ID to check
//   status - ICU error code; reset to U_ZERO_ERROR before the local lookup
// The return statements are on lines not visible in this excerpt.
240 bool UTF8Transliterator::checkTrans(
const UnicodeString& ID, UErrorCode &status )
// First attempt: let ICU instantiate the ID directly.
243 Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
244 if (!U_FAILURE(status)) {
// UnicodeString::getBuffer() yields UTF-16 code units, which "%s" cannot
// print; convert to a char string through SWCharString for logging.
246 SWLOGD(
"already have it %s", static_cast<const char *>(SWCharString(ID)));
// Clear the failure left by createInstance before trying the local map.
250 status = U_ZERO_ERROR;
252 SWTransMap::iterator swelement;
253 if ((swelement = transMap.find(ID)) != transMap.end()) {
254 SWLOGD(
"found element in map");
256 UParseError parseError;
// `ch` and `swstuff` are set up on lines not visible in this excerpt.
// NOTE(review): no matching ures_close(bundle) is visible here -- confirm the
// bundle is released on every path.
265 UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
266 const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
270 if (U_FAILURE(status)) {
// Compile the rule text into a transliterator for the stored direction.
277 Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.
dir,
279 if (U_FAILURE(status)) {
// Make the new transliterator available to later createInstance() calls.
291 Transliterator::registerInstance(trans);
304 #endif // ICU_CUSTOM_RESOURCE_BUILDING
307 #ifdef ICU_CUSTOM_RESOURCE_BUILDING
// With ICU_CUSTOM_RESOURCE_BUILDING the ID is first validated by checkTrans().
// NOTE(review): presumably this condition guards the append below; the
// closing brace is on a line not visible in this excerpt -- confirm.
310 if (checkTrans(UnicodeString(newTrans), status)) {
312 #endif // ICU_CUSTOM_RESOURCE_BUILDING
// Append the transliterator ID to the caller-supplied list.
313 *transList += newTrans;
316 #ifdef ICU_CUSTOM_RESOURCE_BUILDING
323 #endif // ICU_CUSTOM_RESOURCE_BUILDING
// Ask ICU to instantiate the transliterator named by ID.
// NOTE(review): if this line belongs to createTrans(ID, dir, status), the `dir`
// argument is ignored in favour of a hard-coded UTRANS_FORWARD -- confirm
// that this is intended.
329 icu::Transliterator *trans = icu::Transliterator::createInstance(ID,UTRANS_FORWARD,status);
330 if (U_FAILURE(status)) {
// Decode the UTF-8 input text into a UTF-16 buffer for ICU.
357 UErrorCode err = U_ZERO_ERROR;
358 UConverter * conv =
NULL;
359 conv = ucnv_open(
"UTF-8", &err);
// Destination capacity: twice `j` plus one UChar.  `j` is also passed below
// as the source length of `text`.
366 int32_t len = (j * 2) + 1;
// NOTE(review): allocated with new[]; the matching delete[] is not visible in
// this excerpt -- confirm it is released on every path.
367 UChar *source =
new UChar[len];
// `len` is reused to hold the value returned by ucnv_toUChars.
369 len = ucnv_toUChars(conv, source, len, text, j, &err);
// Script detection: look up the Unicode block of every element of `source`
// and flag the corresponding entry of `scripts`.
// NOTE(review): source[i] is a single UTF-16 code unit.  Blocks outside the
// BMP (e.g. Gothic, Ugaritic, Linear B, Cypriot) are stored as surrogate
// pairs, so their cases may never match here -- confirm, and consider
// iterating by code point instead.
379 for (i = 0; i < (
unsigned long)len; i++) {
// `j` is reused here to hold the block code.
380 j = ublock_getCode(source[i]);
384 case UBLOCK_GREEK: scripts[
SE_GREEK] =
true;
break;
385 case UBLOCK_HEBREW: scripts[
SE_HEBREW] =
true;
break;
386 case UBLOCK_CYRILLIC: scripts[
SE_CYRILLIC] =
true;
break;
387 case UBLOCK_ARABIC: scripts[
SE_ARABIC] =
true;
break;
388 case UBLOCK_SYRIAC: scripts[
SE_SYRIAC] =
true;
break;
389 case UBLOCK_KATAKANA: scripts[
SE_KATAKANA] =
true;
break;
390 case UBLOCK_HIRAGANA: scripts[
SE_HIRAGANA] =
true;
break;
391 case UBLOCK_HANGUL_SYLLABLES: scripts[
SE_HANGUL] =
true;
break;
392 case UBLOCK_HANGUL_JAMO: scripts[
SE_JAMO] =
true;
break;
393 case UBLOCK_DEVANAGARI: scripts[
SE_DEVANAGARI] =
true;
break;
394 case UBLOCK_TAMIL: scripts[
SE_TAMIL] =
true;
break;
395 case UBLOCK_BENGALI: scripts[
SE_BENGALI] =
true;
break;
396 case UBLOCK_GURMUKHI: scripts[
SE_GURMUKHI] =
true;
break;
397 case UBLOCK_GUJARATI: scripts[
SE_GUJARATI] =
true;
break;
398 case UBLOCK_ORIYA: scripts[
SE_ORIYA] =
true;
break;
399 case UBLOCK_TELUGU: scripts[
SE_TELUGU] =
true;
break;
400 case UBLOCK_KANNADA: scripts[
SE_KANNADA] =
true;
break;
401 case UBLOCK_MALAYALAM: scripts[
SE_MALAYALAM] =
true;
break;
402 case UBLOCK_THAI: scripts[
SE_THAI] =
true;
break;
403 case UBLOCK_GEORGIAN: scripts[
SE_GEORGIAN] =
true;
break;
404 case UBLOCK_ARMENIAN: scripts[
SE_ARMENIAN] =
true;
break;
405 case UBLOCK_ETHIOPIC: scripts[
SE_ETHIOPIC] =
true;
break;
406 case UBLOCK_GOTHIC: scripts[
SE_GOTHIC] =
true;
break;
407 case UBLOCK_UGARITIC: scripts[
SE_UGARITIC] =
true;
break;
409 case UBLOCK_LINEAR_B_SYLLABARY: scripts[
SE_LINEARB] =
true;
break;
410 case UBLOCK_CYPRIOT_SYLLABARY: scripts[
SE_CYPRIOT] =
true;
break;
411 case UBLOCK_RUNIC: scripts[
SE_RUNIC] =
true;
break;
412 case UBLOCK_OGHAM: scripts[
SE_OGHAM] =
true;
break;
413 case UBLOCK_THAANA: scripts[
SE_THAANA] =
true;
break;
414 case UBLOCK_GLAGOLITIC: scripts[
SE_GLAGOLITIC] =
true;
break;
415 case UBLOCK_CHEROKEE: scripts[
SE_CHEROKEE] =
true;
break;
// The labels below carry no statement of their own on the visible lines, so
// consecutive ones fall through to a shared handler; that handler is on lines
// not visible in this excerpt.
418 case UBLOCK_CJK_RADICALS_SUPPLEMENT:
419 case UBLOCK_KANGXI_RADICALS:
420 case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
421 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
422 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
423 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
426 case UBLOCK_CJK_COMPATIBILITY:
427 case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
428 case UBLOCK_CJK_COMPATIBILITY_FORMS:
432 case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
// Per-script selection: each addTrans() call appends one transliterator ID to
// `ID`.  Several scripts offer more than one romanisation variant.
// NOTE(review): presumably `option` picks the variant; most of the selecting
// conditions are on lines not visible in this excerpt -- confirm.
469 addTrans(
"Greek-Latin/BGreek", &ID);
471 addTrans(
"Greek-Latin/UNGEGN", &ID);
508 addTrans(
"Hebrew-Latin/UNGEGN", &ID);
510 addTrans(
"Hebrew-Latin/ALALC", &ID);
520 addTrans(
"Cyrillic-Glagolitic", &ID);
536 addTrans(
"Syriac-Latin/Hugoye", &ID);
537 else if (
option == SE_HEBREW)
550 addTrans(
"Georgian-Latin/ISO", &ID);
552 addTrans(
"Georgian-Latin/ALALC", &ID);
554 addTrans(
"Georgian-Latin/BGN", &ID);
564 addTrans(
"Armenian-Latin/ISO", &ID);
566 addTrans(
"Armenian-Latin/ALALC", &ID);
568 addTrans(
"Armenian-Latin/BGN", &ID);
578 addTrans(
"Ethiopic-Latin/UNGEGN", &ID);
580 addTrans(
"Ethiopic-Latin/ISO", &ID);
582 addTrans(
"Ethiopic-Latin/ALALC", &ID);
584 addTrans(
"Ethiopic-Latin/SERA", &ID);
592 addTrans(
"Gothic-Latin/Basic", &ID);
602 addTrans(
"Ugaritic-Latin/SBL", &ID);
630 addTrans(
"Thaana-Latin/ALALC", &ID);
640 addTrans(
"Glagolitic-Latin/ISO", &ID);
642 addTrans(
"Glagolitic-Latin/ALALC", &ID);
644 addTrans(
"Glagolitic-Cyrillic", &ID);
// NOTE(review): SE_THAI is tested twice in close succession (original lines
// 654 and 658); the bodies are not visible here -- confirm the second test is
// not meant for a different script.
654 if (scripts[SE_THAI]) {
658 if (scripts[SE_THAI]) {
683 if (scripts[SE_KATAKANA]) {
687 if (scripts[SE_HIRAGANA]) {
734 if (scripts[SE_MALAYALAM]) {
// InterIndic routing.  The first group appends "<Script>-InterIndic" for each
// Indic script flagged in `scripts`; the second group appends
// "InterIndic-<Script>" IDs.  The conditions selecting the second group are
// on lines not visible in this excerpt.
744 addTrans(
"Devanagari-InterIndic", &ID);
746 if (scripts[SE_TAMIL]) {
749 if (scripts[SE_BENGALI]) {
750 addTrans(
"Bengali-InterIndic", &ID);
752 if (scripts[SE_GURMUKHI]) {
// The script name is spelled "Gurmukhi", as in "InterIndic-Gurmukhi" below.
// The earlier "Gurmurkhi" spelling did not match ICU's Gurmukhi-InterIndic ID.
753 addTrans(
"Gurmukhi-InterIndic", &ID);
755 if (scripts[SE_GUJARATI]) {
756 addTrans(
"Gujarati-InterIndic", &ID);
758 if (scripts[SE_ORIYA]) {
761 if (scripts[SE_TELUGU]) {
764 if (scripts[SE_KANNADA]) {
765 addTrans(
"Kannada-InterIndic", &ID);
767 if (scripts[SE_MALAYALAM]) {
768 addTrans(
"Malayalam-InterIndic", &ID);
// Second group: IDs leading from InterIndic to a concrete script.
773 addTrans(
"InterIndic-Devanagari", &ID);
779 addTrans(
"InterIndic-Bengali", &ID);
782 addTrans(
"InterIndic-Gurmukhi", &ID);
785 addTrans(
"InterIndic-Gujarati", &ID);
794 addTrans(
"InterIndic-Kannada", &ID);
797 addTrans(
"InterIndic-Malayalam", &ID);
// Handling for text flagged as containing Latin script; the body is on lines
// not visible in this excerpt.
815 if (scripts[SE_LATIN]) {
// Build one transliterator from the accumulated ID list.
905 icu::Transliterator * trans =
createTrans(icu::UnicodeString(ID), UTRANS_FORWARD, err);
// Proceed only if a transliterator was returned and ICU reported no failure.
906 if (trans && !U_FAILURE(err)) {
// NOTE(review): this UnicodeString constructor is given only the pointer, so
// `source` presumably has to be NUL-terminated -- confirm.
907 icu::UnicodeString target = icu::UnicodeString(source);
// Transliterate the copy in place.
908 trans->transliterate(target);
// Encode the result back through `conv` into text's raw buffer; text.size()
// is passed as the destination capacity.
// NOTE(review): any resizing of `text` before this call (original line 909)
// is not visible here -- confirm the capacity is sufficient.
910 len = ucnv_fromUChars(conv, text.
getRawData(), text.
size(), target.getBuffer(), target.length(), &err);
virtual void setOptionValue(const char *ival)
static const char optTip[]
#define SWORD_NAMESPACE_START
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
virtual const char * getOptionValue()
static SWLog * getSystemLog()
static const char optName[]
int stricmp(const char *s1, const char *s2)
std::map< const icu::UnicodeString, SWTransData > SWTransMap
unsigned long size() const
static const char optionstring[NUMTARGETSCRIPTS][16]
void logError(const char *fmt,...) const
int strnicmp(const char *s1, const char *s2, int len)
std::pair< icu::UnicodeString, SWTransData > SWTransPair
#define SWORD_NAMESPACE_END
icu::Transliterator * createTrans(const icu::UnicodeString &ID, UTransDirection dir, UErrorCode &status)
bool addTrans(const char *newTrans, SWBuf *transList)
icu::UnicodeString resource
void setSize(unsigned long len)