utf8transliterator.h

00001 /******************************************************************************
00002  *
00003  * $Id: utf8transliterator.h 2088 2007-09-26 23:42:44Z chrislit $
00004  *
00005  * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org)
00006  *      CrossWire Bible Society
00007  *      P. O. Box 2528
00008  *      Tempe, AZ  85280-2528
00009  *
00010  * This program is free software; you can redistribute it and/or modify it
00011  * under the terms of the GNU General Public License as published by the
00012  * Free Software Foundation version 2.
00013  *
00014  * This program is distributed in the hope that it will be useful, but
00015  * WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * General Public License for more details.
00018  *
00019  */
00020 
00021 #ifndef UTF8TRANSLITERATOR_H
00022 #define UTF8TRANSLITERATOR_H
00023 
/**
 * Script identifiers.
 *
 * The enumerators rely on implicit numbering (SE_OFF == 0 ... SE_KANJI == 47),
 * so their order must not change.  NUMSCRIPTS, defined right after this enum,
 * is the total enumerator count (48).
 */
enum scriptEnum {
	SE_OFF,
	SE_LATIN,

	/* one-way (to) transliterators */
	SE_IPA,
	SE_BASICLATIN,
	SE_SBL,
	SE_TC,
	SE_BETA,
	SE_BGREEK,
	SE_SERA,
	SE_HUGOYE,
	SE_UNGEGN,
	SE_ISO,
	SE_ALALC,
	SE_BGNPCGN,

	/* two-way transliterators */
	SE_GREEK,
	SE_HEBREW,
	SE_CYRILLIC,
	SE_ARABIC,
	SE_SYRIAC,
	SE_KATAKANA,
	SE_HIRAGANA,
	SE_HANGUL,
	SE_DEVANAGARI,
	SE_TAMIL,
	SE_BENGALI,
	SE_GURMUKHI,
	SE_GUJARATI,
	SE_ORIYA,
	SE_TELUGU,
	SE_KANNADA,
	SE_MALAYALAM,
	SE_THAI,
	SE_GEORGIAN,
	SE_ARMENIAN,
	SE_ETHIOPIC,
	SE_GOTHIC,
	SE_UGARITIC,
	SE_COPTIC,
	SE_MEROITIC,
	SE_LINEARB,
	SE_CYPRIOT,
	SE_RUNIC,
	SE_OGHAM,
	SE_THAANA,
	SE_GLAGOLITIC,
	/* SE_TENGWAR, SE_CIRTH, -- left disabled, as in the original list */

	/* one-way (from) transliterators */
	SE_JAMO,
	SE_HAN,
	SE_KANJI
};
/* Total number of enumerators in scriptEnum (SE_OFF through SE_KANJI). */
#define NUMSCRIPTS 48

/*
 * NUMSCRIPTS minus three; used as the first dimension of
 * UTF8Transliterator::optionstring.
 * NOTE(review): presumably the three excluded entries are the trailing
 * "one-way (from)" scripts SE_JAMO, SE_HAN and SE_KANJI -- confirm.
 *
 * The expansion is parenthesized so the macro keeps its value inside larger
 * expressions (e.g. 2 * NUMTARGETSCRIPTS); the bare NUMSCRIPTS-3 form did not.
 * The original line carried a trailing "//6" remark whose meaning is not
 * evident from this header.
 */
#define NUMTARGETSCRIPTS (NUMSCRIPTS - 3)
00027 
00028 #include <swoptfilter.h>
00029 
00030 #include <unicode/unistr.h>
00031 
00032 #include <unicode/translit.h>
00033 
00034 #include <defs.h>
00035 #include <map>
00036 
00037 SWORD_NAMESPACE_START
00038 
00039 class SWModule;
00040 
00041 struct SWTransData {
00042         UnicodeString resource;
00043         UTransDirection dir;
00044 };
00045 typedef std::map<const UnicodeString, SWTransData> SWTransMap;
00046 typedef std::pair<UnicodeString, SWTransData> SWTransPair;
00047 
00048 // Chris, please add more javadoc-style documentation in this header file
00049 // so that the information will show up in the doxygen-generated
00050 // api-docs.
00051 
00054 class SWDLLEXPORT UTF8Transliterator : public SWOptionFilter {
00055 private:
00056 
00057         unsigned char option;
00058 
00059         static const char optionstring[NUMTARGETSCRIPTS][16];
00060 
00061         static const char optName[];
00062         static const char optTip[];
00063         static const char SW_RB_RULE_BASED_IDS[];
00064         static const char SW_RB_RULE[];
00065         static const char SW_RESDATA[];
00066         StringList options;
00067         static SWTransMap transMap;
00068         UErrorCode utf8status;
00069 
00070         void Load(UErrorCode &status);
00071         void registerTrans(const UnicodeString& ID, const UnicodeString& resource, UTransDirection dir, UErrorCode &status);
00072         bool addTrans(const char* newTrans, SWBuf* transList);
00073         bool checkTrans(const UnicodeString& ID, UErrorCode &status);
00074         Transliterator *createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status);
00075 
00076 public:
00077         UTF8Transliterator();
00078         virtual char processText(SWBuf &text, const SWKey *key = 0, const SWModule *module = 0);
00079         virtual const char *getOptionName() { return optName; }
00080         virtual const char *getOptionTip() { return optTip; }
00081         virtual void setOptionValue(const char *ival);
00082         virtual const char *getOptionValue();
00083         virtual StringList getOptionValues() { return options; }
00084 };
00085 
00086 SWORD_NAMESPACE_END
00087 #endif