/* ******************************************************************************* * * Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: ucol_tok.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created 02/22/2001 * created by: Vladimir Weinstein * * This module reads a tailoring rule string and produces a list of * tokens that will be turned into collation elements * */ #ifndef UCOL_TOKENS_H #define UCOL_TOKENS_H #include "unicode/utypes.h" #include "unicode/uset.h" #if !UCONFIG_NO_COLLATION #include "ucol_imp.h" #include "uhash.h" #include "unicode/parseerr.h" #define UCOL_TOK_UNSET 0xFFFFFFFF #define UCOL_TOK_RESET 0xDEADBEEF #define UCOL_TOK_POLARITY_NEGATIVE 0 #define UCOL_TOK_POLARITY_POSITIVE 1 #define UCOL_TOK_TOP 0x04 #define UCOL_TOK_VARIABLE_TOP 0x08 #define UCOL_TOK_BEFORE 0x03 #define UCOL_TOK_SUCCESS 0x10 /* this is space for the extra strings that need to be unquoted */ /* during the parsing of the rules */ #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 typedef struct UColToken UColToken; typedef struct { UColToken* first; UColToken* last; UColToken* reset; UBool indirect; uint32_t baseCE; uint32_t baseContCE; uint32_t nextCE; uint32_t nextContCE; uint32_t previousCE; uint32_t previousContCE; int32_t pos[UCOL_STRENGTH_LIMIT]; uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; } UColTokListHeader; struct UColToken { UChar debugSource; UChar debugExpansion; UChar debugPrefix; uint32_t CEs[128]; uint32_t noOfCEs; uint32_t expCEs[128]; uint32_t noOfExpCEs; uint32_t source; uint32_t expansion; uint32_t prefix; uint32_t strength; uint32_t toInsert; uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */ UColTokListHeader *listHeader; UColToken* previous; UColToken* next; UChar **rulesToParseHdl; uint16_t flags; }; /* * This is a token that has been parsed * but not yet processed. Used to reduce * the number of arguments in the parser */ typedef struct { uint32_t strength; uint32_t charsOffset; uint32_t charsLen; uint32_t extensionOffset; uint32_t extensionLen; uint32_t prefixOffset; uint32_t prefixLen; uint16_t flags; uint16_t indirectIndex; } UColParsedToken; typedef struct { UColParsedToken parsedToken; UChar *source; UChar *end; const UChar *current; UChar *sourceCurrent; UChar *extraCurrent; UChar *extraEnd; const InverseUCATableHeader *invUCA; const UCollator *UCA; UHashtable *tailored; UColOptionSet *opts; uint32_t resultLen; uint32_t listCapacity; UColTokListHeader *lh; UColToken *varTop; USet *copySet; USet *removeSet; UBool buildCCTabFlag; /* Tailoring rule requirs building combining class table. */ UChar32 previousCp; /* Previous code point. */ /* For processing starred lists. */ UBool isStarred; /* Are we processing a starred token? */ UBool savedIsStarred; uint32_t currentStarredCharIndex; /* Index of the current charrecter in the starred expression. */ uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */ /* For processing ranges. */ UBool inRange; /* Are we in a range? */ UChar32 currentRangeCp; /* Current code point in the range. */ UChar32 lastRangeCp; /* The last code point in the range. */ /* reorder codes for collation reordering */ int32_t* reorderCodes; int32_t reorderCodesLength; } UColTokenParser; typedef struct { const UChar *subName; int32_t subLen; UColAttributeValue attrVal; } ucolTokSuboption; typedef struct { const UChar *optionName; int32_t optionLen; const ucolTokSuboption *subopts; int32_t subSize; UColAttribute attr; } ucolTokOption; #define ucol_tok_isSpecialChar(ch) \ (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ (ch) == 0x007B)) U_CFUNC uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UParseError *parseError, UErrorCode *status); U_CFUNC void ucol_tok_initTokenList(UColTokenParser *src, const UChar *rules, const uint32_t rulesLength, const UCollator *UCA, GetCollationRulesFunction importFunc, void* context, UErrorCode *status); U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, UBool startOfRules, UParseError *parseError, UErrorCode *status); U_CAPI const UChar * U_EXPORT2 ucol_tok_getNextArgument(const UChar *start, const UChar *end, UColAttribute *attrib, UColAttributeValue *value, UErrorCode *status); U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, uint32_t CE, uint32_t contCE, uint32_t *nextCE, uint32_t *nextContCE, uint32_t strength); U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, uint32_t CE, uint32_t contCE, uint32_t *prevCE, uint32_t *prevContCE, uint32_t strength); U_CFUNC const UChar* ucol_tok_getRulesFromBundle( void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status); #endif /* #if !UCONFIG_NO_COLLATION */ #endif